diff options
| -rw-r--r-- | python/quote_parsing/__main__.py | 3 |
| -rw-r--r-- | python/quote_parsing/parse_emails.py | 29 |
2 files changed, 26 insertions, 6 deletions
Patch summary (text before the first "diff --git" header is commentary, not part of any hunk):
  * __main__.py: drops the unused `import os` and passes `conn` to parse_email().
  * parse_emails.py: adds get_current_version(), which selects max(version) from
    index_version; names the groups of the CS subject regex and captures an
    optional version digit; records "version" in the per-quote dict.
Corrections applied to the added lines relative to the patch as scraped:
  * m.groupsdict() -> m.groupdict()  (re.Match has no groupsdict method).
  * c.execute(..., params=(...)) -> parameters passed positionally
    (psycopg2's cursor.execute signature is execute(query, vars=None)).
  * get_version(index, ...) -> get_version(indextype, ...)  (`index` is not bound
    in the visible part of parse_email -- TODO confirm against the full file).
  * the subject_cs pattern is rejoined onto one line (it was split mid-string).
NOTE(review): lru_cache() is rebuilt on every parse_email() call, so it never
  caches across emails; consider hoisting it to the caller.
NOTE(review): `version` is a str when taken from the subject but whatever the
  query returns otherwise; consider normalising the type.
NOTE(review): indentation and the blank context line after `subject = ...` were
  reconstructed from a whitespace-collapsed copy -- verify against the repository.

diff --git a/python/quote_parsing/__main__.py b/python/quote_parsing/__main__.py
index 0540f90c..1b59bd20 100644
--- a/python/quote_parsing/__main__.py
+++ b/python/quote_parsing/__main__.py
@@ -1,6 +1,5 @@
 import datetime
 import logging
-import os
 import pandas as pd
 import pickle
 import sys
@@ -39,7 +38,7 @@ for f in emails:
         continue
     else:
         try:
-            key, (option_stack, fwd_index) = parse_email(f, date_composed)
+            key, (option_stack, fwd_index) = parse_email(f, date_composed, conn)
         except RuntimeError as e:
             logger.error(e)
         else:
diff --git a/python/quote_parsing/parse_emails.py b/python/quote_parsing/parse_emails.py
index 04117504..13e8b828 100644
--- a/python/quote_parsing/parse_emails.py
+++ b/python/quote_parsing/parse_emails.py
@@ -3,8 +3,8 @@ import re
 import psycopg2.sql as sql
 import datetime
 import pickle
-import sys
 from . import logger
+from functools import partial, lru_cache
 from quantlib.time.imm import next_date
 from quantlib.time.api import Date, pydate_from_qldate
 from unicodedata import normalize
@@ -576,20 +576,39 @@ subject_gs = re.compile(r"(?:FW: |Fwd: )?GS (IG|HY)(\d{2}) 5y.*- Ref [^\d]*([\d.
 subject_sg = re.compile(r"SG OPTIONS - CDX (IG|HY) S(\d{2}).* REF[^\d]*([\d.]+)")
 subject_citi = re.compile(r"(?:Fwd:)?Citi Options: (IG|HY)(\d{2}) 5Y")
 subject_cs = re.compile(
-    r"CS CDX (IG|HY)(\d{2}) Options -\s+(?:\d{2}/\d{2}/\d{2}\s+)?Ref = ([\d.]+)[^\d]*"
+    r"CS CDX (?P<index>IG|HY)(?P<series>\d{2})_?v?(?P<version>\d)? Options -\s+(?:\d{2}/\d{2}/\d{2}\s+)?Ref = (?P<ref>[\d.]+)[^\d]*"
 )
 subject_bnp = re.compile(r"CDX OPTIONS RUN: (IG|HY)(\d{2}).*")
 
 
-def parse_email(email, date_received):
+def get_current_version(index, series, d, conn):
+    with conn.cursor() as c:
+        c.execute(
+            "select max(version) FROM index_version "
+            "WHERE index=%s and series=%s and lastdate <=%s",
+            (index, series, d),
+        )
+        version, = c.fetchone()
+    return version
+
+
+def parse_email(email, date_received, conn):
+    get_version = lru_cache()(partial(get_current_version, conn=conn))
     with email.open("rt") as fh:
         subject = fh.readline().lstrip()
 
         for source in ["BAML", "GS", "MS", "NOM", "SG", "CITI", "CS", "BNP"]:
             m = globals()[f"subject_{source.lower()}"].match(subject)
             if m:
+                version = None
                 if source in ["BAML", "CITI", "BNP"]:
                     indextype, series = m.groups()
+                elif source == "CS":
+                    d = m.groupdict()
+                    version = d.get("version")
+                    indextype = d["index"]
+                    series = d["series"]
+                    ref = float(d["ref"])
                 else:
                     indextype, series, ref = m.groups()
                     ref = float(ref)
@@ -606,7 +625,8 @@ def parse_email(email, date_received):
                         "America/New_York"
                     )
                 fh.seek(cur_pos)
-
+                if version is None:
+                    version = get_version(indextype, series, quotedate)
                 expiration_dates = list_imm_dates(quotedate)
                 parse_fun = globals()[f"parse_{source.lower()}"]
                 key = (quotedate, indextype, series, source)
@@ -627,6 +647,7 @@ def parse_email(email, date_received):
                         "ref": ref,
                         "index": indextype,
                         "series": series,
+                        "version": version,
                         "expiry": list(option_stack.keys()),
                         "quote_source": source,
                     }
