aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--python/quote_parsing/__main__.py19
-rw-r--r--python/quote_parsing/parse_emails.py21
2 files changed, 26 insertions, 14 deletions
diff --git a/python/quote_parsing/__main__.py b/python/quote_parsing/__main__.py
index c97b21ef..95657dea 100644
--- a/python/quote_parsing/__main__.py
+++ b/python/quote_parsing/__main__.py
@@ -47,6 +47,7 @@ for f in emails:
logger.error(f"Something wrong with email: {f.name}")
continue
swaption_stack[key] = pd.concat(option_stack, names=["expiry", "strike"])
+ fwd_index["msg_id"] = int(msg_id, 16)
index_data = index_data.append(fwd_index)
already_uploaded[msg_id] = key[0]
if index_data.empty:
@@ -56,14 +57,20 @@ for col in ["fwdbpv", "fwdprice", "fwdspread", "ref"]:
index_data[col] = index_data[col].astype("float")
index_data["index"] = index_data["index"].astype("category")
-swaption_stack = pd.concat(swaption_stack, names=["quotedate", "index", "series"])
-swaption_stack = swaption_stack.reset_index()
-swaption_stack = swaption_stack.drop_duplicates(
- ["quotedate", "index", "series", "expiry", "strike"]
+index_names = ["quotedate", "index", "series", "quote_source"]
+swaption_stack = pd.concat(swaption_stack, names=index_names)
+dup = swaption_stack.index.duplicated()
+if dup.any():
+ logger.warning("duplicated data")
+ swaption_stack = swaption_stack[~dup]
+swaption_stack = swaption_stack.reset_index().set_index(
+ ["quotedate", "index", "series", "expiry", "quote_source"]
)
-swaption_stack = swaption_stack.set_index(["quotedate", "index", "series", "expiry"])
+swaption_stack = swaption_stack.sort_index()
index_data = index_data.reset_index()
-index_data = index_data.drop_duplicates(["quotedate", "index", "series", "expiry"])
+index_data = index_data.drop_duplicates(
+ ["quotedate", "index", "series", "expiry", "quote_source"]
+)
from utils.db import serenitas_pool
conn = serenitas_pool.getconn()
diff --git a/python/quote_parsing/parse_emails.py b/python/quote_parsing/parse_emails.py
index 961e47b5..47a17e9e 100644
--- a/python/quote_parsing/parse_emails.py
+++ b/python/quote_parsing/parse_emails.py
@@ -408,7 +408,7 @@ subject_sg = re.compile(r"SG OPTIONS - CDX (IG|HY) S(\d{2}).* REF[^\d]*([\d.]+)"
subject_citi = re.compile(r"(?:Fwd:)?Citi Options: (IG|HY)(\d{2}) 5Y")
def parse_email(email, date_received):
- with open(email.path, "rt") as fh:
+ with email.open("rt") as fh:
subject = fh.readline().lstrip()
for source in ['BAML', 'GS', 'MS', 'NOM', 'SG', 'CITI']:
@@ -431,11 +431,12 @@ def parse_email(email, date_received):
expiration_dates = list_imm_dates(quotedate)
parse_fun = globals()[f'parse_{source.lower()}']
+ key = (quotedate, indextype, series, source)
if source in ['BAML', 'CITI']:
- return (quotedate, indextype, series), \
+ return (key, parse_fun(fh, indextype, series, quotedate))
parse_fun(fh, indextype, series, quotedate)
elif source == "GS":
- return (quotedate, indextype, series), \
+ return (key, parse_fun(fh, indextype, series, quotedate, ref))
parse_fun(fh, indextype, series, quotedate, ref)
else:
option_stack = parse_fun(fh, indextype, expiration_dates)
@@ -446,7 +447,7 @@ def parse_email(email, date_received):
'expiry': list(option_stack.keys()),
'quote_source': source})
fwd_index.set_index('quotedate', inplace=True)
- return (quotedate, indextype, series), (option_stack, fwd_index)
+ return (key, (option_stack, fwd_index))
else:
raise RuntimeError(f"can't parse subject line: {subject} for email {email.name}")
@@ -469,12 +470,16 @@ def write_todb(swaption_stack, index_data, conn):
continue
else:
try:
- df = swaption_stack.loc[(t.quotedate, t.index, t.series, t.expiry),]
+ df = swaption_stack.loc[
+ (t.quotedate, t.index, t.series, t.expiry, t.quote_source),
+ ]
except KeyError as e:
- logger.warning("missing key in swaption_stack: "
- f"{t.quotedate}, {t.index}, {t.series}, {t.expiry}")
+ logger.warning(
+ "missing key in swaption_stack: "
+ f"{t.quotedate}, {t.index}, {t.series}, {t.expiry}, {t.quote_source}"
+ )
continue
- except IndexingError:
+ except IndexError:
breakpoint()
df['ref_id'] = ref_id
c.executemany(gen_sql_str(query, "swaption_quotes", df.columns),