about summary refs log tree commit diff stats
path: root/python
diff options
context:
space:
mode:
Diffstat (limited to 'python')
-rw-r--r-- python/parse_emails.py 96
1 files changed, 70 insertions, 26 deletions
diff --git a/python/parse_emails.py b/python/parse_emails.py
index 5c5fb8bd..c31bf1f7 100644
--- a/python/parse_emails.py
+++ b/python/parse_emails.py
@@ -40,13 +40,13 @@ def parse_quotedate(fh, date_received):
for line in fh:
line = line.rstrip()
if line.startswith("At"):
- for p in ['%m/%d %H:%M:%S', '%b %d %Y %H:%M:%S']:
+ for p in ['%m/%d/%y %H:%M:%S', '%b %d %Y %H:%M:%S', '%m/%d %H:%M:%S']:
try:
quotedate = pd.to_datetime(line, format=p, exact=False)
except ValueError:
continue
else:
- if quotedate.year == 1900:
+ if quotedate.year == 1900: # p='%m/%d %H:%M:%S'
quotedate = quotedate.replace(year=date_received.year)
break
else:
@@ -132,6 +132,27 @@ def parse_ms_block(fh, indextype):
r.append(vals)
return makedf(r, indextype, "MS")
+def parse_nomura_block(fh, indextype):
+ next(fh) ## skip header
+ r = []
+ for line in fh:
+ line = line.rstrip()
+ if line == "":
+ break
+ strike, receiver, payer, vol, _ = line.split("|", 4)
+ strike = strike.strip()
+ pay, pay_delta = payer.strip().split()
+ rec, rec_delta = receiver.strip().split()
+ pay_bid, pay_offer = pay.split("/")
+ rec_bid, rec_offer = rec.split("/")
+ vol = vol.strip()
+ vals = [strike, rec_bid, rec_offer, rec_delta,
+ pay_bid, pay_offer, pay_delta, vol]
+ if indextype == "HY": # we don't have price vol
+ vals.append(None)
+ r.append(vals)
+ return makedf(r, indextype, "NOM")
+
def parse_ms(fh, indextype):
option_stack = {}
for line in fh:
@@ -142,33 +163,46 @@ def parse_ms(fh, indextype):
option_stack[expiry] = parse_ms_block(fh, indextype)
return option_stack
-subject_BAML = re.compile("(?:Fwd:){0,2}(?:BAML )?(\w{2})([0-9]{1,2})\s")
-subject_MS = re.compile("\$\$ MS CDX OPTIONS: (IG|HY)(\d{2})[^\d]*([\d.]+)")
+def parse_nomura(fh, indextype):
+ option_stack = {}
+ for line in fh:
+ line = line.rstrip()
+ if "EXPIRY" in line:
+ expiry = line.split(" ")[0]
+ expiry = pd.to_datetime(expiry, format="%d-%b-%y")
+ option_stack[expiry] = parse_nomura_block(fh, indextype)
+ return option_stack
+
+subject_baml = re.compile("(?:Fwd:){0,2}(?:BAML )?(\w{2})([0-9]{1,2})\s")
+subject_ms = re.compile("[^$]*\$\$ MS CDX OPTIONS: (IG|HY)(\d{2})[^-]*- REF[^\d]*([\d.]+)")
+subject_nomura = re.compile("(?:Fwd:)?CDX (IG|HY)(\d{2}).*- REF:[^\d]*([\d.]+)")
def parse_email(email):
with open(email.path, "rt") as fh:
date_received = datetime.datetime.fromtimestamp(int(fh.readline())/1000)
subject = next(fh)
- m = subject_BAML.match(subject)
- if m:
- indextype, series = m.groups()
- series = int(series)
- quotedate = parse_quotedate(fh, date_received)
- return (quotedate, indextype, series), parse_baml(fh, indextype, series, quotedate)
- m = subject_MS.match(subject)
- if m:
- indextype, series, ref = m.groups()
- series = int(series)
- ref = float(ref)
- quotedate = parse_quotedate(fh, date_received)
- option_stack = parse_ms(fh, indextype)
- fwd_index = pd.DataFrame({'quotedate': quotedate,
- 'ref': ref,
- 'index': indextype,
- 'series': series,
- 'expiry': list(option_stack.keys())})
- fwd_index.set_index('quotedate', inplace = True)
- return (quotedate, indextype, series), (option_stack, fwd_index)
+ for source in ['BAML', 'MS', 'NOMURA']:
+ m = globals()['subject_'+source.lower()].match(subject)
+ if m:
+ if source == 'BAML':
+ indextype, series = m.groups()
+ else:
+ indextype, series, ref = m.groups()
+ ref = float(ref)
+ series = int(series)
+ quotedate = parse_quotedate(fh, date_received)
+
+ if source == 'BAML':
+ return (quotedate, indextype, series), parse_baml(fh, indextype, series, quotedate)
+ else:
+ option_stack = globals()['parse_'+source.lower()](fh, indextype)
+ fwd_index = pd.DataFrame({'quotedate': quotedate,
+ 'ref': ref,
+ 'index': indextype,
+ 'series': series,
+ 'expiry': list(option_stack.keys())})
+ fwd_index.set_index('quotedate', inplace = True)
+ return (quotedate, indextype, series), (option_stack, fwd_index)
raise RuntimeError("can't parse subject line: {0} for email {1}".format(
subject, email.name))
@@ -180,7 +214,9 @@ def write_todb(swaption_stack, index_data):
psycopg2.extensions.register_adapter(float, nan_to_null)
meta = MetaData(bind=serenitasdb)
swaption_quotes = Table('swaption_quotes', meta, autoload=True)
- ins = swaption_quotes.insert().values(swaption_stack.to_dict(orient='records')).execute()
+ for r in swaption_stack.to_dict(orient='records'):
+ serenitasdb.execute(swaption_quotes.insert(), r)
+ #ins = swaption_quotes.insert().values(swaption_stack.to_dict(orient='records')).execute()
index_data.to_sql('swaption_ref_quotes', serenitasdb, if_exists='append', index=False)
def get_email_list(date):
@@ -197,6 +233,13 @@ def get_email_list(date):
df = df.reset_index().set_index('quotedate')
return df.loc[date,'index'].tolist()
+def pickle_drop_date(date):
+ with open(".pickle", "rb") as fh:
+ already_uploaded = pickle.load(fh)
+ newdict = {k: v for k, v in already_uploaded.items() if v.date() != date}
+ with open(".pickle", "wb") as fh:
+ pickle.dump(newdict, fh)
+
if __name__=="__main__":
update_emails()
data_dir = os.path.join(os.getenv("DATA_DIR"), "swaptions")
@@ -223,7 +266,8 @@ if __name__=="__main__":
if index_data.empty:
sys.exit()
for col in ['fwdbpv', 'fwdprice', 'fwdspread', 'ref']:
- index_data[col] = index_data[col].astype('float')
+ if col in index_data:
+ index_data[col] = index_data[col].astype('float')
index_data['index'] = index_data['index'].astype('category')
swaption_stack = pd.concat(swaption_stack, names=['quotedate', 'index', 'series'])