Diffstat (limited to 'python/parse_emails.py')
-rw-r--r--  python/parse_emails.py  35
1 file changed, 22 insertions, 13 deletions
diff --git a/python/parse_emails.py b/python/parse_emails.py
index 2e01c5b1..9d2a180e 100644
--- a/python/parse_emails.py
+++ b/python/parse_emails.py
@@ -144,8 +144,11 @@ def parse_ms_block(fh, indextype):
         strike = strike.strip()
         if indextype == "HY":
             strike = strike.split()[0]
-        pay_bid, pay_offer, pay_delta = payer.strip().split()
-        rec_bid, rec_offer, rec_delta = receiver.strip().split()
+        try:
+            pay_bid, pay_offer, pay_delta = payer.strip().split()
+            rec_bid, rec_offer, rec_delta = receiver.strip().split()
+        except ValueError:
+            break
         vals = [strike, rec_bid, rec_offer, rec_delta,
                 pay_bid, pay_offer, pay_delta]
@@ -161,7 +164,9 @@ def parse_ms_block(fh, indextype):
             vol, vol_change, be = vol.split()
             vals += [vol]
         r.append(vals)
-    return makedf(r, indextype, "MS")
+    else:
+        return makedf(r, indextype, "MS")
+    return None
def parse_nomura_block(fh, indextype):
@@ -260,8 +265,9 @@ def parse_ms(fh, indextype, *args):
     return option_stack
-def parse_nomura(fh, indextype, *args):
+def parse_nom(fh, indextype, *args):
     option_stack = {}
+
     def aux(line, fh, indextype, option_stack):
         expiry = line.split(" ")[0]
         expiry = pd.to_datetime(expiry, format="%d-%b-%y")
@@ -281,7 +287,6 @@ def parse_nomura(fh, indextype, *args):
 def parse_sg(fh, indextype, expiration_dates):
     option_stack = {}
-    fwd_index = []
     for line in fh:
         line = line.rstrip()
         if line.startswith("Type"):
@@ -315,14 +320,15 @@ def parse_gs(fh, indextype, series, quotedate, ref):
 subject_baml = re.compile(r"(?:Fwd:){0,2}(?:BAML )?(\w{2})([0-9]{1,2})\s")
 subject_ms = re.compile(r"[^$]*\$\$ MS CDX OPTIONS: (IG|HY)(\d{2})[^-]*- REF[^\d]*([\d.]+)")
-subject_nomura = re.compile(r"(?:Fwd:)?CDX (IG|HY)(\d{2}).*- REF:[^\d]*([\d.]+)")
+subject_nom = re.compile(r"(?:Fwd:)?CDX (IG|HY)(\d{2}).*- REF:[^\d]*([\d.]+)")
 subject_gs = re.compile(r"GS (IG|HY)(\d{2}) 5y.*- Ref [^\d]*([\d.]+)")
 subject_sg = re.compile(r"SG OPTIONS - CDX (IG|HY) S(\d{2}).* REF[^\d]*([\d.]+)")
+
 def parse_email(email, date_received):
     with open(email.path, "rt") as fh:
         subject = next(fh)
-        for source in ['BAML', 'MS', 'NOMURA', 'GS', 'SG']:
+        for source in ['BAML', 'MS', 'NOM', 'GS', 'SG']:
             m = globals()['subject_'+source.lower()].match(subject)
             if m:
                 if source == 'BAML':
@@ -406,7 +412,7 @@ def pickle_drop_date(date):
         pickle.dump(newdict, fh)
-if __name__=="__main__":
+if __name__ == "__main__":
     save_emails()
     data_dir = os.path.join(os.getenv("DATA_DIR"), "swaptions")
     emails = [f for f in os.scandir(data_dir) if f.is_file()]
@@ -419,7 +425,8 @@ if __name__=="__main__":
     already_uploaded = {}
     for f in emails:
         date_received, msg_id = f.name.split("_")
-        date_received = datetime.datetime.strptime(date_received, "%Y-%m-%d %H-%M-%S")
+        date_received = datetime.datetime.strptime(date_received,
+                                                   "%Y-%m-%d %H-%M-%S")
         if msg_id in already_uploaded:
             continue
         else:
@@ -428,10 +435,11 @@ if __name__=="__main__":
            except RuntimeError as e:
                logging.error(e)
            else:
-                if key[0] is None:
-                    logging.error("Something wrong with email: {}".format(f.name))
+                if key[0] is None or len(option_stack) == 0:
+                    logging.error(f"Something wrong with email: {f.name}")
                     continue
-                swaption_stack[key] = pd.concat(option_stack, names=['expiry', 'strike'])
+                swaption_stack[key] = pd.concat(option_stack,
+                                                names=['expiry', 'strike'])
                 index_data = index_data.append(fwd_index)
                 already_uploaded[msg_id] = key[0]
     if index_data.empty:
@@ -441,7 +449,8 @@ if __name__=="__main__":
         index_data[col] = index_data[col].astype('float')
     index_data['index'] = index_data['index'].astype('category')
-    swaption_stack = pd.concat(swaption_stack, names=['quotedate', 'index', 'series'])
+    swaption_stack = pd.concat(swaption_stack,
+                               names=['quotedate', 'index', 'series'])
     swaption_stack = swaption_stack.reset_index()
     swaption_stack = swaption_stack.drop_duplicates(['quotedate', 'index', 'series', 'expiry', 'strike'])
     swaption_stack = swaption_stack.set_index(['quotedate', 'index', 'series', 'expiry'])