diff options
Diffstat (limited to 'python/parse_emails.py')
| -rw-r--r-- | python/parse_emails.py | 35 |
1 file changed, 22 insertions, 13 deletions
diff --git a/python/parse_emails.py b/python/parse_emails.py index 2e01c5b1..9d2a180e 100644 --- a/python/parse_emails.py +++ b/python/parse_emails.py @@ -144,8 +144,11 @@ def parse_ms_block(fh, indextype): strike = strike.strip() if indextype == "HY": strike = strike.split()[0] - pay_bid, pay_offer, pay_delta = payer.strip().split() - rec_bid, rec_offer, rec_delta = receiver.strip().split() + try: + pay_bid, pay_offer, pay_delta = payer.strip().split() + rec_bid, rec_offer, rec_delta = receiver.strip().split() + except ValueError: + break vals = [strike, rec_bid, rec_offer, rec_delta, pay_bid, pay_offer, pay_delta] @@ -161,7 +164,9 @@ def parse_ms_block(fh, indextype): vol, vol_change, be = vol.split() vals += [vol] r.append(vals) - return makedf(r, indextype, "MS") + else: + return makedf(r, indextype, "MS") + return None def parse_nomura_block(fh, indextype): @@ -260,8 +265,9 @@ def parse_ms(fh, indextype, *args): return option_stack -def parse_nomura(fh, indextype, *args): +def parse_nom(fh, indextype, *args): option_stack = {} + def aux(line, fh, indextype, option_stack): expiry = line.split(" ")[0] expiry = pd.to_datetime(expiry, format="%d-%b-%y") @@ -281,7 +287,6 @@ def parse_nomura(fh, indextype, *args): def parse_sg(fh, indextype, expiration_dates): option_stack = {} - fwd_index = [] for line in fh: line = line.rstrip() if line.startswith("Type"): @@ -315,14 +320,15 @@ def parse_gs(fh, indextype, series, quotedate, ref): subject_baml = re.compile(r"(?:Fwd:){0,2}(?:BAML )?(\w{2})([0-9]{1,2})\s") subject_ms = re.compile(r"[^$]*\$\$ MS CDX OPTIONS: (IG|HY)(\d{2})[^-]*- REF[^\d]*([\d.]+)") -subject_nomura = re.compile(r"(?:Fwd:)?CDX (IG|HY)(\d{2}).*- REF:[^\d]*([\d.]+)") +subject_nom = re.compile(r"(?:Fwd:)?CDX (IG|HY)(\d{2}).*- REF:[^\d]*([\d.]+)") subject_gs = re.compile(r"GS (IG|HY)(\d{2}) 5y.*- Ref [^\d]*([\d.]+)") subject_sg = re.compile(r"SG OPTIONS - CDX (IG|HY) S(\d{2}).* REF[^\d]*([\d.]+)") + def parse_email(email, date_received): with 
open(email.path, "rt") as fh: subject = next(fh) - for source in ['BAML', 'MS', 'NOMURA', 'GS', 'SG']: + for source in ['BAML', 'MS', 'NOM', 'GS', 'SG']: m = globals()['subject_'+source.lower()].match(subject) if m: if source == 'BAML': @@ -406,7 +412,7 @@ def pickle_drop_date(date): pickle.dump(newdict, fh) -if __name__=="__main__": +if __name__ == "__main__": save_emails() data_dir = os.path.join(os.getenv("DATA_DIR"), "swaptions") emails = [f for f in os.scandir(data_dir) if f.is_file()] @@ -419,7 +425,8 @@ if __name__=="__main__": already_uploaded = {} for f in emails: date_received, msg_id = f.name.split("_") - date_received = datetime.datetime.strptime(date_received, "%Y-%m-%d %H-%M-%S") + date_received = datetime.datetime.strptime(date_received, + "%Y-%m-%d %H-%M-%S") if msg_id in already_uploaded: continue else: @@ -428,10 +435,11 @@ if __name__=="__main__": except RuntimeError as e: logging.error(e) else: - if key[0] is None: - logging.error("Something wrong with email: {}".format(f.name)) + if key[0] is None or len(option_stack) == 0: + logging.error(f"Something wrong with email: f.name") continue - swaption_stack[key] = pd.concat(option_stack, names=['expiry', 'strike']) + swaption_stack[key] = pd.concat(option_stack, + names=['expiry', 'strike']) index_data = index_data.append(fwd_index) already_uploaded[msg_id] = key[0] if index_data.empty: @@ -441,7 +449,8 @@ if __name__=="__main__": index_data[col] = index_data[col].astype('float') index_data['index'] = index_data['index'].astype('category') - swaption_stack = pd.concat(swaption_stack, names=['quotedate', 'index', 'series']) + swaption_stack = pd.concat(swaption_stack, + names=['quotedate', 'index', 'series']) swaption_stack = swaption_stack.reset_index() swaption_stack = swaption_stack.drop_duplicates(['quotedate', 'index', 'series', 'expiry', 'strike']) swaption_stack = swaption_stack.set_index(['quotedate', 'index', 'series', 'expiry']) |
