diff options
Diffstat (limited to 'python')
| -rw-r--r-- | python/parse_emails.py | 48 |
1 files changed, 33 insertions, 15 deletions
diff --git a/python/parse_emails.py b/python/parse_emails.py index d93c17b6..2e01c5b1 100644 --- a/python/parse_emails.py +++ b/python/parse_emails.py @@ -12,10 +12,12 @@ import sys from quantlib.time.imm import next_date from quantlib.time.api import Date, pydate_from_qldate -logging.basicConfig(filename=os.path.join(os.getenv("LOG_DIR"), 'emails_parsing.log'), +logging.basicConfig(filename=os.path.join(os.getenv("LOG_DIR"), + 'emails_parsing.log'), level=logging.WARNING, format='%(asctime)s %(message)s') + def list_imm_dates(date): d = Date.from_datetime(date) r = [] @@ -24,6 +26,7 @@ def list_imm_dates(date): r.append(pydate_from_qldate(d)) return r + def makedf(r, indextype, quote_source): if indextype == 'IG': cols = ['strike', 'rec_bid', 'rec_offer', 'delta_rec', 'pay_bid', @@ -35,7 +38,7 @@ def makedf(r, indextype, quote_source): cols.append('gamma') if quote_source == "GS": cols.append("tail") - df = pd.DataFrame.from_records(r, columns = cols) + df = pd.DataFrame.from_records(r, columns=cols) for col in ['delta_rec', 'delta_pay', 'vol', 'price_vol', 'gamma', 'tail']: if col in df: df[col] = df[col].str.strip("%").astype('float') / 100 @@ -52,6 +55,7 @@ def makedf(r, indextype, quote_source): df.set_index('strike', inplace=True) return df + def parse_quotedate(fh, date_received): for line in fh: line = line.rstrip() @@ -69,10 +73,11 @@ def parse_quotedate(fh, date_received): raise RuntimeError("can't parse date") return quotedate + def parse_refline(line): - regex = "Ref:(?P<ref>\S+)\s+(?:Fwd Px:(?P<fwdprice>\S+)\s+)?" \ - "Fwd(?: Spd)?:(?P<fwdspread>\S+)\s+Fwd Bpv:(?P<fwdbpv>\S+)" \ - "\s+Expiry:(?P<expiry>\S+)" + regex = r"Ref:(?P<ref>\S+)\s+(?:Fwd Px:(?P<fwdprice>\S+)\s+)?" \ + r"Fwd(?: Spd)?:(?P<fwdspread>\S+)\s+Fwd Bpv:(?P<fwdbpv>\S+)" \ + r"\s+Expiry:(?P<expiry>\S+)" m = re.match(regex, line) try: d = m.groupdict() @@ -81,6 +86,7 @@ def parse_refline(line): logging.error("something wrong with " + fh.name) return d + def parse_baml(fh, indextype, series, quotedate, *args): option_stack = {} fwd_index = [] @@ -107,6 +113,7 @@ def parse_baml(fh, indextype, series, quotedate, *args): else: raise RuntimeError("empty email: " + fh.name) + def parse_baml_block(fh, indextype): next(fh) ## skip header r = [] @@ -123,6 +130,7 @@ def parse_baml_block(fh, indextype): r.append(vals) return makedf(r, indextype, "BAML"), line + def parse_ms_block(fh, indextype): line = next(fh) ## skip header if line.strip() == "": ## empty block @@ -155,6 +163,7 @@ def parse_ms_block(fh, indextype): r.append(vals) return makedf(r, indextype, "MS") + def parse_nomura_block(fh, indextype): next(fh) ## skip header r = [] @@ -178,6 +187,7 @@ def parse_nomura_block(fh, indextype): return None, makedf(r, indextype, "NOM") return line, makedf(r, indextype, "NOM") + def parse_sg_block(fh, indextype, expiration_dates): r = [] for line in fh: @@ -206,6 +216,7 @@ def parse_sg_block(fh, indextype, expiration_dates): r.append(vals) return expiry, makedf(r, indextype, "SG") + def parse_gs_block(fh, indextype): next(fh) r = [] @@ -235,6 +246,7 @@ def parse_gs_block(fh, indextype): r.append(vals) return makedf(r, indextype, "GS") + def parse_ms(fh, indextype, *args): option_stack = {} for line in fh: @@ -247,6 +259,7 @@ def parse_ms(fh, indextype, *args): option_stack[expiry] = block return option_stack + def parse_nomura(fh, indextype, *args): option_stack = {} def aux(line, fh, indextype, option_stack): @@ -258,13 +271,14 @@ def parse_nomura(fh, indextype, *args): if "EXPIRY" in next_line: aux(next_line, fh, indextype, option_stack) else: - raise RuntimeError("Don't know what to do with {}:".format(line)) + raise RuntimeError(f"Don't know what to do with {line}.") for line in fh: line = line.rstrip() if "EXPIRY" in line: aux(line, fh, indextype, option_stack) return option_stack + def parse_sg(fh, indextype, expiration_dates): option_stack = {} fwd_index = [] @@ -275,6 +289,7 @@ def parse_sg(fh, indextype, expiration_dates): option_stack[expiry] = df return option_stack + def parse_gs(fh, indextype, series, quotedate, ref): option_stack = {} fwd_index = [] @@ -283,7 +298,7 @@ def parse_gs(fh, indextype, series, quotedate, ref): for line in fh: line = line.rstrip() if line.startswith("Expiry"): - m = re.match("Expiry (\d{2}\w{3}\d{2}) \((?:([\S]+) )?([\S]+)\)", line) + m = re.match(r"Expiry (\d{2}\w{3}\d{2}) \((?:([\S]+) )?([\S]+)\)", line) if m: expiry, fwdprice, fwdspread = m.groups() expiry = pd.to_datetime(expiry, format='%d%b%y') @@ -298,11 +313,11 @@ def parse_gs(fh, indextype, series, quotedate, ref): fwd_index['quote_source'] = 'GS' return option_stack, fwd_index -subject_baml = re.compile("(?:Fwd:){0,2}(?:BAML )?(\w{2})([0-9]{1,2})\s") -subject_ms = re.compile("[^$]*\$\$ MS CDX OPTIONS: (IG|HY)(\d{2})[^-]*- REF[^\d]*([\d.]+)") -subject_nomura = re.compile("(?:Fwd:)?CDX (IG|HY)(\d{2}).*- REF:[^\d]*([\d.]+)") -subject_gs = re.compile("GS (IG|HY)(\d{2}) 5y.*- Ref [^\d]*([\d.]+)") -subject_sg = re.compile("SG OPTIONS - CDX (IG|HY) S(\d{2}).* REF[^\d]*([\d.]+)") +subject_baml = re.compile(r"(?:Fwd:){0,2}(?:BAML )?(\w{2})([0-9]{1,2})\s") +subject_ms = re.compile(r"[^$]*\$\$ MS CDX OPTIONS: (IG|HY)(\d{2})[^-]*- REF[^\d]*([\d.]+)") +subject_nomura = re.compile(r"(?:Fwd:)?CDX (IG|HY)(\d{2}).*- REF:[^\d]*([\d.]+)") +subject_gs = re.compile(r"GS (IG|HY)(\d{2}) 5y.*- Ref [^\d]*([\d.]+)") +subject_sg = re.compile(r"SG OPTIONS - CDX (IG|HY) S(\d{2}).* REF[^\d]*([\d.]+)") def parse_email(email, date_received): with open(email.path, "rt") as fh: @@ -339,8 +354,8 @@ def parse_email(email, date_received): fwd_index.set_index('quotedate', inplace=True) return (quotedate, indextype, series), (option_stack, fwd_index) else: - raise RuntimeError("can't parse subject line: {0} for email {1}".format( - subject, email.name)) + raise RuntimeError("can't parse subject line: {0} for email {1}". + format(subject, email.name)) def write_todb(swaption_stack, index_data): def gen_sql_str(query, table_name, columns): @@ -367,6 +382,7 @@ def write_todb(swaption_stack, index_data): df.itertuples(index=False)) conn.commit() + def get_email_list(date): """returns a list of email file names for a given date @@ -379,7 +395,8 @@ def get_email_list(date): df = pd.DataFrame.from_dict(already_uploaded, orient='index') df.columns = ['quotedate'] df = df.reset_index().set_index('quotedate') - return df.loc[date,'index'].tolist() + return df.loc[date, 'index'].tolist() + def pickle_drop_date(date): with open(".pickle", "rb") as fh: @@ -388,6 +405,7 @@ def pickle_drop_date(date): with open(".pickle", "wb") as fh: pickle.dump(newdict, fh) + if __name__=="__main__": save_emails() data_dir = os.path.join(os.getenv("DATA_DIR"), "swaptions") |
