diff options
| -rw-r--r-- | python/parse_emails.py | 73 |
1 files changed, 67 insertions, 6 deletions
diff --git a/python/parse_emails.py b/python/parse_emails.py index 716d4949..a931e1cc 100644 --- a/python/parse_emails.py +++ b/python/parse_emails.py @@ -22,10 +22,16 @@ def makedf(r, indextype, quote_source): 'pay_offer', 'delta_pay', 'vol', 'price_vol'] if quote_source == "BAML": cols.append('gamma') + if quote_source == "GS": + cols.append("tail") df = pd.DataFrame.from_records(r, columns = cols) - for col in ['delta_rec', 'delta_pay', 'vol', 'price_vol', 'gamma']: + for col in ['delta_rec', 'delta_pay', 'vol', 'price_vol', 'gamma', 'tail']: if col in df: df[col] = df[col].str.strip("%").astype('float')/100 + if quote_source == "GS": + for col in ["pay_bid", "pay_offer", "rec_bid", "rec_offer"]: + df[col] = df[col].str.strip('-') + df['delta_pay'] *= -1 for k in df: if df.dtypes[k] == 'object': try: @@ -65,7 +71,7 @@ def parse_refline(line): logging.error("something wrong with " + fh.name) return d -def parse_baml(fh, indextype, series, quotedate): +def parse_baml(fh, indextype, series, quotedate, *args): option_stack = {} fwd_index = [] line = "" @@ -90,7 +96,6 @@ def parse_baml(fh, indextype, series, quotedate): else: raise RuntimeError("empty email: " + fh.name) - def parse_baml_block(fh, indextype): next(fh) ## skip header r = [] @@ -153,6 +158,35 @@ def parse_nomura_block(fh, indextype): r.append(vals) return makedf(r, indextype, "NOM") +def parse_gs_block(fh, indextype): + next(fh) + r = [] + for line in fh: + line = line.rstrip() + if line == "": + break + vals = line.split() + if indextype=='HY': + vals.pop(2) + vals.pop(9) + else: + vals.pop(1) + vals.pop(8) + strike = vals.pop(0) + if indextype == "HY": + vals.pop(0) #pop the spread + pay, pay_delta = vals[:2] + pay_bid, pay_offer = pay.split("/") + rec_bid, rec_offer = vals[2].split("/") + vol = vals[3] + tail = vals[6] + vals = [strike, rec_bid, rec_offer, None, pay_bid, pay_offer, pay_delta, vol] + if indextype == "HY": + vals.append(None) + vals.append(tail) + r.append(vals) + return makedf(r, indextype, "GS") + def parse_ms(fh, indextype): option_stack = {} for line in fh: @@ -173,15 +207,36 @@ def parse_nomura(fh, indextype): option_stack[expiry] = parse_nomura_block(fh, indextype) return option_stack +def parse_gs(fh, indextype, series, quotedate, ref): + option_stack = {} + fwd_index = [] + d = {'quotedate': quotedate, 'index': indextype, + 'series': series, 'ref': ref} + for line in fh: + line = line.rstrip() + if line.startswith("Expiry"): + m = re.match("Expiry (\d{2}\w{3}\d{2}) \((?:([\S]+) )?([\S]+)\)", line) + if m: + expiry, fwdprice, fwdspread = m.groups() + expiry = pd.to_datetime(expiry, format='%d%b%y') + d.update({'fwdspread': fwdspread, 'fwdprice': fwdprice, + 'expiry': expiry}) + fwd_index.append(d) + option_stack[expiry] = parse_gs_block(fh, indextype) + fwd_index = pd.DataFrame.from_records(fwd_index, + index='quotedate') + return option_stack, fwd_index + subject_baml = re.compile("(?:Fwd:){0,2}(?:BAML )?(\w{2})([0-9]{1,2})\s") subject_ms = re.compile("[^$]*\$\$ MS CDX OPTIONS: (IG|HY)(\d{2})[^-]*- REF[^\d]*([\d.]+)") subject_nomura = re.compile("(?:Fwd:)?CDX (IG|HY)(\d{2}).*- REF:[^\d]*([\d.]+)") +subject_gs = re.compile("GS (IG|HY)(\d{2}) 5y.*- Ref [^\d]*([\d.]+)") def parse_email(email): with open(email.path, "rt") as fh: date_received = datetime.datetime.fromtimestamp(int(fh.readline())/1000) subject = next(fh) - for source in ['BAML', 'MS', 'NOMURA']: + for source in ['BAML', 'MS', 'NOMURA', 'GS']: m = globals()['subject_'+source.lower()].match(subject) if m: if source == 'BAML': @@ -189,12 +244,18 @@ def parse_email(email): else: indextype, series, ref = m.groups() ref = float(ref) + series = int(series) quotedate = parse_quotedate(fh, date_received) + parse_fun = globals()['parse_'+source.lower()] if source == 'BAML': - return (quotedate, indextype, series), parse_baml(fh, indextype, series, quotedate) + return (quotedate, indextype, series), \ + parse_fun(fh, indextype, series, quotedate) + elif source == "GS": + return (quotedate, indextype, series), \ + parse_fun(fh, indextype, series, quotedate, ref) else: - option_stack = globals()['parse_'+source.lower()](fh, indextype) + option_stack = parse_fun(fh, indextype) fwd_index = pd.DataFrame({'quotedate': quotedate, 'ref': ref, 'index': indextype, |
