diff options
Diffstat (limited to 'python')
| -rw-r--r-- | python/parse_emails.py | 65 |
1 files changed, 58 insertions, 7 deletions
diff --git a/python/parse_emails.py b/python/parse_emails.py index a931e1cc..f0ea1885 100644 --- a/python/parse_emails.py +++ b/python/parse_emails.py @@ -7,12 +7,21 @@ import datetime import logging import pickle import sys - +from quantlib.time.imm import next_date +from quantlib.time.api import Date, pydate_from_qldate logging.basicConfig(filename=os.path.join(os.getenv("LOG_DIR"), 'emails_parsing.log'), level=logging.WARNING, format='%(asctime)s %(message)s') +def list_imm_dates(date): + d = Date.from_datetime(date) + r = [] + for i in range(10): + d = next_date(d, False) + r.append(pydate_from_qldate(d)) + return r + def makedf(r, indextype, quote_source): if indextype=='IG': cols = ['strike', 'rec_bid', 'rec_offer', 'delta_rec', 'pay_bid', @@ -45,7 +54,7 @@ def makedf(r, indextype, quote_source): def parse_quotedate(fh, date_received): for line in fh: line = line.rstrip() - if line.startswith("At"): + if "At:" in line: for p in ['%m/%d/%y %H:%M:%S', '%b %d %Y %H:%M:%S', '%m/%d %H:%M:%S']: try: quotedate = pd.to_datetime(line, format=p, exact=False) @@ -158,6 +167,34 @@ def parse_nomura_block(fh, indextype): r.append(vals) return makedf(r, indextype, "NOM") +def parse_sg_block(fh, indextype, expiration_dates): + r = [] + for line in fh: + line = line.rstrip() + if line == "": + break + if indextype == "IG": + option_type, strike, price, delta, vol, expiry = line.split() + else: + option_type, strike, strike_spread, price, delta, vol, expiry = line.split() + + expiry_month = datetime.datetime.strptime(expiry, "%b-%y").month + expiry = next(pd.Timestamp(d) for d in expiration_dates if d.month == expiry_month) + if option_type == "Rec": + rec_bid, rec_offer = price.split("/") + pay_bid, pay_offer = None, None + rec_delta, pay_delta = delta, None + else: + pay_bid, pay_offer = price.split("/") + rec_bid, rec_offer = None, None + rec_delta, pay_delta = None, delta + vals = [strike, rec_bid, rec_offer, rec_delta, pay_bid, + pay_offer, pay_delta, vol] + if indextype == "HY": + vals.append(None) + r.append(vals) + return expiry, makedf(r, indextype, "SG") + def parse_gs_block(fh, indextype): next(fh) r = [] @@ -187,7 +224,7 @@ def parse_gs_block(fh, indextype): r.append(vals) return makedf(r, indextype, "GS") -def parse_ms(fh, indextype): +def parse_ms(fh, indextype, *args): option_stack = {} for line in fh: line = line.rstrip() @@ -197,7 +234,7 @@ def parse_ms(fh, indextype): option_stack[expiry] = parse_ms_block(fh, indextype) return option_stack -def parse_nomura(fh, indextype): +def parse_nomura(fh, indextype, *args): option_stack = {} for line in fh: line = line.rstrip() @@ -207,6 +244,16 @@ def parse_nomura(fh, indextype): option_stack[expiry] = parse_nomura_block(fh, indextype) return option_stack +def parse_sg(fh, indextype, expiration_dates): + option_stack = {} + fwd_index = [] + for line in fh: + line = line.rstrip() + if line.startswith("Type"): + expiry, df = parse_sg_block(fh, indextype, expiration_dates) + option_stack[expiry] = df + return option_stack + def parse_gs(fh, indextype, series, quotedate, ref): option_stack = {} fwd_index = [] @@ -231,12 +278,13 @@ subject_baml = re.compile("(?:Fwd:){0,2}(?:BAML )?(\w{2})([0-9]{1,2})\s") subject_ms = re.compile("[^$]*\$\$ MS CDX OPTIONS: (IG|HY)(\d{2})[^-]*- REF[^\d]*([\d.]+)") subject_nomura = re.compile("(?:Fwd:)?CDX (IG|HY)(\d{2}).*- REF:[^\d]*([\d.]+)") subject_gs = re.compile("GS (IG|HY)(\d{2}) 5y.*- Ref [^\d]*([\d.]+)") +subject_sg = re.compile("SG OPTIONS - CDX (IG|HY) S(\d{2}).* REF[^\d]*([\d.]+)") def parse_email(email): with open(email.path, "rt") as fh: date_received = datetime.datetime.fromtimestamp(int(fh.readline())/1000) subject = next(fh) - for source in ['BAML', 'MS', 'NOMURA', 'GS']: + for source in ['BAML', 'MS', 'NOMURA', 'GS', 'SG']: m = globals()['subject_'+source.lower()].match(subject) if m: if source == 'BAML': @@ -244,9 +292,12 @@ def parse_email(email): else: indextype, series, ref = m.groups() ref = float(ref) - series = int(series) quotedate = parse_quotedate(fh, date_received) + if quotedate is None: + print(email.path) + continue + expiration_dates = list_imm_dates(quotedate) parse_fun = globals()['parse_'+source.lower()] if source == 'BAML': return (quotedate, indextype, series), \ @@ -255,7 +306,7 @@ def parse_email(email): return (quotedate, indextype, series), \ parse_fun(fh, indextype, series, quotedate, ref) else: - option_stack = parse_fun(fh, indextype) + option_stack = parse_fun(fh, indextype, expiration_dates) fwd_index = pd.DataFrame({'quotedate': quotedate, 'ref': ref, 'index': indextype, |
