aboutsummaryrefslogtreecommitdiffstats
path: root/python/parse_emails.py
diff options
context:
space:
mode:
Diffstat (limited to 'python/parse_emails.py')
-rw-r--r--python/parse_emails.py65
1 files changed, 58 insertions, 7 deletions
diff --git a/python/parse_emails.py b/python/parse_emails.py
index a931e1cc..f0ea1885 100644
--- a/python/parse_emails.py
+++ b/python/parse_emails.py
@@ -7,12 +7,21 @@ import datetime
import logging
import pickle
import sys
-
+from quantlib.time.imm import next_date
+from quantlib.time.api import Date, pydate_from_qldate
logging.basicConfig(filename=os.path.join(os.getenv("LOG_DIR"), 'emails_parsing.log'),
level=logging.WARNING,
format='%(asctime)s %(message)s')
+def list_imm_dates(date):
+ d = Date.from_datetime(date)
+ r = []
+ for i in range(10):
+ d = next_date(d, False)
+ r.append(pydate_from_qldate(d))
+ return r
+
def makedf(r, indextype, quote_source):
if indextype=='IG':
cols = ['strike', 'rec_bid', 'rec_offer', 'delta_rec', 'pay_bid',
@@ -45,7 +54,7 @@ def makedf(r, indextype, quote_source):
def parse_quotedate(fh, date_received):
for line in fh:
line = line.rstrip()
- if line.startswith("At"):
+ if "At:" in line:
for p in ['%m/%d/%y %H:%M:%S', '%b %d %Y %H:%M:%S', '%m/%d %H:%M:%S']:
try:
quotedate = pd.to_datetime(line, format=p, exact=False)
@@ -158,6 +167,34 @@ def parse_nomura_block(fh, indextype):
r.append(vals)
return makedf(r, indextype, "NOM")
+def parse_sg_block(fh, indextype, expiration_dates):
+ r = []
+ for line in fh:
+ line = line.rstrip()
+ if line == "":
+ break
+ if indextype == "IG":
+ option_type, strike, price, delta, vol, expiry = line.split()
+ else:
+ option_type, strike, strike_spread, price, delta, vol, expiry = line.split()
+
+ expiry_month = datetime.datetime.strptime(expiry, "%b-%y").month
+ expiry = next(pd.Timestamp(d) for d in expiration_dates if d.month == expiry_month)
+ if option_type == "Rec":
+ rec_bid, rec_offer = price.split("/")
+ pay_bid, pay_offer = None, None
+ rec_delta, pay_delta = delta, None
+ else:
+ pay_bid, pay_offer = price.split("/")
+ rec_bid, rec_offer = None, None
+ rec_delta, pay_delta = None, delta
+ vals = [strike, rec_bid, rec_offer, rec_delta, pay_bid,
+ pay_offer, pay_delta, vol]
+ if indextype == "HY":
+ vals.append(None)
+ r.append(vals)
+ return expiry, makedf(r, indextype, "SG")
+
def parse_gs_block(fh, indextype):
next(fh)
r = []
@@ -187,7 +224,7 @@ def parse_gs_block(fh, indextype):
r.append(vals)
return makedf(r, indextype, "GS")
-def parse_ms(fh, indextype):
+def parse_ms(fh, indextype, *args):
option_stack = {}
for line in fh:
line = line.rstrip()
@@ -197,7 +234,7 @@ def parse_ms(fh, indextype):
option_stack[expiry] = parse_ms_block(fh, indextype)
return option_stack
-def parse_nomura(fh, indextype):
+def parse_nomura(fh, indextype, *args):
option_stack = {}
for line in fh:
line = line.rstrip()
@@ -207,6 +244,16 @@ def parse_nomura(fh, indextype):
option_stack[expiry] = parse_nomura_block(fh, indextype)
return option_stack
+def parse_sg(fh, indextype, expiration_dates):
+ option_stack = {}
+ fwd_index = []
+ for line in fh:
+ line = line.rstrip()
+ if line.startswith("Type"):
+ expiry, df = parse_sg_block(fh, indextype, expiration_dates)
+ option_stack[expiry] = df
+ return option_stack
+
def parse_gs(fh, indextype, series, quotedate, ref):
option_stack = {}
fwd_index = []
@@ -231,12 +278,13 @@ subject_baml = re.compile("(?:Fwd:){0,2}(?:BAML )?(\w{2})([0-9]{1,2})\s")
subject_ms = re.compile("[^$]*\$\$ MS CDX OPTIONS: (IG|HY)(\d{2})[^-]*- REF[^\d]*([\d.]+)")
subject_nomura = re.compile("(?:Fwd:)?CDX (IG|HY)(\d{2}).*- REF:[^\d]*([\d.]+)")
subject_gs = re.compile("GS (IG|HY)(\d{2}) 5y.*- Ref [^\d]*([\d.]+)")
+subject_sg = re.compile("SG OPTIONS - CDX (IG|HY) S(\d{2}).* REF[^\d]*([\d.]+)")
def parse_email(email):
with open(email.path, "rt") as fh:
date_received = datetime.datetime.fromtimestamp(int(fh.readline())/1000)
subject = next(fh)
- for source in ['BAML', 'MS', 'NOMURA', 'GS']:
+ for source in ['BAML', 'MS', 'NOMURA', 'GS', 'SG']:
m = globals()['subject_'+source.lower()].match(subject)
if m:
if source == 'BAML':
@@ -244,9 +292,12 @@ def parse_email(email):
else:
indextype, series, ref = m.groups()
ref = float(ref)
-
series = int(series)
quotedate = parse_quotedate(fh, date_received)
+ if quotedate is None:
+ print(email.path)
+ continue
+ expiration_dates = list_imm_dates(quotedate)
parse_fun = globals()['parse_'+source.lower()]
if source == 'BAML':
return (quotedate, indextype, series), \
@@ -255,7 +306,7 @@ def parse_email(email):
return (quotedate, indextype, series), \
parse_fun(fh, indextype, series, quotedate, ref)
else:
- option_stack = parse_fun(fh, indextype)
+ option_stack = parse_fun(fh, indextype, expiration_dates)
fwd_index = pd.DataFrame({'quotedate': quotedate,
'ref': ref,
'index': indextype,