about | summary | refs | log | tree | commit | diff | stats
path: root/python
diff options
context:
space:
mode:
Diffstat (limited to 'python')
-rw-r--r-- python/parse_emails.py 73
1 files changed, 67 insertions, 6 deletions
diff --git a/python/parse_emails.py b/python/parse_emails.py
index 716d4949..a931e1cc 100644
--- a/python/parse_emails.py
+++ b/python/parse_emails.py
@@ -22,10 +22,16 @@ def makedf(r, indextype, quote_source):
'pay_offer', 'delta_pay', 'vol', 'price_vol']
if quote_source == "BAML":
cols.append('gamma')
+ if quote_source == "GS":
+ cols.append("tail")
df = pd.DataFrame.from_records(r, columns = cols)
- for col in ['delta_rec', 'delta_pay', 'vol', 'price_vol', 'gamma']:
+ for col in ['delta_rec', 'delta_pay', 'vol', 'price_vol', 'gamma', 'tail']:
if col in df:
df[col] = df[col].str.strip("%").astype('float')/100
+ if quote_source == "GS":
+ for col in ["pay_bid", "pay_offer", "rec_bid", "rec_offer"]:
+ df[col] = df[col].str.strip('-')
+ df['delta_pay'] *= -1
for k in df:
if df.dtypes[k] == 'object':
try:
@@ -65,7 +71,7 @@ def parse_refline(line):
logging.error("something wrong with " + fh.name)
return d
-def parse_baml(fh, indextype, series, quotedate):
+def parse_baml(fh, indextype, series, quotedate, *args):
option_stack = {}
fwd_index = []
line = ""
@@ -90,7 +96,6 @@ def parse_baml(fh, indextype, series, quotedate):
else:
raise RuntimeError("empty email: " + fh.name)
-
def parse_baml_block(fh, indextype):
next(fh) ## skip header
r = []
@@ -153,6 +158,35 @@ def parse_nomura_block(fh, indextype):
r.append(vals)
return makedf(r, indextype, "NOM")
def parse_gs_block(fh, indextype):
    """Parse one block of GS quote rows from *fh* into a DataFrame.

    The first line pulled from *fh* is discarded as a header. Following
    lines are consumed until a blank line (or end of input); each line is
    split on whitespace and rearranged into one record.

    Args:
        fh: iterator over text lines, positioned on the block header.
        indextype: index family; ``'HY'`` rows carry extra columns that
            are dropped, every other value uses the alternative layout.

    Returns:
        Whatever ``makedf(records, indextype, "GS")`` returns.
    """
    is_hy = indextype == 'HY'
    # Positions of the two tokens that are thrown away before any field
    # is read; the second index applies after the first removal.
    first_drop, second_drop = (2, 9) if is_hy else (1, 8)

    next(fh)  # skip the header line
    records = []
    for raw in fh:
        text = raw.rstrip()
        if not text:
            # a blank line terminates the block
            break
        tokens = text.split()
        tokens.pop(first_drop)
        tokens.pop(second_drop)
        strike = tokens.pop(0)
        if is_hy:
            tokens.pop(0)  # pop the spread
        pay, pay_delta = tokens[:2]
        pay_bid, pay_offer = pay.split("/")
        rec_bid, rec_offer = tokens[2].split("/")
        vol = tokens[3]
        tail = tokens[6]
        # The None in fourth position is a column GS does not provide
        # (presumably the receiver delta -- confirm against makedf's cols).
        record = [strike, rec_bid, rec_offer, None,
                  pay_bid, pay_offer, pay_delta, vol]
        if is_hy:
            # HY records carry one more empty slot before the tail
            # (presumably price_vol -- confirm against makedf's cols).
            record.append(None)
        record.append(tail)
        records.append(record)
    return makedf(records, indextype, "GS")
+
def parse_ms(fh, indextype):
option_stack = {}
for line in fh:
@@ -173,15 +207,36 @@ def parse_nomura(fh, indextype):
option_stack[expiry] = parse_nomura_block(fh, indextype)
return option_stack
def parse_gs(fh, indextype, series, quotedate, ref):
    """Parse the body of a GS swaption quote email.

    Scans *fh* for lines starting with ``Expiry``. For every such line
    that matches the expiry pattern, one forward-index record is stored
    and the quote block that follows is parsed with ``parse_gs_block``.

    Args:
        fh: iterator over the text lines of the email body.
        indextype: index family taken from the subject (e.g. 'IG', 'HY').
        series: index series number.
        quotedate: timestamp of the quote; becomes the index of the
            returned forward-index frame.
        ref: reference level taken from the subject line.

    Returns:
        A tuple ``(option_stack, fwd_index)`` where ``option_stack`` maps
        each expiry (as returned by ``pd.to_datetime``) to the frame
        produced by ``parse_gs_block`` and ``fwd_index`` is a DataFrame
        with one row per expiry, indexed by ``quotedate``.
    """
    # Compiled once rather than on every matching line. The optional first
    # group is the forward price, which may be absent from the header.
    expiry_pat = re.compile(
        r"Expiry (\d{2}\w{3}\d{2}) \((?:([\S]+) )?([\S]+)\)")
    common = {'quotedate': quotedate, 'index': indextype,
              'series': series, 'ref': ref}
    option_stack = {}
    fwd_rows = []
    for line in fh:
        line = line.rstrip()
        if not line.startswith("Expiry"):
            continue
        m = expiry_pat.match(line)
        if m is None:
            continue
        expiry, fwdprice, fwdspread = m.groups()
        expiry = pd.to_datetime(expiry, format='%d%b%y')
        # Build a fresh dict for every expiry. Appending one shared dict
        # and mutating it would leave every record pointing at the same
        # object, so all rows would show the last expiry's values.
        fwd_rows.append(dict(common, fwdspread=fwdspread,
                             fwdprice=fwdprice, expiry=expiry))
        option_stack[expiry] = parse_gs_block(fh, indextype)
    fwd_index = pd.DataFrame.from_records(fwd_rows, index='quotedate')
    return option_stack, fwd_index
+
# Subject-line patterns, one per quote source. parse_email looks them up
# by name as 'subject_' + source.lower(), so the names must not change.
# Raw strings keep the regex escapes (\w, \d, \s, \$) from being treated
# as invalid string escape sequences.
subject_baml = re.compile(r"(?:Fwd:){0,2}(?:BAML )?(\w{2})([0-9]{1,2})\s")
subject_ms = re.compile(r"[^$]*\$\$ MS CDX OPTIONS: (IG|HY)(\d{2})[^-]*- REF[^\d]*([\d.]+)")
subject_nomura = re.compile(r"(?:Fwd:)?CDX (IG|HY)(\d{2}).*- REF:[^\d]*([\d.]+)")
subject_gs = re.compile(r"GS (IG|HY)(\d{2}) 5y.*- Ref [^\d]*([\d.]+)")
def parse_email(email):
with open(email.path, "rt") as fh:
date_received = datetime.datetime.fromtimestamp(int(fh.readline())/1000)
subject = next(fh)
- for source in ['BAML', 'MS', 'NOMURA']:
+ for source in ['BAML', 'MS', 'NOMURA', 'GS']:
m = globals()['subject_'+source.lower()].match(subject)
if m:
if source == 'BAML':
@@ -189,12 +244,18 @@ def parse_email(email):
else:
indextype, series, ref = m.groups()
ref = float(ref)
+
series = int(series)
quotedate = parse_quotedate(fh, date_received)
+ parse_fun = globals()['parse_'+source.lower()]
if source == 'BAML':
- return (quotedate, indextype, series), parse_baml(fh, indextype, series, quotedate)
+ return (quotedate, indextype, series), \
+ parse_fun(fh, indextype, series, quotedate)
+ elif source == "GS":
+ return (quotedate, indextype, series), \
+ parse_fun(fh, indextype, series, quotedate, ref)
else:
- option_stack = globals()['parse_'+source.lower()](fh, indextype)
+ option_stack = parse_fun(fh, indextype)
fwd_index = pd.DataFrame({'quotedate': quotedate,
'ref': ref,
'index': indextype,