aboutsummaryrefslogtreecommitdiffstats
path: root/python
diff options
context:
space:
mode:
Diffstat (limited to 'python')
-rw-r--r--python/parse_emails.py75
1 files changed, 75 insertions, 0 deletions
diff --git a/python/parse_emails.py b/python/parse_emails.py
new file mode 100644
index 00000000..93383fe7
--- /dev/null
+++ b/python/parse_emails.py
@@ -0,0 +1,75 @@
+import pandas as pd
+import re
+from pathlib import Path
+
+# Collect every regular file sitting directly inside the "quotes" directory
+# (sub-directories are skipped); the loop further down parses each of them.
+emails = list(filter(Path.is_file, Path("quotes").iterdir()))
+
+def makedf(r, indextype, ref):
+    """Build one quote table, indexed by strike, from tokenised rows.
+
+    Parameters
+    ----------
+    r : list of list of str
+        One list of string fields per quote row, in column order.
+    indextype : str
+        'IG' selects the 9-column layout; any other value selects the
+        10-column layout, which has an extra 'PxVol' column before 'Gamma'.
+    ref : object
+        Value written to every row of the added 'ref' column.
+
+    Returns
+    -------
+    pandas.DataFrame
+        Indexed by 'Strike'. The delta/vol/gamma columns have their "%"
+        signs stripped and are divided by 100; every other text column is
+        converted to numbers when at least one of its values is numeric
+        (values that are not become NaN), and left untouched otherwise.
+    """
+    cols = ['Strike', 'RecBid', 'RecOffer', 'DeltaRec', 'PayBid',
+            'PayOffer', 'DeltaPay', 'Vol', 'Gamma']
+    if indextype != 'IG':
+        cols.insert(cols.index('Gamma'), 'PxVol')
+    df = pd.DataFrame.from_records(r, columns=cols)
+    df['ref'] = ref
+    for col in ['DeltaRec', 'DeltaPay', 'Vol', 'PxVol', 'Gamma']:
+        if col in df:
+            df[col] = df[col].str.strip("%").astype('float') / 100
+    # DataFrame.convert_objects(convert_numeric=True) no longer exists in
+    # pandas, so coerce the remaining text columns one by one.  As before,
+    # a column in which nothing converts is kept as it was.
+    for col in df.columns:
+        column = df[col]
+        is_text = (pd.api.types.is_object_dtype(column)
+                   or pd.api.types.is_string_dtype(column))
+        if not is_text:
+            continue
+        converted = pd.to_numeric(column, errors='coerce')
+        if converted.notna().any():
+            df[col] = converted
+    return df.set_index('Strike')
+
+# Patterns for the two structured lines of a quote email, compiled once.
+_SUBJECT_RE = re.compile(r"(?:Fwd:)?(\w{2})(\S+)\s")
+_REF_RE = re.compile(
+    r"Ref:(\S+)\s+Fwd(?: Spd)?:(\S+)\s+Fwd Bpv:(\S+)\s+Expiry:(\S+)")
+_CUTOFF = pd.to_datetime('2015-04-01')
+
+
+def _parse_email(f):
+    """Parse one quote email into ``(key, DataFrame)``, or return ``None``.
+
+    The first line of the file is the subject; it must start with an
+    optional "Fwd:", a two-character index type and an integer series.
+    In the remaining lines:
+
+    * a line starting with "At" carries the quote date (text from the
+      fifth character on),
+    * a line starting with "Ref" carries the reference value and the
+      expiry (``%d-%b-%y``) of the table that follows,
+    * a line starting with "Strike" opens a table; a header containing
+      "Px Vol" marks the table as 'HY', any other header as 'IG',
+    * the following non-empty lines are table rows, split on spaces,
+      "/" and "|"; an empty line or the end of the file closes the table.
+
+    Parameters
+    ----------
+    f : pathlib.Path
+        Email file to read.
+
+    Returns
+    -------
+    tuple or None
+        ``((quotedate, indextype, series), frame)`` where ``frame`` stacks
+        the per-expiry tables under an ('expiry', 'Strike') index and
+        ``indextype`` is the one detected from the last table header.
+        ``None`` (after printing a message) when the subject line cannot
+        be parsed, or when no quote date or no table was found.  All state
+        is local, so nothing leaks from one email into the next.
+    """
+    with f.open("rb") as fh:
+        # Default of b"" so an empty file is reported instead of raising
+        # StopIteration; decoding is lenient like for the body lines.
+        subject = next(fh, b"").decode('utf-8', 'ignore')
+        # The emails are small, so read the body up front.  The trailing
+        # empty line is a sentinel that closes a table running to EOF.
+        lines = [raw.decode('utf-8', 'ignore').rstrip() for raw in fh]
+    lines.append("")
+
+    m = _SUBJECT_RE.match(subject)
+    if m is None:
+        print("can't parse subject line for {0}".format(f))
+        return None
+    indextype, series = m.groups()
+    try:
+        series = int(series)
+    except ValueError:
+        print("can't parse subject line for {0}".format(f))
+        return None
+    if indextype == 'HY' and series == 24:
+        print('{0}'.format(f))
+
+    quotedate = ref = expiry = None
+    tables = {}
+    rows = None  # list of token lists while inside a table, else None
+    for line in lines:
+        if line.startswith("At"):
+            quotedate = pd.to_datetime(line[4:])
+            # NOTE(review): this only skips the remaining checks for this
+            # one line.  If the intent was to drop emails dated on or
+            # after 2015-04-01 altogether, that is not what it does --
+            # confirm with the author.
+            if quotedate >= _CUTOFF:
+                continue
+        if line.startswith("Ref"):
+            m = _REF_RE.match(line)
+            if m:
+                # Groups 2 and 3 (forward spread, forward bpv) are matched
+                # but not used.
+                ref, expiry = m.group(1, 4)
+                expiry = pd.to_datetime(expiry, format='%d-%b-%y')
+            else:
+                print("something wrong with {0}".format(f))
+            continue
+        if line.startswith("Strike"):
+            indextype = 'HY' if "Px Vol" in line else 'IG'
+            rows = []
+            continue
+        if rows is None:
+            continue
+        if line:
+            # split() without arguments also swallows repeated or leading
+            # blanks, so no empty fields end up in the row.
+            rows.append(line.replace("/", " ").replace("|", " ").split())
+            continue
+        # An empty line closes the current table.
+        if expiry is None:
+            # Table without a preceding, parsable "Ref" line.
+            print("something wrong with {0}".format(f))
+        else:
+            tables[expiry] = makedf(rows, indextype, ref)
+        rows = None
+
+    if quotedate is None or not tables:
+        print("something wrong with {0}".format(f))
+        return None
+    key = (quotedate, indextype, series)
+    return key, pd.concat(tables, names=['expiry', 'Strike'])
+
+
+# Parse every email, then stack the results into a single frame whose
+# outer index levels are named in the same order as the key tuple.
+masterdf = {}
+for f in emails:
+    parsed = _parse_email(f)
+    if parsed is not None:
+        key, quotes = parsed
+        masterdf[key] = quotes
+masterdf = pd.concat(masterdf, names=['quotedate', 'indextype', 'series'])
+masterdf.to_hdf('swaptions.hdf', key='swaptions')