1 files changed, 75 insertions, 0 deletions
diff --git a/python/parse_emails.py b/python/parse_emails.py
new file mode 100644
index 00000000..93383fe7
--- /dev/null
+++ b/python/parse_emails.py
@@ -0,0 +1,75 @@
+import pandas as pd
+import re
+from pathlib import Path
+
+emails = sorted(f for f in Path("quotes").iterdir() if f.is_file())  # iterdir() order is arbitrary
+
+def makedf(r, indextype, ref):
+    """Build the Strike-indexed quote table for one expiry.
+
+    r: rows of string fields in column order ('IG' has 9 columns; any other
+    indextype adds 'PxVol' before 'Gamma'). ref: stored in the 'ref' column.
+    Percent fields become fractions; fields that fail to parse become NaN.
+    """
+    cols = ['Strike', 'RecBid', 'RecOffer', 'DeltaRec', 'PayBid', 'PayOffer',
+            'DeltaPay', 'Vol'] + ([] if indextype == 'IG' else ['PxVol']) + ['Gamma']
+    df = pd.DataFrame.from_records(r, columns=cols)
+    df['ref'] = ref
+    for col in df.columns.intersection(['DeltaRec', 'DeltaPay', 'Vol', 'PxVol', 'Gamma']):
+        df[col] = df[col].str.strip("%").astype('float') / 100  # "12.5%" -> 0.125
+    # convert_objects() was removed in pandas 1.0; to_numeric is its replacement
+    return df.apply(pd.to_numeric, errors='coerce').set_index('Strike')
+
+# Parse each quote email into per-expiry tables keyed by (quotedate, indextype, series).
+masterdf = {}
+for f in emails:
+    quotedate = None  # reset per file so an earlier email's date is never reused
+    with f.open("rb") as fh:
+        subject = next(fh, b"").decode('utf-8', 'ignore')  # first line holds the subject
+        # \d+ rather than \S+: a non-numeric series is reported instead of crashing int()
+        m = re.match(r"(?:Fwd:)?(\w{2})(\d+)\s", subject)
+        if not m:
+            print("can't parse subject line for {0}".format(f))
+            continue  # no series means no valid key for this file
+        indextype, series = m.group(1), int(m.group(2))
+        if indextype=='HY' and series==24:
+            print('{0}'.format(f))
+        flag = False  # True while reading the rows under a "Strike" header
+        allexpiriesdf = {}
+        for line in fh:
+            line = line.decode('utf-8', 'ignore').rstrip()
+            if line.startswith("At"):
+                quotedate = pd.to_datetime(line[4:])
+                # NOTE(review): this only skips the "At" line itself; confirm whether
+                # quotes dated on/after 2015-04-01 were meant to be dropped entirely.
+                if quotedate >= pd.to_datetime('2015-04-01'):
+                    continue
+            if line.startswith("Ref"):
+                m = re.match(r"Ref:(\S+)\s+Fwd(?: Spd)?:(\S+)\s+Fwd Bpv:(\S+)\s+Expiry:(\S+)", line)
+                if m:
+                    ref, fwspread, fwbpv, expiry = m.groups()
+                    expiry = pd.to_datetime(expiry, format='%d-%b-%y')  # pd.datetime is gone in pandas 2.0
+                else:
+                    print("something wrong with {0}".format(f))
+                continue
+            if line.startswith("Strike"):
+                indextype = 'HY' if "Px Vol" in line else 'IG'  # header decides the column layout
+                flag = True
+                r = []
+                continue
+            if flag:
+                if line:
+                    # '/' and '|' become spaces, then the row is split on runs of spaces
+                    r.append(re.sub(" +", " ", re.sub("[/|]", " ", line)).rstrip().split(" "))
+                else:  # a blank line ends the current expiry's table
+                    allexpiriesdf[expiry] = makedf(r, indextype, ref)
+                    flag = False
+    if flag:  # the file ended while still inside a table
+        allexpiriesdf[expiry] = makedf(r, indextype, ref)
+    if quotedate is None or not allexpiriesdf:
+        print("no quotes found in {0}".format(f))
+        continue  # pd.concat raises ValueError on an empty mapping
+    masterdf[(quotedate, indextype, series)] = pd.concat(allexpiriesdf, names=['expiry', 'Strike'])
+# Level names must follow the order of the key tuple (quotedate, indextype, series).
+masterdf = pd.concat(masterdf, names=['quotedate', 'indextype', 'series'])
+masterdf.to_hdf('swaptions.hdf', key='swaptions')