Diffstat (limited to 'python/parse_emails.py')
-rw-r--r--  python/parse_emails.py  58
1 file changed, 42 insertions(+), 16 deletions(-)
diff --git a/python/parse_emails.py b/python/parse_emails.py
index f9ad9d9c..79d54a81 100644
--- a/python/parse_emails.py
+++ b/python/parse_emails.py
@@ -4,22 +4,26 @@ from pathlib import Path
import pdb
from download_emails import update_emails
import datetime
+import sys

def makedf(r, indextype):
if indextype=='IG':
- cols = ['Strike', 'RecBid', 'RecOffer', 'DeltaRec', 'PayBid',
- 'PayOffer', 'DeltaPay', 'Vol', 'Gamma']
+ cols = ['strike', 'rec_bid', 'rec_offer', 'delta_rec', 'pay_bid',
+ 'pay_offer', 'delta_pay', 'vol', 'gamma']
else:
- cols = ['Strike', 'RecBid', 'RecOffer', 'DeltaRec', 'PayBid',
- 'PayOffer', 'DeltaPay', 'Vol', 'PxVol', 'Gamma']
+ cols = ['strike', 'rec_bid', 'rec_offer', 'delta_rec', 'pay_bid',
+ 'pay_offer', 'delta_pay', 'vol', 'price_vol', 'gamma']
df = pd.DataFrame.from_records(r, columns = cols)
- for col in ['DeltaRec', 'DeltaPay', 'Vol', 'PxVol', 'Gamma']:
+ for col in ['delta_rec', 'delta_pay', 'vol', 'price_vol', 'gamma']:
if col in df:
df[col] = df[col].str.strip("%").astype('float')/100
for k in df:
- if df.dtypes[k]=='object':
- df[k] = pd.to_numeric(df[k])
- df.set_index('Strike', inplace=True)
+ if df.dtypes[k] == 'object':
+ try:
+ df[k] = pd.to_numeric(df[k])
+ except ValueError:
+ pdb.set_trace()
+ df.set_index('strike', inplace=True)
    return df

def parse_email(email_path):
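An aside for readers of this hunk: a minimal, self-contained sketch of the conversion makedf now performs, using made-up sample records. Percentage strings are stripped of '%' and scaled to fractions, and any column still held as object dtype is coerced with pd.to_numeric; only 'strike', 'vol' and 'gamma' from the real column list are reused here.

    import pandas as pd

    # made-up sample records, in the same shape makedf receives
    records = [("100", "1.25%", "0.40%"), ("105", "1.10%", "0.35%")]
    df = pd.DataFrame.from_records(records, columns=["strike", "vol", "gamma"])

    # percentage columns: "1.25%" -> 0.0125
    for col in ["vol", "gamma"]:
        if col in df:
            df[col] = df[col].str.strip("%").astype("float") / 100

    # anything still stored as strings (here: strike) becomes numeric
    for col in df:
        if df.dtypes[col] == "object":
            df[col] = pd.to_numeric(df[col])

    df.set_index("strike", inplace=True)
    print(df.dtypes)  # vol and gamma come out as float64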
@@ -59,7 +63,7 @@ def parse_email(email_path):
try:
d = m.groupdict()
d['quotedate'] = quotedate
- d['indextype'] = indextype
+ d['index'] = indextype
d['series'] = series
d['expiry'] = pd.to_datetime(d['expiry'], format='%d-%b-%y')
except AttributeError:
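As a small illustration of the groupdict/to_datetime step above (the regular expression and input text here are hypothetical, not the ones parse_email actually uses):

    import re
    import pandas as pd

    # hypothetical pattern and text, for illustration only
    m = re.search(r"Expiry (?P<expiry>\d{2}-\w{3}-\d{2})", "Expiry 20-Dec-17")
    d = m.groupdict()
    d['expiry'] = pd.to_datetime(d['expiry'], format='%d-%b-%y')  # Timestamp('2017-12-20 00:00:00')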
@@ -92,23 +96,45 @@ def parse_email(email_path):
raise RuntimeError("empty email: {0}".format(email_path.name))
if __name__=="__main__":
+ import pickle
update_emails()
emails = [f for f in Path("../../data/swaptions").iterdir() if f.is_file()]
swaption_stack = {}
index_data = pd.DataFrame()
+ with open(".pickle", "rb") as fh:
+ already_uploaded = pickle.load(fh)
for f in emails:
+ if f.name in already_uploaded:
+ continue
+ else:
+ already_uploaded.add(f.name)
try:
key, option_stack, fwd_index = parse_email(f)
except RuntimeError as e:
print(e)
else:
- swaption_stack[key] = pd.concat(option_stack, names=['expiry', 'Strike'])
+ swaption_stack[key] = pd.concat(option_stack, names=['expiry', 'strike'])
index_data = index_data.append(fwd_index)
+ if index_data.empty:
+ sys.exit()
for col in ['fwdbpv', 'fwdprice', 'fwdspread', 'ref']:
index_data[col] = index_data[col].astype('float')
- index_data['indextype'] = index_data['indextype'].astype('category')
- swaption_stack = pd.concat(swaption_stack, names=['quotedate', 'indextype', 'series'])
- with pd.HDFStore('../../data/swaptions.hdf', mode = 'w', complevel=4,
- complib='blosc', fletcher32=True) as swaptions:
- swaptions.append('swaptions', swaption_stack)
- swaptions.append('index_data', index_data)
+ index_data['index'] = index_data['index'].astype('category')
+
+ swaption_stack = pd.concat(swaption_stack, names=['quotedate', 'index', 'series'])
+ import feather
+ feather.write_dataframe(swaption_stack, '../../data/swaptions.fth')
+ feather.write_dataframe(index_data, '../../data/index_data.fth')
+
+ swaption_stack = swaption_stack.drop_duplicates()
+ swaption_stack = swaption_stack.reset_index()
+ index_data = index_data.drop_duplicates()
+ from db import dbengine
+ serenitasdb = dbengine('serenitasdb')
+ from sqlalchemy import MetaData, Table
+ meta = MetaData(bind=serenitasdb)
+ swaption_quotes = Table('swaption_quotes', meta, autoload=True)
+ ins = swaption_quotes.insert().values(swaption_stack.to_dict(orient='records')).execute()
+ index_data.to_sql('swaption_ref_quotes', serenitasdb, if_exists='append')
+ with open(".pickle", "wb") as fh:
+ pickle.dump(already_uploaded, fh)
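Finally, a minimal sketch of the reflect-and-insert pattern the new upload code relies on: bound MetaData, Table(..., autoload=True) and insert().values(records).execute(), which assumes the pre-2.0 SQLAlchemy API. The in-memory sqlite engine and the quotes table below are placeholders standing in for dbengine('serenitasdb') and the swaption_quotes table.

    import pandas as pd
    from sqlalchemy import MetaData, Table, create_engine

    engine = create_engine("sqlite://")                            # placeholder for dbengine('serenitasdb')
    engine.execute("CREATE TABLE quotes (strike REAL, vol REAL)")  # placeholder schema

    meta = MetaData(bind=engine)                                   # bound metadata (SQLAlchemy 1.x style)
    quotes = Table("quotes", meta, autoload=True)                  # reflect the existing table

    df = pd.DataFrame({"strike": [100.0, 105.0], "vol": [0.0125, 0.0110]})
    quotes.insert().values(df.to_dict(orient="records")).execute() # multi-row insert via the bound engine

    print(engine.execute(quotes.select()).fetchall())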