diff options
Diffstat (limited to 'python')
| -rw-r--r-- | python/parse_emails.py | 31 | ||||
| -rw-r--r-- | python/parse_gs.py | 8 |
2 files changed, 24 insertions, 15 deletions
diff --git a/python/parse_emails.py b/python/parse_emails.py index 9bf84fc5..7b58a856 100644 --- a/python/parse_emails.py +++ b/python/parse_emails.py @@ -99,7 +99,7 @@ def parse_baml_block(fh, indextype): break line = re.sub("[/|]", " ", line) vals = re.sub(" +", " ", line).rstrip().split(" ") - if len(vals) < 10: + if len(vals) < 3: ## something went wrong line = "" break r.append(vals) @@ -169,6 +169,17 @@ def parse_email(email_path): raise RuntimeError("can't parse subject line: {0} for email {1}".format( subject, email_path.name)) +def write_todb(swaption_stack, index_data): + from sqlalchemy import MetaData, Table + from db import dbengine, nan_to_null + import psycopg2 + serenitasdb = dbengine('serenitasdb') + psycopg2.extensions.register_adapter(float, nan_to_null) + meta = MetaData(bind=serenitasdb) + swaption_quotes = Table('swaption_quotes', meta, autoload=True) + ins = swaption_quotes.insert().values(swaption_stack.to_dict(orient='records')).execute() + index_data.to_sql('swaption_ref_quotes', serenitasdb, if_exists='append', index=False) + if __name__=="__main__": import pickle update_emails() @@ -177,6 +188,7 @@ if __name__=="__main__": index_data = pd.DataFrame() with open(".pickle", "rb") as fh: already_uploaded = pickle.load(fh) + #already_uploaded = set() for f in emails: if f.name in already_uploaded: continue @@ -200,17 +212,10 @@ if __name__=="__main__": # feather.write_dataframe(swaption_stack, '../../data/swaptions.fth') # feather.write_dataframe(index_data, '../../data/index_data.fth') - swaption_stack = swaption_stack.drop_duplicates() swaption_stack = swaption_stack.reset_index() - index_data = index_data.drop_duplicates() - from db import dbengine, nan_to_null - import psycopg2 - serenitasdb = dbengine('serenitasdb') - psycopg2.extensions.register_adapter(float, nan_to_null) - from sqlalchemy import MetaData, Table - meta = MetaData(bind=serenitasdb) - swaption_quotes = Table('swaption_quotes', meta, autoload=True) - ins = swaption_quotes.insert().values(swaption_stack.to_dict(orient='records')).execute() - index_data.to_sql('swaption_ref_quotes', serenitasdb, if_exists='append') + swaption_stack = swaption_stack.drop_duplicates(['quotedate', 'index', 'series', 'expiry', 'strike']) + index_data = index_data.reset_index() + index_data = index_data.drop_duplicates(['quotedate', 'index', 'series', 'expiry']) + write_todb(swaption_stack, index_data) with open(".pickle", "wb") as fh: - pickle.dump(already_uploaded, fh) + pickle.dump(already_uploaded, fh) diff --git a/python/parse_gs.py b/python/parse_gs.py index 3e25c6cb..b58065ee 100644 --- a/python/parse_gs.py +++ b/python/parse_gs.py @@ -3,7 +3,7 @@ import pdb import re import os -data_dir = "/home/share/guillaume/swaptions" +data_dir = "/home/share/guillaume/IG swaptions" all_df = {} fwd_index = [] for f in os.listdir(data_dir): @@ -80,6 +80,7 @@ for f in os.listdir(data_dir): all_df[(quotedate, indextype, series)] = pd.concat(masterdf, names=['expiry']) all_df = pd.concat(all_df, names = ['quotedate', 'index', 'series']) all_df['DeltaPay'] = - all_df['DeltaPay']/100 +all_df['Vol'] /= 100 index_df = pd.DataFrame(fwd_index) all_df.reset_index(inplace=True) @@ -91,9 +92,12 @@ all_df = all_df.rename(columns={'Strike':'strike', 'RecBid': 'rec_bid', 'Tail': 'tail', 'DeltaPay': 'delta_pay'}) -del all_df['VolBpd'], all_df['VolChg'], all_df['Sprd'] +del all_df['VolBpd'], all_df['VolChg'] +if 'Sprd' in all_df: + del all_df['Sprd'] all_df['quote_source'] = 'GS' from db import dbengine serenitasdb = dbengine('serenitasdb') all_df.to_sql('swaption_quotes', serenitasdb, if_exists='append', index=False) +index_df = index_df.drop_duplicates(['quotedate', 'index', 'series', 'expiry']) index_df.to_sql('swaption_ref_quotes', serenitasdb, if_exists='append', index=False) |
