diff options
Diffstat (limited to 'python/parse_emails.py')
| -rw-r--r-- | python/parse_emails.py | 31 |
1 files changed, 18 insertions, 13 deletions
diff --git a/python/parse_emails.py b/python/parse_emails.py index 9bf84fc5..7b58a856 100644 --- a/python/parse_emails.py +++ b/python/parse_emails.py @@ -99,7 +99,7 @@ def parse_baml_block(fh, indextype): break line = re.sub("[/|]", " ", line) vals = re.sub(" +", " ", line).rstrip().split(" ") - if len(vals) < 10: + if len(vals) < 3: ## something went wrong line = "" break r.append(vals) @@ -169,6 +169,17 @@ def parse_email(email_path): raise RuntimeError("can't parse subject line: {0} for email {1}".format( subject, email_path.name)) +def write_todb(swaption_stack, index_data): + from sqlalchemy import MetaData, Table + from db import dbengine, nan_to_null + import psycopg2 + serenitasdb = dbengine('serenitasdb') + psycopg2.extensions.register_adapter(float, nan_to_null) + meta = MetaData(bind=serenitasdb) + swaption_quotes = Table('swaption_quotes', meta, autoload=True) + ins = swaption_quotes.insert().values(swaption_stack.to_dict(orient='records')).execute() + index_data.to_sql('swaption_ref_quotes', serenitasdb, if_exists='append', index=False) + if __name__=="__main__": import pickle update_emails() @@ -177,6 +188,7 @@ if __name__=="__main__": index_data = pd.DataFrame() with open(".pickle", "rb") as fh: already_uploaded = pickle.load(fh) + #already_uploaded = set() for f in emails: if f.name in already_uploaded: continue @@ -200,17 +212,10 @@ if __name__=="__main__": # feather.write_dataframe(swaption_stack, '../../data/swaptions.fth') # feather.write_dataframe(index_data, '../../data/index_data.fth') - swaption_stack = swaption_stack.drop_duplicates() swaption_stack = swaption_stack.reset_index() - index_data = index_data.drop_duplicates() - from db import dbengine, nan_to_null - import psycopg2 - serenitasdb = dbengine('serenitasdb') - psycopg2.extensions.register_adapter(float, nan_to_null) - from sqlalchemy import MetaData, Table - meta = MetaData(bind=serenitasdb) - swaption_quotes = Table('swaption_quotes', meta, autoload=True) - ins = swaption_quotes.insert().values(swaption_stack.to_dict(orient='records')).execute() - index_data.to_sql('swaption_ref_quotes', serenitasdb, if_exists='append') + swaption_stack = swaption_stack.drop_duplicates(['quotedate', 'index', 'series', 'expiry', 'strike']) + index_data = index_data.reset_index() + index_data = index_data.drop_duplicates(['quotedate', 'index', 'series', 'expiry']) + write_todb(swaption_stack, index_data) with open(".pickle", "wb") as fh: - pickle.dump(already_uploaded, fh) + pickle.dump(already_uploaded, fh) |
