about | summary | refs | log | tree | commit | diff | stats
path: root/python/parse_emails.py
diff options
context:
space:
mode:
Diffstat (limited to 'python/parse_emails.py')
-rw-r--r-- python/parse_emails.py 31
1 files changed, 18 insertions, 13 deletions
diff --git a/python/parse_emails.py b/python/parse_emails.py
index 9bf84fc5..7b58a856 100644
--- a/python/parse_emails.py
+++ b/python/parse_emails.py
@@ -99,7 +99,7 @@ def parse_baml_block(fh, indextype):
break
line = re.sub("[/|]", " ", line)
vals = re.sub(" +", " ", line).rstrip().split(" ")
- if len(vals) < 10:
+ if len(vals) < 3: ## something went wrong
line = ""
break
r.append(vals)
@@ -169,6 +169,17 @@ def parse_email(email_path):
raise RuntimeError("can't parse subject line: {0} for email {1}".format(
subject, email_path.name))
+def write_todb(swaption_stack, index_data):
+ from sqlalchemy import MetaData, Table
+ from db import dbengine, nan_to_null
+ import psycopg2
+ serenitasdb = dbengine('serenitasdb')
+ psycopg2.extensions.register_adapter(float, nan_to_null)
+ meta = MetaData(bind=serenitasdb)
+ swaption_quotes = Table('swaption_quotes', meta, autoload=True)
+ ins = swaption_quotes.insert().values(swaption_stack.to_dict(orient='records')).execute()
+ index_data.to_sql('swaption_ref_quotes', serenitasdb, if_exists='append', index=False)
+
if __name__=="__main__":
import pickle
update_emails()
@@ -177,6 +188,7 @@ if __name__=="__main__":
index_data = pd.DataFrame()
with open(".pickle", "rb") as fh:
already_uploaded = pickle.load(fh)
+ #already_uploaded = set()
for f in emails:
if f.name in already_uploaded:
continue
@@ -200,17 +212,10 @@ if __name__=="__main__":
# feather.write_dataframe(swaption_stack, '../../data/swaptions.fth')
# feather.write_dataframe(index_data, '../../data/index_data.fth')
- swaption_stack = swaption_stack.drop_duplicates()
swaption_stack = swaption_stack.reset_index()
- index_data = index_data.drop_duplicates()
- from db import dbengine, nan_to_null
- import psycopg2
- serenitasdb = dbengine('serenitasdb')
- psycopg2.extensions.register_adapter(float, nan_to_null)
- from sqlalchemy import MetaData, Table
- meta = MetaData(bind=serenitasdb)
- swaption_quotes = Table('swaption_quotes', meta, autoload=True)
- ins = swaption_quotes.insert().values(swaption_stack.to_dict(orient='records')).execute()
- index_data.to_sql('swaption_ref_quotes', serenitasdb, if_exists='append')
+ swaption_stack = swaption_stack.drop_duplicates(['quotedate', 'index', 'series', 'expiry', 'strike'])
+ index_data = index_data.reset_index()
+ index_data = index_data.drop_duplicates(['quotedate', 'index', 'series', 'expiry'])
+ write_todb(swaption_stack, index_data)
with open(".pickle", "wb") as fh:
- pickle.dump(already_uploaded, fh)
+ pickle.dump(already_uploaded, fh)