about summary refs log tree commit diff stats
path: root/python/parse_emails.py
diff options
context:
space:
mode:
Diffstat (limited to 'python/parse_emails.py')
-rw-r--r-- python/parse_emails.py 40
1 files changed, 20 insertions, 20 deletions
diff --git a/python/parse_emails.py b/python/parse_emails.py
index 0d0d25cb..5c5fb8bd 100644
--- a/python/parse_emails.py
+++ b/python/parse_emails.py
@@ -4,8 +4,10 @@ import os
import pdb
from download_emails import update_emails
import datetime
-import sys
import logging
+import pickle
+import sys
+
logging.basicConfig(filename=os.path.join(os.getenv("LOG_DIR"), 'emails_parsing.log'),
level=logging.WARNING,
@@ -143,8 +145,8 @@ def parse_ms(fh, indextype):
subject_BAML = re.compile("(?:Fwd:){0,2}(?:BAML )?(\w{2})([0-9]{1,2})\s")
subject_MS = re.compile("\$\$ MS CDX OPTIONS: (IG|HY)(\d{2})[^\d]*([\d.]+)")
-def parse_email(email_path):
- with open(email_path.path, "rt") as fh:
+def parse_email(email):
+ with open(email.path, "rt") as fh:
date_received = datetime.datetime.fromtimestamp(int(fh.readline())/1000)
subject = next(fh)
m = subject_BAML.match(subject)
@@ -168,7 +170,7 @@ def parse_email(email_path):
fwd_index.set_index('quotedate', inplace = True)
return (quotedate, indextype, series), (option_stack, fwd_index)
raise RuntimeError("can't parse subject line: {0} for email {1}".format(
- subject, email_path.name))
+ subject, email.name))
def write_todb(swaption_stack, index_data):
from sqlalchemy import MetaData, Table
@@ -182,22 +184,20 @@ def write_todb(swaption_stack, index_data):
index_data.to_sql('swaption_ref_quotes', serenitasdb, if_exists='append', index=False)
def get_email_list(date):
- data_dir = "/home/share/CorpCDOs/data/swaptions"
- emails = [f for f in os.scandir(data_dir) if f.is_file()]
- r = []
- for f in emails:
- try:
- key, (option_stack, fwd_index) = parse_email(f)
- except RuntimeError as e:
- print(e)
- else:
- if key[0].date() == date:
- print(f.name)
- r.append(key + (f.name,))
- return r
+ """returns a list of email file names for a given date
+
+ Parameters
+ ----------
+ date : string
+ """
+ with open(".pickle", "rb") as fh:
+ already_uploaded = pickle.load(fh)
+ df = pd.DataFrame.from_dict(already_uploaded, orient='index')
+ df.columns = ['quotedate']
+ df = df.reset_index().set_index('quotedate')
+ return df.loc[date,'index'].tolist()
if __name__=="__main__":
- import pickle
update_emails()
data_dir = os.path.join(os.getenv("DATA_DIR"), "swaptions")
emails = [f for f in os.scandir(data_dir) if f.is_file()]
@@ -207,7 +207,7 @@ if __name__=="__main__":
with open(".pickle", "rb") as fh:
already_uploaded = pickle.load(fh)
except FileNotFoundError:
- already_uploaded = set()
+ already_uploaded = {}
for f in emails:
if f.name in already_uploaded:
continue
@@ -219,7 +219,7 @@ if __name__=="__main__":
else:
swaption_stack[key] = pd.concat(option_stack, names=['expiry', 'strike'])
index_data = index_data.append(fwd_index)
- already_uploaded.add(f.name)
+ already_uploaded[f.name] = key[0]
if index_data.empty:
sys.exit()
for col in ['fwdbpv', 'fwdprice', 'fwdspread', 'ref']: