diff options
| -rw-r--r-- | python/download_emails.py | 2 | ||||
| -rw-r--r-- | python/parse_emails.py | 40 | ||||
| -rw-r--r-- | python/send_email.py | 2 |
3 files changed, 22 insertions, 22 deletions
diff --git a/python/download_emails.py b/python/download_emails.py index 1d0c54c8..4c41dd2e 100644 --- a/python/download_emails.py +++ b/python/download_emails.py @@ -87,7 +87,7 @@ def update_emails(): service = get_gmail_service() labelsdict = labels_dict(service, 'me') - p = Path('../../data/swaptions/') + p = Path(os.getenv("DATA_DIR")) / Path('swaptions') current_msgs = set([f.name for f in p.iterdir() if f.is_file()]) for msg in ListMessagesWithLabels(service, 'me', labelsdict['swaptions']): if msg['id'] not in current_msgs: diff --git a/python/parse_emails.py b/python/parse_emails.py index 0d0d25cb..5c5fb8bd 100644 --- a/python/parse_emails.py +++ b/python/parse_emails.py @@ -4,8 +4,10 @@ import os import pdb from download_emails import update_emails import datetime -import sys import logging +import pickle +import sys + logging.basicConfig(filename=os.path.join(os.getenv("LOG_DIR"), 'emails_parsing.log'), level=logging.WARNING, @@ -143,8 +145,8 @@ def parse_ms(fh, indextype): subject_BAML = re.compile("(?:Fwd:){0,2}(?:BAML )?(\w{2})([0-9]{1,2})\s") subject_MS = re.compile("\$\$ MS CDX OPTIONS: (IG|HY)(\d{2})[^\d]*([\d.]+)") -def parse_email(email_path): - with open(email_path.path, "rt") as fh: +def parse_email(email): + with open(email.path, "rt") as fh: date_received = datetime.datetime.fromtimestamp(int(fh.readline())/1000) subject = next(fh) m = subject_BAML.match(subject) @@ -168,7 +170,7 @@ def parse_email(email_path): fwd_index.set_index('quotedate', inplace = True) return (quotedate, indextype, series), (option_stack, fwd_index) raise RuntimeError("can't parse subject line: {0} for email {1}".format( - subject, email_path.name)) + subject, email.name)) def write_todb(swaption_stack, index_data): from sqlalchemy import MetaData, Table @@ -182,22 +184,20 @@ def write_todb(swaption_stack, index_data): index_data.to_sql('swaption_ref_quotes', serenitasdb, if_exists='append', index=False) def get_email_list(date): - data_dir = "/home/share/CorpCDOs/data/swaptions" - emails = [f for f in os.scandir(data_dir) if f.is_file()] - r = [] - for f in emails: - try: - key, (option_stack, fwd_index) = parse_email(f) - except RuntimeError as e: - print(e) - else: - if key[0].date() == date: - print(f.name) - r.append(key + (f.name,)) - return r + """returns a list of email file names for a given date + + Parameters + ---------- + date : string + """ + with open(".pickle", "rb") as fh: + already_uploaded = pickle.load(fh) + df = pd.DataFrame.from_dict(already_uploaded, orient='index') + df.columns = ['quotedate'] + df = df.reset_index().set_index('quotedate') + return df.loc[date,'index'].tolist() if __name__=="__main__": - import pickle update_emails() data_dir = os.path.join(os.getenv("DATA_DIR"), "swaptions") emails = [f for f in os.scandir(data_dir) if f.is_file()] @@ -207,7 +207,7 @@ if __name__=="__main__": with open(".pickle", "rb") as fh: already_uploaded = pickle.load(fh) except FileNotFoundError: - already_uploaded = set() + already_uploaded = {} for f in emails: if f.name in already_uploaded: continue @@ -219,7 +219,7 @@ if __name__=="__main__": else: swaption_stack[key] = pd.concat(option_stack, names=['expiry', 'strike']) index_data = index_data.append(fwd_index) - already_uploaded.add(f.name) + already_uploaded[f.name] = key[0] if index_data.empty: sys.exit() for col in ['fwdbpv', 'fwdprice', 'fwdspread', 'ref']: diff --git a/python/send_email.py b/python/send_email.py index f39fb5b1..3c2ae1a0 100644 --- a/python/send_email.py +++ b/python/send_email.py @@ -11,7 +11,7 @@ from email.mime.text import MIMEText import sys import argparse -#flags = argparse.ArgumentParser(parents=[tools.argparser]).parse_args() +flags = argparse.ArgumentParser(parents=[tools.argparser]).parse_args() SCOPES = 'https://www.googleapis.com/auth/gmail.modify' CLIENT_SECRET_FILE = 'secret.json' |
