diff options
Diffstat (limited to 'python')
| -rw-r--r-- | python/calibrate_swaption.py | 31 | ||||
| -rw-r--r-- | python/download_emails.py | 18 | ||||
| -rw-r--r-- | python/parse_emails.py | 35 |
3 files changed, 50 insertions, 34 deletions
diff --git a/python/calibrate_swaption.py b/python/calibrate_swaption.py index 63a44213..ce145224 100644 --- a/python/calibrate_swaption.py +++ b/python/calibrate_swaption.py @@ -1,39 +1,43 @@ import pandas as pd -from analytics import Index, Swaption +from analytics import CreditIndex, Swaption import datetime from db import dbengine from contextlib import contextmanager from itertools import starmap from functools import partial from multiprocessing import Pool -from itertools import repeat serenitas_engine = dbengine('serenitasdb') + def get_data(index, series, date=datetime.date.min): - df = pd.read_sql_query("SELECT * from swaption_ref_quotes JOIN swaption_quotes " \ - "USING (ref_id) WHERE index=%s and series=%s " \ + df = pd.read_sql_query("SELECT * from swaption_ref_quotes JOIN swaption_quotes " + "USING (ref_id) WHERE index=%s and series=%s " "and quotedate >=%s ORDER BY quotedate", serenitas_engine, - params=(index, series, date), parse_dates=['quotedate', 'expiry']) - df.loc[(df.quote_source == "GS") & (df['index'] =="HY"), + params=(index, series, date), + parse_dates=['quotedate', 'expiry']) + df.loc[(df.quote_source == "GS") & (df['index'] == "HY"), ["pay_bid", "pay_offer", "rec_bid", "rec_offer"]] *= 100 df.quotedate = df.quotedate.dt.tz_convert('America/New_York') return df + def get_data_latest(): - df = pd.read_sql_query("SELECT quotedate, index, series, expiry, ref, quote_source, " - "swaption_quotes.* FROM swaption_ref_quotes " \ - "JOIN swaption_quotes USING (ref_id) " \ - "LEFT JOIN swaption_calib USING (quote_id) " \ + df = pd.read_sql_query("SELECT quotedate, index, series, expiry, ref, " + "quote_source, swaption_quotes.* " + "FROM swaption_ref_quotes " + "JOIN swaption_quotes USING (ref_id) " + "LEFT JOIN swaption_calib USING (quote_id) " "WHERE swaption_calib.quote_id is NULL", serenitas_engine, parse_dates=['quotedate', 'expiry']) df.loc[(df.quote_source == "GS") & (df['index'] == "HY"), - ["pay_bid", "pay_offer", "rec_bid", "rec_offer"]] *=100 + ["pay_bid", "pay_offer", "rec_bid", "rec_offer"]] *= 100 df.quotedate = df.quotedate.dt.tz_convert('America/New_York') return df + def calib(option, ref, strike, pay_bid, pay_offer, rec_bid, rec_offer): option.ref = ref option.strike = strike @@ -55,10 +59,12 @@ def calib(option, ref, strike, pay_bid, pay_offer, rec_bid, rec_offer): r.append(option.sigma) return r + @contextmanager def MaybePool(nproc): yield Pool(nproc) if nproc > 1 else None + def calibrate(index_type=None, series=None, date=None, nproc=4, latest=False): sql_str = ("INSERT INTO swaption_calib VALUES({}) ON CONFLICT DO NOTHING". format(",".join(["%s"] * 5))) @@ -71,7 +77,7 @@ def calibrate(index_type=None, series=None, date=None, nproc=4, latest=False): pstarmap = pool.starmap if pool else starmap for k, v in data.groupby([data['quotedate'].dt.date, 'index', 'series']): trade_date, index_type, series = k - index = Index.from_name(index_type, series, "5yr", trade_date) + index = CreditIndex(index_type, series, "5yr", trade_date) for expiry, df in v.groupby(['expiry']): option = Swaption(index, expiry.date(), 100) mycalib = partial(calib, option) @@ -81,6 +87,7 @@ def calibrate(index_type=None, series=None, date=None, nproc=4, latest=False): to_insert = [[a] + b for a, b in zip(df.quote_id, r)] serenitas_engine.execute(sql_str, to_insert) + if __name__ == "__main__": import argparse parser = argparse.ArgumentParser() diff --git a/python/download_emails.py b/python/download_emails.py index 8c2e980e..6127e38c 100644 --- a/python/download_emails.py +++ b/python/download_emails.py @@ -46,18 +46,17 @@ def ListMessagesWithLabels(service, user_id, label_ids=[]): def ListHistory(service, user_id, label_id=None, start_history_id=10000): - """List History of all changes to the user's mailbox. + """List History of all changes to the user's mailbox. - Args: + Args: service: Authorized Gmail API service instance. user_id: User's email address. The special value "me" can be used to indicate the authenticated user. start_history_id: Only return Histories at or after start_history_id. - Returns: + Returns: A list of mailbox changes that occurred after the start_history_id. - """ - try: + """ history = (service.users().history().list(userId=user_id, startHistoryId=start_history_id, historyTypes="messageAdded", @@ -79,9 +78,6 @@ def ListHistory(service, user_id, label_id=None, start_history_id=10000): for c in change['messagesAdded']: yield c['message'] - except errors.HttpError as error: - print('An error occurred:', error) - def labels_dict(service, user_id): """Returns a dictionary mapping labels to labelids. @@ -185,4 +181,8 @@ def save_emails(update=True): if __name__ == '__main__': - save_emails() + try: + save_emails() + except errors.HttpError as e: + print(e) + save_emails(update=False) diff --git a/python/parse_emails.py b/python/parse_emails.py index 2e01c5b1..9d2a180e 100644 --- a/python/parse_emails.py +++ b/python/parse_emails.py @@ -144,8 +144,11 @@ def parse_ms_block(fh, indextype): strike = strike.strip() if indextype == "HY": strike = strike.split()[0] - pay_bid, pay_offer, pay_delta = payer.strip().split() - rec_bid, rec_offer, rec_delta = receiver.strip().split() + try: + pay_bid, pay_offer, pay_delta = payer.strip().split() + rec_bid, rec_offer, rec_delta = receiver.strip().split() + except ValueError: + break vals = [strike, rec_bid, rec_offer, rec_delta, pay_bid, pay_offer, pay_delta] @@ -161,7 +164,9 @@ def parse_ms_block(fh, indextype): vol, vol_change, be = vol.split() vals += [vol] r.append(vals) - return makedf(r, indextype, "MS") + else: + return makedf(r, indextype, "MS") + return None def parse_nomura_block(fh, indextype): @@ -260,8 +265,9 @@ def parse_ms(fh, indextype, *args): return option_stack -def parse_nomura(fh, indextype, *args): +def parse_nom(fh, indextype, *args): option_stack = {} + def aux(line, fh, indextype, option_stack): expiry = line.split(" ")[0] expiry = pd.to_datetime(expiry, format="%d-%b-%y") @@ -281,7 +287,6 @@ def parse_nomura(fh, indextype, *args): def parse_sg(fh, indextype, expiration_dates): option_stack = {} - fwd_index = [] for line in fh: line = line.rstrip() if line.startswith("Type"): @@ -315,14 +320,15 @@ def parse_gs(fh, indextype, series, quotedate, ref): subject_baml = re.compile(r"(?:Fwd:){0,2}(?:BAML )?(\w{2})([0-9]{1,2})\s") subject_ms = re.compile(r"[^$]*\$\$ MS CDX OPTIONS: (IG|HY)(\d{2})[^-]*- REF[^\d]*([\d.]+)") -subject_nomura = re.compile(r"(?:Fwd:)?CDX (IG|HY)(\d{2}).*- REF:[^\d]*([\d.]+)") +subject_nom = re.compile(r"(?:Fwd:)?CDX (IG|HY)(\d{2}).*- REF:[^\d]*([\d.]+)") subject_gs = re.compile(r"GS (IG|HY)(\d{2}) 5y.*- Ref [^\d]*([\d.]+)") subject_sg = re.compile(r"SG OPTIONS - CDX (IG|HY) S(\d{2}).* REF[^\d]*([\d.]+)") + def parse_email(email, date_received): with open(email.path, "rt") as fh: subject = next(fh) - for source in ['BAML', 'MS', 'NOMURA', 'GS', 'SG']: + for source in ['BAML', 'MS', 'NOM', 'GS', 'SG']: m = globals()['subject_'+source.lower()].match(subject) if m: if source == 'BAML': @@ -406,7 +412,7 @@ def pickle_drop_date(date): pickle.dump(newdict, fh) -if __name__=="__main__": +if __name__ == "__main__": save_emails() data_dir = os.path.join(os.getenv("DATA_DIR"), "swaptions") emails = [f for f in os.scandir(data_dir) if f.is_file()] @@ -419,7 +425,8 @@ if __name__=="__main__": already_uploaded = {} for f in emails: date_received, msg_id = f.name.split("_") - date_received = datetime.datetime.strptime(date_received, "%Y-%m-%d %H-%M-%S") + date_received = datetime.datetime.strptime(date_received, + "%Y-%m-%d %H-%M-%S") if msg_id in already_uploaded: continue else: @@ -428,10 +435,11 @@ if __name__=="__main__": except RuntimeError as e: logging.error(e) else: - if key[0] is None: - logging.error("Something wrong with email: {}".format(f.name)) + if key[0] is None or len(option_stack) == 0: + logging.error(f"Something wrong with email: f.name") continue - swaption_stack[key] = pd.concat(option_stack, names=['expiry', 'strike']) + swaption_stack[key] = pd.concat(option_stack, + names=['expiry', 'strike']) index_data = index_data.append(fwd_index) already_uploaded[msg_id] = key[0] if index_data.empty: @@ -441,7 +449,8 @@ if __name__=="__main__": index_data[col] = index_data[col].astype('float') index_data['index'] = index_data['index'].astype('category') - swaption_stack = pd.concat(swaption_stack, names=['quotedate', 'index', 'series']) + swaption_stack = pd.concat(swaption_stack, + names=['quotedate', 'index', 'series']) swaption_stack = swaption_stack.reset_index() swaption_stack = swaption_stack.drop_duplicates(['quotedate', 'index', 'series', 'expiry', 'strike']) swaption_stack = swaption_stack.set_index(['quotedate', 'index', 'series', 'expiry']) |
