diff options
Diffstat (limited to 'python')
| -rw-r--r-- | python/analytics/option.py | 6 | ||||
| -rw-r--r-- | python/calibrate_swaption.py | 18 | ||||
| -rw-r--r-- | python/mark_swaptions.py | 6 | ||||
| -rw-r--r-- | python/parse_emails.py | 45 | ||||
| -rw-r--r-- | python/parse_gs_exchange.py | 8 |
5 files changed, 50 insertions, 33 deletions
diff --git a/python/analytics/option.py b/python/analytics/option.py index 6ac45a2b..ac14ca87 100644 --- a/python/analytics/option.py +++ b/python/analytics/option.py @@ -460,8 +460,10 @@ def _get_keys(df, models=["black", "precise"]): class QuoteSurface(): def __init__(self, index_type, series, tenor='5yr', trade_date=datetime.date.today()): self._quotes = pd.read_sql_query( - "SELECT swaption_quotes.*, ref, fwdspread FROM swaption_quotes " \ - "JOIN swaption_ref_quotes USING (quotedate, index, series, expiry)" \ + "SELECT quotedate, index, series, ref, fwdspread, expiry, " \ + "swaption_quotes.*, source " \ + "FROM swaption_quotes " \ + "JOIN swaption_ref_quotes USING (ref_id)" \ "WHERE quotedate::date = %s AND index= %s AND series = %s " \ "AND quote_source != 'SG' " \ "ORDER BY quotedate DESC", diff --git a/python/calibrate_swaption.py b/python/calibrate_swaption.py index d10236f7..63a44213 100644 --- a/python/calibrate_swaption.py +++ b/python/calibrate_swaption.py @@ -12,7 +12,7 @@ serenitas_engine = dbengine('serenitasdb') def get_data(index, series, date=datetime.date.min): df = pd.read_sql_query("SELECT * from swaption_ref_quotes JOIN swaption_quotes " \ - "USING (quotedate, index, series, expiry) WHERE index=%s and series=%s " \ + "USING (ref_id) WHERE index=%s and series=%s " \ "and quotedate >=%s ORDER BY quotedate", serenitas_engine, params=(index, series, date), parse_dates=['quotedate', 'expiry']) @@ -22,11 +22,11 @@ def get_data(index, series, date=datetime.date.min): return df def get_data_latest(): - df = pd.read_sql_query("SELECT swaption_quotes.*, ref FROM swaption_quotes " \ - "JOIN swaption_ref_quotes USING (quotedate, index, series, expiry) " \ - "LEFT JOIN swaption_calib " \ - "USING (quotedate, index, series, expiry, strike) " \ - "WHERE swaption_calib.quotedate is NULL", + df = pd.read_sql_query("SELECT quotedate, index, series, expiry, ref, quote_source, " + "swaption_quotes.* FROM swaption_ref_quotes " \ + "JOIN swaption_quotes USING (ref_id) " \ + "LEFT JOIN swaption_calib USING (quote_id) " \ + "WHERE swaption_calib.quote_id is NULL", serenitas_engine, parse_dates=['quotedate', 'expiry']) df.loc[(df.quote_source == "GS") & (df['index'] == "HY"), @@ -53,7 +53,7 @@ def calib(option, ref, strike, pay_bid, pay_offer, rec_bid, rec_offer): print(e) else: r.append(option.sigma) - return [strike] + r + return r @contextmanager def MaybePool(nproc): @@ -61,7 +61,7 @@ def MaybePool(nproc): def calibrate(index_type=None, series=None, date=None, nproc=4, latest=False): sql_str = ("INSERT INTO swaption_calib VALUES({}) ON CONFLICT DO NOTHING". - format(",".join(["%s"] * 9))) + format(",".join(["%s"] * 5))) if latest: data = get_data_latest() else: @@ -78,7 +78,7 @@ def calibrate(index_type=None, series=None, date=None, nproc=4, latest=False): r = pstarmap(mycalib, df[['ref', 'strike', 'pay_bid', 'pay_offer', 'rec_bid', 'rec_offer']]. itertuples(index=False, name=None)) - to_insert = [[a, index_type, series, expiry] + b for a, b in zip(df.quotedate.tolist(), r)] + to_insert = [[a] + b for a, b in zip(df.quote_id, r)] serenitas_engine.execute(sql_str, to_insert) if __name__ == "__main__": diff --git a/python/mark_swaptions.py b/python/mark_swaptions.py index 671467f9..d9edade8 100644 --- a/python/mark_swaptions.py +++ b/python/mark_swaptions.py @@ -26,8 +26,10 @@ def get_swaptionvol(index, series, strike, expiry, date, engine): r = engine.execute( "SELECT ref, vol_payer AS payer, vol_receiver AS receiver, " \ "vol_payer_black AS payer_black, vol_receiver_black AS receiver_black " \ - "FROM swaption_calib LEFT JOIN swaption_ref_quotes " \ - "USING (quotedate, expiry, index, series) WHERE index = %s " \ + "FROM swaption_calib " \ + "LEFT JOIN swaption_quotes USING (quote_id) " \ + "LEFT JOIN swaption_ref_quotes USING (ref_id) "\ + "WHERE index = %s " \ "AND series = %s AND strike=%s AND expiry = %s AND quotedate::date=%s " \ "ORDER BY quotedate desc", (index, series, strike, expiry, date)) diff --git a/python/parse_emails.py b/python/parse_emails.py index cf7dc88f..d93c17b6 100644 --- a/python/parse_emails.py +++ b/python/parse_emails.py @@ -25,7 +25,7 @@ def list_imm_dates(date): return r def makedf(r, indextype, quote_source): - if indextype=='IG': + if indextype == 'IG': cols = ['strike', 'rec_bid', 'rec_offer', 'delta_rec', 'pay_bid', 'pay_offer', 'delta_pay', 'vol'] else: @@ -38,7 +38,7 @@ def makedf(r, indextype, quote_source): df = pd.DataFrame.from_records(r, columns = cols) for col in ['delta_rec', 'delta_pay', 'vol', 'price_vol', 'gamma', 'tail']: if col in df: - df[col] = df[col].str.strip("%").astype('float')/100 + df[col] = df[col].str.strip("%").astype('float') / 100 if quote_source == "GS": for col in ["pay_bid", "pay_offer", "rec_bid", "rec_offer"]: df[col] = df[col].str.strip('-') @@ -49,7 +49,6 @@ def makedf(r, indextype, quote_source): df[k] = pd.to_numeric(df[k]) except ValueError: pdb.set_trace() - df['quote_source'] = quote_source df.set_index('strike', inplace=True) return df @@ -103,6 +102,7 @@ def parse_baml(fh, indextype, series, quotedate, *args): if option_stack: fwd_index = pd.DataFrame.from_records(fwd_index, index='quotedate') + fwd_index['quote_source'] = 'BAML' return option_stack, fwd_index else: raise RuntimeError("empty email: " + fh.name) @@ -295,6 +295,7 @@ def parse_gs(fh, indextype, series, quotedate, ref): logging.error("Can't parse expiry line:", line) fwd_index = pd.DataFrame.from_records(fwd_index, index='quotedate') + fwd_index['quote_source'] = 'GS' return option_stack, fwd_index subject_baml = re.compile("(?:Fwd:){0,2}(?:BAML )?(\w{2})([0-9]{1,2})\s") @@ -333,27 +334,38 @@ def parse_email(email, date_received): 'ref': ref, 'index': indextype, 'series': series, - 'expiry': list(option_stack.keys())}) - fwd_index.set_index('quotedate', inplace = True) + 'expiry': list(option_stack.keys()), + 'quote_source': source}) + fwd_index.set_index('quotedate', inplace=True) return (quotedate, indextype, series), (option_stack, fwd_index) else: raise RuntimeError("can't parse subject line: {0} for email {1}".format( subject, email.name)) def write_todb(swaption_stack, index_data): - conn = dbconn('serenitasdb') + def gen_sql_str(query, table_name, columns): + return query.format(sql.Identifier(table_name), + sql.SQL(", ").join(sql.Identifier(c) for c in columns), + sql.SQL(", ").join(sql.Placeholder() * len(columns))) + conn = dbconn('serenitasdb') + query = sql.SQL("INSERT INTO {}({}) VALUES({}) " \ + "ON CONFLICT DO NOTHING RETURNING ref_id") + sql_str = gen_sql_str(query, "swaption_ref_quotes", index_data.columns) query = sql.SQL("INSERT INTO {}({}) VALUES({}) " \ "ON CONFLICT DO NOTHING") - for df, table in zip([index_data, swaption_stack], - ["swaption_ref_quotes", "swaption_quotes"]): - cols = df.columns - sql_str = query.format(sql.Identifier(table), - sql.SQL(", ").join(sql.Identifier(c) for c in cols), - sql.SQL(", ").join(sql.Placeholder() * len(cols))) - with conn.cursor() as c: - c.executemany(sql_str, df.itertuples(index=False)) - conn.commit() - conn.close() + with conn.cursor() as c: + for t in index_data.itertuples(index=False): + c.execute(sql_str, t) + try: + ref_id, = next(c) + except StopIteration: + continue + else: + df = swaption_stack.loc[(t.quotedate, t.index, t.series, t.expiry)] + df['ref_id'] = ref_id + c.executemany(gen_sql_str(query, "swaption_quotes", df.columns), + df.itertuples(index=False)) + conn.commit() def get_email_list(date): """returns a list of email file names for a given date @@ -414,6 +426,7 @@ if __name__=="__main__": swaption_stack = pd.concat(swaption_stack, names=['quotedate', 'index', 'series']) swaption_stack = swaption_stack.reset_index() swaption_stack = swaption_stack.drop_duplicates(['quotedate', 'index', 'series', 'expiry', 'strike']) + swaption_stack = swaption_stack.set_index(['quotedate', 'index', 'series', 'expiry']) index_data = index_data.reset_index() index_data = index_data.drop_duplicates(['quotedate', 'index', 'series', 'expiry']) write_todb(swaption_stack, index_data) diff --git a/python/parse_gs_exchange.py b/python/parse_gs_exchange.py index a39b034a..49edb970 100644 --- a/python/parse_gs_exchange.py +++ b/python/parse_gs_exchange.py @@ -2,7 +2,6 @@ from db import dbconn from exchange import get_msgs from pytz import timezone from parse_emails import write_todb - import datetime import logging import pandas as pd @@ -99,7 +98,6 @@ def clean_df(all_df): del all_df['VolBpd'], all_df['VolChg'] if 'Sprd' in all_df: del all_df['Sprd'] - all_df['quote_source'] = 'GS' return all_df if __name__=="__main__": @@ -112,8 +110,10 @@ if __name__=="__main__": logging.exception(e) swaption_quotes[(quotedate, indextype, series)] = df - - swaption_quotes = clean_df(swaption_quotes) index_df = pd.DataFrame(fwd_index) index_df = index_df.drop_duplicates(['quotedate', 'index', 'series', 'expiry']) + index_df['quote_source'] = 'GS' + swaption_quotes = clean_df(swaption_quotes) + swaption_quotes = swaption_quotes.set_index(['quotedate', 'index', 'series', 'expiry'], + append=True) write_todb(swaption_quotes, index_df) |
