5 files changed, 50 insertions, 33 deletions
diff --git a/python/analytics/option.py b/python/analytics/option.py
index 6ac45a2b..ac14ca87 100644
--- a/python/analytics/option.py
+++ b/python/analytics/option.py
@@ -460,8 +460,10 @@ def _get_keys(df, models=["black", "precise"]):
 class QuoteSurface():
     def __init__(self, index_type, series, tenor='5yr', trade_date=datetime.date.today()):
         self._quotes = pd.read_sql_query(
-            "SELECT swaption_quotes.*, ref, fwdspread FROM swaption_quotes " \
-            "JOIN swaption_ref_quotes USING (quotedate, index, series, expiry)" \
+            "SELECT quotedate, index, series, ref, fwdspread, expiry, " \
+            "swaption_quotes.*, source " \
+            "FROM swaption_quotes " \
+            "JOIN swaption_ref_quotes USING (ref_id)" \
             "WHERE quotedate::date = %s AND index= %s AND series = %s " \
             "AND quote_source != 'SG' " \
             "ORDER BY quotedate DESC",
diff --git a/python/calibrate_swaption.py b/python/calibrate_swaption.py
index d10236f7..63a44213 100644
--- a/python/calibrate_swaption.py
+++ b/python/calibrate_swaption.py
@@ -12,7 +12,7 @@ serenitas_engine = dbengine('serenitasdb')
 
 def get_data(index, series, date=datetime.date.min):
     df = pd.read_sql_query("SELECT * from swaption_ref_quotes JOIN swaption_quotes " \
-                           "USING (quotedate, index, series, expiry) WHERE index=%s and series=%s " \
+                           "USING (ref_id) WHERE index=%s and series=%s " \
                            "and quotedate >=%s ORDER BY quotedate",
                            serenitas_engine,
                            params=(index, series, date), parse_dates=['quotedate', 'expiry'])
@@ -22,11 +22,11 @@ def get_data(index, series, date=datetime.date.min):
     return df
 
 def get_data_latest():
-    df = pd.read_sql_query("SELECT swaption_quotes.*, ref FROM swaption_quotes " \
-                           "JOIN swaption_ref_quotes USING (quotedate, index, series, expiry) " \
-                           "LEFT JOIN swaption_calib " \
-                           "USING (quotedate, index, series, expiry, strike) " \
-                           "WHERE swaption_calib.quotedate is NULL",
+    df = pd.read_sql_query("SELECT quotedate, index, series, expiry, ref, quote_source, "
+                           "swaption_quotes.* FROM swaption_ref_quotes " \
+                           "JOIN swaption_quotes USING (ref_id) " \
+                           "LEFT JOIN swaption_calib USING (quote_id) " \
+                           "WHERE swaption_calib.quote_id is NULL",
                            serenitas_engine,
                            parse_dates=['quotedate', 'expiry'])
     df.loc[(df.quote_source == "GS") & (df['index'] == "HY"),
@@ -53,7 +53,7 @@ def calib(option, ref, strike, pay_bid, pay_offer, rec_bid, rec_offer):
                 print(e)
             else:
                 r.append(option.sigma)
-    return [strike] + r
+    return r
 
 @contextmanager
 def MaybePool(nproc):
@@ -61,7 +61,7 @@ def MaybePool(nproc):
 
 def calibrate(index_type=None, series=None, date=None, nproc=4, latest=False):
     sql_str = ("INSERT INTO swaption_calib VALUES({}) ON CONFLICT DO NOTHING".
-               format(",".join(["%s"] * 9)))
+               format(",".join(["%s"] * 5)))
     if latest:
         data = get_data_latest()
     else:
@@ -78,7 +78,7 @@ def calibrate(index_type=None, series=None, date=None, nproc=4, latest=False):
                 r = pstarmap(mycalib, df[['ref', 'strike', 'pay_bid',
                                           'pay_offer', 'rec_bid', 'rec_offer']].
                              itertuples(index=False, name=None))
-                to_insert = [[a, index_type, series, expiry] + b for a, b in zip(df.quotedate.tolist(), r)]
+                to_insert = [[a] + b for a, b in zip(df.quote_id, r)]
                 serenitas_engine.execute(sql_str, to_insert)
 
 if __name__ == "__main__":
diff --git a/python/mark_swaptions.py b/python/mark_swaptions.py
index 671467f9..d9edade8 100644
--- a/python/mark_swaptions.py
+++ b/python/mark_swaptions.py
@@ -26,8 +26,10 @@ def get_swaptionvol(index, series, strike, expiry, date, engine):
     r = engine.execute(
         "SELECT ref, vol_payer AS payer, vol_receiver AS receiver, " \
         "vol_payer_black AS payer_black, vol_receiver_black AS receiver_black " \
-        "FROM swaption_calib LEFT JOIN swaption_ref_quotes " \
-        "USING (quotedate, expiry, index, series) WHERE index = %s " \
+        "FROM swaption_calib " \
+        "LEFT JOIN swaption_quotes USING (quote_id) " \
+        "LEFT JOIN swaption_ref_quotes USING (ref_id) "\
+        "WHERE index = %s " \
         "AND series = %s AND strike=%s AND expiry = %s AND quotedate::date=%s " \
         "ORDER BY quotedate desc",
         (index, series, strike, expiry, date))
diff --git a/python/parse_emails.py b/python/parse_emails.py
index cf7dc88f..d93c17b6 100644
--- a/python/parse_emails.py
+++ b/python/parse_emails.py
@@ -25,7 +25,7 @@ def list_imm_dates(date):
     return r
 
 def makedf(r, indextype, quote_source):
-    if indextype=='IG':
+    if indextype == 'IG':
         cols = ['strike', 'rec_bid', 'rec_offer', 'delta_rec', 'pay_bid',
                 'pay_offer', 'delta_pay', 'vol']
     else:
@@ -38,7 +38,7 @@ def makedf(r, indextype, quote_source):
     df = pd.DataFrame.from_records(r, columns = cols)
     for col in ['delta_rec', 'delta_pay', 'vol', 'price_vol', 'gamma', 'tail']:
         if col in df:
-            df[col] = df[col].str.strip("%").astype('float')/100
+            df[col] = df[col].str.strip("%").astype('float') / 100
     if quote_source == "GS":
         for col in ["pay_bid", "pay_offer", "rec_bid", "rec_offer"]:
             df[col] = df[col].str.strip('-')
@@ -49,7 +49,6 @@ def makedf(r, indextype, quote_source):
                 df[k] = pd.to_numeric(df[k])
             except ValueError:
                 pdb.set_trace()
-    df['quote_source'] = quote_source
     df.set_index('strike', inplace=True)
     return df
 
@@ -103,6 +102,7 @@ def parse_baml(fh, indextype, series, quotedate, *args):
     if option_stack:
         fwd_index = pd.DataFrame.from_records(fwd_index,
                                               index='quotedate')
+        fwd_index['quote_source'] = 'BAML'
         return option_stack, fwd_index
     else:
         raise RuntimeError("empty email: " + fh.name)
@@ -295,6 +295,7 @@ def parse_gs(fh, indextype, series, quotedate, ref):
                 logging.error("Can't parse expiry line:", line)
     fwd_index = pd.DataFrame.from_records(fwd_index,
                                           index='quotedate')
+    fwd_index['quote_source'] = 'GS'
     return option_stack, fwd_index
 
 subject_baml = re.compile("(?:Fwd:){0,2}(?:BAML )?(\w{2})([0-9]{1,2})\s")
@@ -333,27 +334,38 @@ def parse_email(email, date_received):
                                               'ref': ref,
                                               'index': indextype,
                                               'series': series,
-                                              'expiry': list(option_stack.keys())})
-                    fwd_index.set_index('quotedate', inplace = True)
+                                              'expiry': list(option_stack.keys()),
+                                              'quote_source': source})
+                    fwd_index.set_index('quotedate', inplace=True)
                     return (quotedate, indextype, series), (option_stack, fwd_index)
         else:
             raise RuntimeError("can't parse subject line: {0} for email {1}".format(
                 subject, email.name))
 
 def write_todb(swaption_stack, index_data):
-    conn  = dbconn('serenitasdb')
+    def gen_sql_str(query, table_name, columns):
+        return query.format(sql.Identifier(table_name),
+                            sql.SQL(", ").join(sql.Identifier(c) for c in columns),
+                            sql.SQL(", ").join(sql.Placeholder() * len(columns)))
+    conn = dbconn('serenitasdb')
+    query = sql.SQL("INSERT INTO {}({}) VALUES({}) " \
+                    "ON CONFLICT DO NOTHING RETURNING ref_id")
+    sql_str = gen_sql_str(query, "swaption_ref_quotes", index_data.columns)
     query = sql.SQL("INSERT INTO {}({}) VALUES({}) " \
                     "ON CONFLICT DO NOTHING")
-    for df, table in zip([index_data, swaption_stack],
-                         ["swaption_ref_quotes", "swaption_quotes"]):
-        cols = df.columns
-        sql_str = query.format(sql.Identifier(table),
-                               sql.SQL(", ").join(sql.Identifier(c) for c in cols),
-                               sql.SQL(", ").join(sql.Placeholder() * len(cols)))
-        with conn.cursor() as c:
-            c.executemany(sql_str, df.itertuples(index=False))
-        conn.commit()
-    conn.close()
+    with conn.cursor() as c:
+        for t in index_data.itertuples(index=False):
+            c.execute(sql_str, t)
+            try:
+                ref_id, = next(c)
+            except StopIteration:
+                continue
+            else:
+                df = swaption_stack.loc[(t.quotedate, t.index, t.series, t.expiry)]
+                df['ref_id'] = ref_id
+                c.executemany(gen_sql_str(query, "swaption_quotes", df.columns),
+                              df.itertuples(index=False))
+            conn.commit()
 
 def get_email_list(date):
     """returns a list of email file names for a given date
@@ -414,6 +426,7 @@ if __name__=="__main__":
     swaption_stack = pd.concat(swaption_stack, names=['quotedate', 'index', 'series'])
     swaption_stack = swaption_stack.reset_index()
     swaption_stack = swaption_stack.drop_duplicates(['quotedate', 'index', 'series', 'expiry', 'strike'])
+    swaption_stack = swaption_stack.set_index(['quotedate', 'index', 'series', 'expiry'])
     index_data = index_data.reset_index()
     index_data = index_data.drop_duplicates(['quotedate', 'index', 'series', 'expiry'])
     write_todb(swaption_stack, index_data)
diff --git a/python/parse_gs_exchange.py b/python/parse_gs_exchange.py
index a39b034a..49edb970 100644
--- a/python/parse_gs_exchange.py
+++ b/python/parse_gs_exchange.py
@@ -2,7 +2,6 @@ from db import dbconn
 from exchange import get_msgs
 from pytz import timezone
 from parse_emails import write_todb
-
 import datetime
 import logging
 import pandas as pd
@@ -99,7 +98,6 @@ def clean_df(all_df):
     del all_df['VolBpd'], all_df['VolChg']
     if 'Sprd' in all_df:
         del all_df['Sprd']
-    all_df['quote_source'] = 'GS'
     return all_df
 
 if __name__=="__main__":
@@ -112,8 +110,10 @@ if __name__=="__main__":
             logging.exception(e)
 
         swaption_quotes[(quotedate, indextype, series)] = df
-
-    swaption_quotes = clean_df(swaption_quotes)
     index_df = pd.DataFrame(fwd_index)
     index_df = index_df.drop_duplicates(['quotedate', 'index', 'series', 'expiry'])
+    index_df['quote_source'] = 'GS'
+    swaption_quotes = clean_df(swaption_quotes)
+    swaption_quotes = swaption_quotes.set_index(['quotedate', 'index', 'series', 'expiry'],
+                                                append=True)
     write_todb(swaption_quotes, index_df)