1 files changed, 146 insertions, 0 deletions
diff --git a/python/option_trades.py b/python/option_trades.py
new file mode 100644
index 00000000..1b4f18d3
--- /dev/null
+++ b/python/option_trades.py
@@ -0,0 +1,146 @@
+import pandas as pd
+from arch import arch_model
+import math
+from db import dbengine
+import numpy as np
+from scipy.interpolate import interp1d
+
+serenitasdb = dbengine('serenitasdb')
+
+def get_daily_pnl(index, series, tenor, coupon=1):
+    sql_str = "SELECT date, adjcloseprice AS close, closespread AS spread, duration, theta FROM index_quotes " \
+    "WHERE index=%s and series=%s and tenor = %s"
+    df = pd.read_sql_query(sql_str, serenitasdb, parse_dates=['date'],
+                           index_col=['date'], params=(index, series, tenor))
+    df.sort_index(inplace=True)
+    df['dt'] = df.index.to_series().diff().astype('timedelta64[D]')
+    df['pnl'] = df['close'].ffill().diff() + df.dt/360*coupon
+    return df
+
+def daily_spreads(index, series, tenor):
+    """computes daily spreads returns
+
+    Parameters
+    ----------
+    index : string
+    series : int
+    tenor : string
+    """
+    sql_str = "SELECT date, closespread AS spread FROM index_quotes " \
+              "WHERE index=%s and series=%s and tenor = %s"
+    df = pd.read_sql_query(sql_str, serenitasdb, parse_dates=['date'], index_col=['date'],
+                           params=(index.upper(), series, tenor))
+    df.sort_index(inplace=True)
+    return df.spread.pct_change().dropna()
+
+def index_returns(date=None, years=3, index="IG", tenor="5yr"):
+    """computes on the run returns"""
+    if date is None:
+        date = (pd.Timestamp.now() - pd.DateOffset(years=years)).date()
+    sql_str = "SELECT date, series, closespread AS spread FROM index_quotes " \
+              "WHERE index=%s and date>=%s and tenor = %s"
+    df = pd.read_sql_query(sql_str, serenitasdb, parse_dates=['date'], index_col=['date', 'series'],
+                           params=(index.upper(), date, tenor))
+    df.sort_index(inplace=True)
+    return (df.groupby(level='series').
+            transform(lambda x: x.pct_change()).
+            groupby(level='date').
+            last())
+
+def realized_vol(index, series, tenor, date=None, years=None):
+    """computes the realized spread volatility"""
+    if date is no
+    returns = daily_spreads(index, series, tenor)
+    am = arch_model(returns)
+    res = am.fit(update_freq=0, disp='off')
+    return (res.conditional_volatility * math.sqrt(252), res)
+
+def atm_vol_fun(v, ref_is_price=False, moneyness=0.2):
+    f = interp1d(v.strike.values, v.vol.values, fill_value='extrapolate')
+    atm_val = v['fwdspread'].iat[0]
+    otm_val = atm_val * (1 + moneyness) ## doesn't make sense for HY
+    return pd.Series(f(np.array([atm_val, otm_val])), index = ['atm_vol', 'otm_vol'])
+
+def atm_vol(index, series, moneyness=0.2):
+    df =  pd.read_sql_query('SELECT quotedate, expiry, strike, vol from swaption_quotes ' \
+                            'WHERE index = %s and series = %s',
+                            serenitasdb, index_col=['quotedate', 'expiry'],
+                            params = (index.upper(), series))
+    index_data = pd.read_sql_query(
+        'SELECT quotedate, expiry, fwdspread from swaption_ref_quotes ' \
+        'WHERE index= %s and series = %s',
+        serenitasdb, index_col = ['quotedate', 'expiry'],
+        params = (index.upper(), series))
+
+    df = df.join(index_data)
+    df = df.groupby(level=['quotedate', 'expiry']).filter(lambda x: len(x)>=2)
+    df = df.groupby(level=['quotedate', 'expiry']).apply(atm_vol_fun, index=="HY", moneyness)
+    df = df.reset_index(level=-1) #move expiry back to the column
+    return df
+
+def atm_vol_date(index, date):
+    df =  pd.read_sql_query('SELECT quotedate, series, expiry, strike, vol ' \
+                            'FROM swaption_quotes ' \
+                            'WHERE index = %s and quotedate >= %s',
+                            serenitasdb,
+                            index_col=['quotedate', 'expiry', 'series'],
+                            params=(index.upper(), date))
+    index_data = pd.read_sql_query(
+        'SELECT quotedate, expiry, series,  fwdspread FROM swaption_ref_quotes ' \
+        'WHERE index= %s and quotedate >= %s',
+        serenitasdb, index_col=['quotedate', 'expiry', 'series'],
+        params = (index.upper(), date))
+    df = df.join(index_data)
+    df = df.groupby(df.index).filter(lambda x: len(x)>=2)
+    df = df.groupby(level=['quotedate', 'expiry', 'series']).apply(atm_vol_fun)
+    df = df.reset_index(level=['expiry', 'series']) #move expiry and series back to the columns
+    return df
+
+def rolling_vol(df, col='atm_vol', term=[3]):
+    """compute the rolling volatility for various terms"""
+    df = df.groupby(df.index).filter(lambda x: len(x)>2)
+    def aux(s, col, term):
+        k = s.index[0]
+        f = interp1d(s.expiry.values.astype('float'), s[col].values, fill_value='extrapolate')
+        x = np.array([(k + pd.DateOffset(months=t)).to_datetime64().astype('float') \
+                      for t in term])
+        return pd.Series(f(x), index=[str(t)+'m' for t in term])
+
+    df = df.groupby(level='quotedate').apply(aux, col, term)
+    # MS quotes don't have fwdspread so they end up as NA
+    return df.dropna()
+
+def vol_var(percentile=0.99, index='IG'):
+    df = atm_vol_date("IG", datetime.date(2014, 6, 11))
+    df = rolling_vol(df, term=[1,2,3])
+    df = df.sort_index()
+    df = df.groupby(df.index.date).last()
+    return df.pct_change().quantile(percentile)
+
+def lr_var(res):
+    """ computes long run variance of the garch process"""
+    var = res.params.omega/(1 - res.params['alpha[1]'] - res.params['beta[1]'])
+    return math.sqrt(var) * math.sqrt(252)
+
+if __name__ == "__main__":
+    series = 23
+    rv, res = realized_vol("ig", series, "5yr")
+    rv = pd.DataFrame(rv)
+    rv = rv.reset_index()
+    df_vol = atm_vol("ig", series)
+    df_vol = rolling_vol(df_vol, term=[1, 2, 3])
+    realized_vs_atm = pd.merge_asof(rv, df_vol, on='date')
+    realized_vs_atm.set_index('date', inplace=True)
+    fig = realized_vs_atm[['cond_vol', '1m', '2m', '3m']].plot()
+
+    #compute series
+    top10 = pd.DataFrame()
+    for series in [23, 24, 25, 26, 27]:
+        df_vol = atm_vol("ig", series)
+        df_vol = rolling_vol(df_vol, term=[1, 2, 3])
+        df_vol.set_index('date', inplace=True)
+        daily_vol = df_vol.resample('D').last()
+        daily_vol['series'] = series
+        daily_vol = pd.DataFrame(daily_vol['3m'].diff().abs().nlargest(10))
+        daily_vol['series'] = series
+        top10 = top10.append(daily_vol)