2 files changed, 267 insertions, 0 deletions
diff --git a/python/exploration/beta_trade.py b/python/exploration/beta_trade.py
new file mode 100644
index 00000000..1e86a97f
--- /dev/null
+++ b/python/exploration/beta_trade.py
@@ -0,0 +1,42 @@
+import math
+import pandas as pd
+import feather
+from index_data import index_returns
+from arch import arch_model
+from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
+
+returns = index_returns(index=['IG', 'HY'], tenor='5yr')
+returns = (returns.
+           reset_index(level='series').
+           groupby(level=['date','index']).
+           nth(-1)) ## lastest returns
+
+returns = (returns.
+           set_index('series', append=True)['price_return'].
+           unstack(level='index'))
+returns.columns = [col.lower() for col in returns.columns]
+feather.write_dataframe(returns.reset_index(), "/home/share/CorpCDOs/data/index_returns.fth")
+
+# three ways of computing the volatility
+# 20 days simple moving average
+vol_sma = returns.hy.rolling(20).std() * math.sqrt(252)
+vol_ewma = returns.hy.ewm(span=20).std() * math.sqrt(252)
+# GARCH(1,1)
+# we scale returns by 10 to help with the fitting
+scale = 10
+am = arch_model(scale * returns.hy.dropna())
+res = am.fit()
+vol_garch = res.conditional_volatility * math.sqrt(252)/scale
+vol = pd.concat([vol_sma, vol_ewma, vol_garch], axis=1)
+
+## let's get the betas
+beta_ewma = (returns.
+             ewm(span=20).
+             cov().
+             apply(lambda df:df.at['ig', 'hy']/df.at['ig', 'ig'],
+                   ('minor', 'major')))
+
+resids = returns.ig-beta_ewma*returns.hy
+
+# feather.write_dataframe(beta_ewma.reset_index(),
+#                         "/home/share/CorpCDOs/data/beta.fth")
diff --git a/python/exploration/option_trades.py b/python/exploration/option_trades.py
new file mode 100644
index 00000000..3edda8e4
--- /dev/null
+++ b/python/exploration/option_trades.py
@@ -0,0 +1,225 @@
+import cvxpy
+import datetime
+import math
+import numpy as np
+import pandas as pd
+
+from pandas.tseries.offsets import BDay
+from arch import arch_model
+from db import dbengine, dbconn
+from scipy.interpolate import interp1d
+from analytics import Index
+from index_data import index_returns
+serenitasdb = dbengine('serenitasdb')
+
+def realized_vol(index, series, tenor='5yr', date=None, years=None):
+    """computes the realized spread volatility"""
+    if date is None:
+        if years is None:
+            raise ValueError("need to provide at least one of date or years")
+        date = (pd.Timestamp.now() - pd.DateOffset(years=years)).date()
+    returns = index_returns(index=index, series=series, tenor=tenor, years=None)
+    # GARCH(1,1) volatility process with constant mean
+    am = arch_model(returns)
+    res = am.fit(update_freq=0, disp='off')
+    return (res.conditional_volatility * math.sqrt(252), res)
+
+def lr_var(res):
+    """ computes long run variance of the garch process
+
+    .. math::
+
+    \sigma^2=\frac{\omega}{1-\sum_{i=1}^p \alpha_i + \sum_{i=1}^q \beta_i}
+
+    """
+    names = res.model.volatility.parameter_names()
+    ## names[0] is omega, rest is alpha[1],..., alpha[p], beta[1],...,beta[q]
+    var = res.params[names[0]]/(1 - res.params[names[1:]])
+    return math.sqrt(var * 252)
+
+def atm_vol_fun(v, ref_is_price=False, moneyness=0.2):
+    f = interp1d(v.strike.values, v.vol.values, fill_value='extrapolate')
+    atm_val = v['fwdspread'].iat[0]
+    otm_val = atm_val * (1 + moneyness) ## doesn't make sense for HY
+    return pd.Series(f(np.array([atm_val, otm_val])), index = ['atm_vol', 'otm_vol'])
+
+def atm_vol(index, series, moneyness=0.2):
+    df =  pd.read_sql_query('SELECT quotedate, expiry, strike, vol from swaption_quotes ' \
+                            'WHERE index = %s and series = %s',
+                            serenitasdb, index_col=['quotedate', 'expiry'],
+                            params = (index.upper(), series))
+    index_data = pd.read_sql_query(
+        'SELECT quotedate, expiry, fwdspread from swaption_ref_quotes ' \
+        'WHERE index= %s and series = %s',
+        serenitasdb, index_col = ['quotedate', 'expiry'],
+        params = (index.upper(), series))
+
+    df = df.join(index_data)
+    df = df.groupby(level=['quotedate', 'expiry']).filter(lambda x: len(x)>=2)
+    df = df.groupby(level=['quotedate', 'expiry']).apply(atm_vol_fun, index=="HY", moneyness)
+    df = df.reset_index(level=-1) #move expiry back to the column
+    return df
+
+def atm_vol_date(index, date):
+    df =  pd.read_sql_query('SELECT quotedate, series, expiry, strike, vol ' \
+                            'FROM swaption_quotes ' \
+                            'WHERE index = %s and quotedate >= %s',
+                            serenitasdb,
+                            index_col=['quotedate', 'expiry', 'series'],
+                            params=(index.upper(), date))
+    index_data = pd.read_sql_query(
+        'SELECT quotedate, expiry, series,  fwdspread FROM swaption_ref_quotes ' \
+        'WHERE index= %s and quotedate >= %s',
+        serenitasdb, index_col=['quotedate', 'expiry', 'series'],
+        params = (index.upper(), date))
+    df = df.join(index_data)
+    df = df.groupby(df.index).filter(lambda x: len(x)>=2)
+    df = df.groupby(level=['quotedate', 'expiry', 'series']).apply(atm_vol_fun)
+    df = df.reset_index(level=['expiry', 'series']) #move expiry and series back to the columns
+    return df
+
+def rolling_vol(df, col='atm_vol', term=[3]):
+    """compute the rolling volatility for various terms"""
+    df = df.groupby(df.index).filter(lambda x: len(x)>2)
+    def aux(s, col, term):
+        k = s.index[0]
+        f = interp1d(s.expiry.values.astype('float'), s[col].values, fill_value='extrapolate')
+        x = np.array([(k + pd.DateOffset(months=t)).to_datetime64().astype('float') \
+                      for t in term])
+        return pd.Series(f(x), index=[str(t)+'m' for t in term])
+
+    df = df.groupby(level='quotedate').apply(aux, col, term)
+    # MS quotes don't have fwdspread so they end up as NA
+    return df.dropna()
+
+def vol_var(percentile=0.99, index='IG'):
+    df = atm_vol_date("IG", datetime.date(2014, 6, 11))
+    df = rolling_vol(df, term=[1,2,3])
+    df = df.sort_index()
+    df = df.groupby(df.index.date).nth(-1)
+    return df.pct_change().quantile(percentile)
+
+def get_index_spread(index, series, date, conn):
+    with conn.cursor() as c:
+        c.execute("SELECT closespread from index_quotes " \
+                  "WHERE index=%s and series=%s and date=%s and tenor='5yr'",
+                  (index, series, date))
+        try:
+            spread, = c.fetchone()
+        except TypeError:
+            spread = None
+    conn.commit()
+    return spread
+
+def get_index_ref(index, series, date, expiry, conn):
+    with conn.cursor() as c:
+        c.execute("SELECT ref, fwdspread from swaption_ref_quotes " \
+                  "WHERE index=%s and series=%s and quotedate::date=%s "\
+                  "AND expiry=%s ORDER BY quotedate desc",
+                  (index, series, date, expiry))
+        try:
+            ref, fwdspread = c.fetchone()
+        except TypeError:
+            ref, fwdspread = None, None
+    conn.commit()
+    return ref, fwdspread
+
+def get_option_pnl(strike, expiry, index, series, start_date, engine):
+    for s in [strike, strike+2.5, strike-2.5, strike+5]:
+        df = pd.read_sql_query("SELECT quotedate, (pay_bid+pay_offer)/2 AS pay_mid, " \
+                               "(rec_bid+rec_offer)/2 AS rec_mid FROM swaption_quotes " \
+                               "WHERE strike=%s and expiry=%s and index=%s and series=%s" \
+                               "and quotedate>=%s", engine,
+                               params=(s, expiry, index, series, start_date),
+                               index_col='quotedate',
+                               parse_dates=['quotedate'])
+        if not df.empty and df.index[0].date() == start_date:
+            strike = s
+            break
+    else:
+        raise ValueError("Couldn't find data starting from that date")
+
+    if not pd.api.types.is_datetime64tz_dtype(df.index):
+        df.index = df.index.tz_localize('utc')
+
+    df = df.groupby(df.index.normalize()).nth(-1)
+    if expiry < datetime.date.today():
+        spread = get_index_spread(index, series, expiry, engine.raw_connection())
+        underlying = Index.from_name(index, series, "5yr", expiry, 1e4)
+        underlying.spread = spread
+        pv = underlying.pv
+        underlying.spread = strike
+        if spread > strike:
+            pay_mid, rec_mid = pv-underlying.pv, 0
+        else:
+            pay_mid, rec_mid = 0, underlying.pv - pv
+            pv = underlying.pv
+        df = df.append(pd.DataFrame([[pay_mid, rec_mid]],
+                                    columns=['pay_mid', 'rec_mid'],
+                                    index=[pd.Timestamp(expiry, tz='UTC')]))
+    return df, strike
+
+def sell_vol_strategy(index="IG", months=3):
+    engine = dbengine('serenitasdb')
+    conn = engine.raw_connection()
+    with conn.cursor() as c1, conn.cursor() as c2:
+        c1.execute("SELECT DISTINCT series, expiry FROM " \
+                  "swaption_quotes ORDER BY expiry, series desc")
+        d = {}
+        for series, expiry in c1:
+            start_date = BDay().rollback(expiry - pd.DateOffset(months=months)).date()
+            if start_date > datetime.date.today():
+                break
+            c2.execute("SELECT max(quotedate::date) FROM swaption_quotes WHERE " \
+                       "index=%s AND series=%s AND expiry=%s AND quotedate<=%s",
+                       (index, series, expiry, start_date))
+            actual_start_date, = c2.fetchone()
+            if actual_start_date is None or (start_date - actual_start_date).days > 5:
+                continue
+            ref, fwdspread = get_index_ref(index, series, actual_start_date, expiry, conn)
+            if fwdspread is None:
+                fwdspread = ref + months / 50 #TODO: use actual values
+            strike = round(fwdspread/2.5) * 2.5
+            pnl, strike = get_option_pnl(strike, expiry, index, series, actual_start_date, engine)
+            d[(series, strike, expiry)] = pnl
+    conn.commit()
+    return d
+
+def aggregate_trades(d):
+    r = pd.Series()
+    for v in d.values():
+        r = r.add(-v.sum(1).diff().dropna(), fill_value=0)
+    return r
+
+def compute_allocation(df):
+    Sigma = df.cov().values
+    gamma = cvxpy.Parameter(sign='positive')
+    mu = df.mean().values
+    w = cvxpy.Variable(3)
+    ret = mu.T*w
+    risk = cvxpy.quad_form(w, Sigma)
+    prob = cvxpy.Problem(cvxpy.Maximize(ret - gamma * risk),
+                         [cvxpy.sum_entries(w) == 1,
+                          w >= -2,
+                           w <= 2])
+
+    gamma_x = np.linspace(0, 0.02, 500)
+    W = np.empty((3, gamma_x.size))
+    for i, val in enumerate(gamma_x):
+        gamma.value = val
+        prob.solve()
+        W[:,i] = np.asarray(w.value).squeeze()
+
+    fund_return = mu @ W
+    fund_vol= np.array([math.sqrt(W[:,i] @ Sigma @W[:,i]) for i in range(gamma_x.size)])
+    return (W, fund_return, fund_vol)
+
+if __name__ == "__main__":
+    d1 = sell_vol_strategy(months=1)
+    d2 = sell_vol_strategy(months=2)
+    d3 = sell_vol_strategy(months=3)
+    all_tenors = pd.concat([aggregate_trades(d) for d in [d1, d2, d3]], axis=1)
+    all_tenors.columns = ['1m', '2m', '3m']
+    all_tenors['optimal'] = ((1.2*all_tenors['1m']).
+                             sub(1.2*all_tenors['2m'], fill_value=0).
+                             add(all_tenors['3m'], fill_value=0))