Diffstat (limited to 'python/exploration/option_trades.py')
-rw-r--r--  python/exploration/option_trades.py  235
1 file changed, 147 insertions, 88 deletions
diff --git a/python/exploration/option_trades.py b/python/exploration/option_trades.py
index 3e19d185..c0ebaab3 100644
--- a/python/exploration/option_trades.py
+++ b/python/exploration/option_trades.py
@@ -11,17 +11,21 @@ from scipy.interpolate import interp1d
 from analytics import CreditIndex, ForwardIndex
 from analytics.index_data import index_returns
 
-serenitasdb = dbengine('serenitasdb')
+serenitasdb = dbengine("serenitasdb")
 
-def realized_vol(index, series=None, tenor='5yr', from_date=None, years=3, return_type='spread'):
+
+def realized_vol(
+    index, series=None, tenor="5yr", from_date=None, years=3, return_type="spread"
+):
     """computes the realized spread volatility"""
     returns = index_returns(None, index, series, tenor, from_date, years)
-    returns = returns.groupby(level='date').nth(-1).dropna()
+    returns = returns.groupby(level="date").nth(-1).dropna()
     # GARCH(1,1) volatility process with constant mean, scale to help with fitting
     scale = 10
     am = arch_model(scale * returns[f"{return_type}_return"])
-    res = am.fit(update_freq=0, disp='off')
-    return (res.conditional_volatility * math.sqrt(252)/scale, res)
+    res = am.fit(update_freq=0, disp="off")
+    return (res.conditional_volatility * math.sqrt(252) / scale, res)
+
 
 def lr_var(res):
     r"""
     computes long run variance of the garch process
@@ -33,74 +37,102 @@ def lr_var(res):
     """
     names = res.model.volatility.parameter_names()
     ## names[0] is omega, rest is alpha[1],..., alpha[p], beta[1],...,beta[q]
-    var = res.params[names[0]]/(1 - res.params[names[1:]])
+    var = res.params[names[0]] / (1 - res.params[names[1:]].sum())
     return math.sqrt(var * 252)
 
+
 def atm_vol_calc(df, index_type, moneyness):
+    df = df.set_index("ref", append=True)
     r = np.empty((len(df.index.unique()), 3))
     i = 0
     index_keys = []
-    for s, g1 in df.groupby(level='series'):
-        index = CreditIndex(index_type, s, '5yr')
-        for date, g2 in g1.groupby(pd.Grouper(level='quotedate', freq='D')):
+    for s, g1 in df.groupby(level="series"):
+        index = CreditIndex(index_type, s, "5yr", value_date=g1.index[0][0])
+        for date, g2 in g1.groupby(pd.Grouper(level="quotedate", freq="D")):
             if not g2.empty:
                 index.value_date = date.date()
-                for (ref, expiry), g3 in g2.reset_index('expiry').groupby(['ref', 'expiry']):
+                for (ref, expiry), g3 in g2.reset_index("expiry").groupby(
+                    ["ref", "expiry"]
+                ):
                     index.ref = ref
-                    atm_val = forward_spread = ForwardIndex(index, expiry, False).forward_spread
+                    atm_val = forward_spread = ForwardIndex(
+                        index, expiry, False
+                    ).forward_spread
                     otm_val = atm_val * (1 + moneyness)
                     if index._quote_is_price:
                         index.spread = atm_val
                         atm_val = index.price
                         index.spread = otm_val
                         otm_val = index.price
-                    for quotedate, v in g3.groupby(level='quotedate'):
-                        f = interp1d(v.strike.values, v.vol.values, fill_value='extrapolate')
+                    for quotedate, v in g3.groupby(level="quotedate"):
+                        f = interp1d(
+                            v.strike.values, v.vol.values, fill_value="extrapolate"
+                        )
                         r[i, 0] = forward_spread
                         r[i, 1:] = f([atm_val, otm_val])
                         i += 1
                         index_keys.append((quotedate, expiry, s))
-    df = pd.DataFrame(data=r,
-                      index=pd.MultiIndex.from_tuples(index_keys,
-                                                      names=['quotedate', 'expiry', 'series']),
-                      columns=['forward_spread', 'atm_vol', 'otm_vol'])
-    df['T'] = df.index.get_level_values('expiry').values.astype('datetime64[D]') - \
-        df.index.get_level_values('quotedate').values.astype('datetime64[D]')
-    df['T'] = df['T'].dt.days / 365
+    df = pd.DataFrame(
+        data=r,
+        index=pd.MultiIndex.from_tuples(
+            index_keys, names=["quotedate", "expiry", "series"]
+        ),
+        columns=["forward_spread", "atm_vol", "otm_vol"],
+    )
+    df["T"] = df.index.get_level_values("expiry").values.astype(
+        "datetime64[D]"
+    ) - df.index.get_level_values("quotedate").values.astype("datetime64[D]")
+    df["T"] = df["T"].dt.days / 365
     return df
 
+
 def atm_vol(index, date, series=None, moneyness=0.2):
-    extra_filter = ''
+    extra_filter = ""
     params = (index.upper(), date)
     if series:
-        extra_filter = ' AND series=%s'
+        extra_filter = " AND series=%s"
         params = params + (series,)
-    sql_str = "SELECT * from swaption_ref_quotes JOIN swaption_quotes " \
-              "USING (ref_id) WHERE index=%s " \
-              f"and quotedate>=%s {extra_filter} ORDER BY quotedate ASC"
+    sql_str = (
+        "SELECT * from swaption_ref_quotes JOIN swaption_quotes "
+        "USING (ref_id) WHERE index=%s "
+        f"and quotedate>=%s {extra_filter} ORDER BY quotedate ASC"
+    )
     df = pd.read_sql_query(sql_str, serenitasdb, params=params)
-    df.quotedate = pd.to_datetime(df.quotedate, utc=True).dt.tz_convert('America/New_York')
-    df = df.set_index(['quotedate', 'index', 'series', 'expiry'])
-    df = df.groupby(level=['quotedate', 'index', 'series', 'expiry']).filter(lambda x: len(x)>2)
+    df.quotedate = pd.to_datetime(df.quotedate, utc=True).dt.tz_convert(
+        "America/New_York"
+    )
+    df = df.set_index(["quotedate", "index", "series", "expiry"])
+    df = df.groupby(level=["quotedate", "index", "series", "expiry"]).filter(
+        lambda x: len(x) > 2
+    )
     return atm_vol_calc(df, index, moneyness)
 
-def rolling_vol(df, col='atm_vol', term=[3]):
+
+def rolling_vol(df, col="atm_vol", term=[3]):
     """compute the rolling volatility for various terms"""
-    df = df.reset_index(level=['expiry', 'series'])
-    df = df.groupby(df.index).filter(lambda x: len(x)>2)
+    df = df.reset_index(level=["expiry", "series"])
+    df = df.groupby(df.index).filter(lambda x: len(x) > 2)
+
     def aux(s, col, term):
         k = s.index[0]
-        f = interp1d(s.expiry.values.astype('float'), s[col].values, fill_value='extrapolate')
-        x = np.array([(k + pd.DateOffset(months=t)).to_datetime64().astype('float') \
-                      for t in term])
-        return pd.Series(f(x), index=[str(t)+'m' for t in term])
+        f = interp1d(
+            s.expiry.values.astype("float"), s[col].values, fill_value="extrapolate"
+        )
+        x = np.array(
+            [
+                (k + pd.DateOffset(months=t)).to_datetime64().astype("float")
+                for t in term
+            ]
+        )
+        return pd.Series(f(x), index=[str(t) + "m" for t in term])
 
-    df = df.groupby(level='quotedate').apply(aux, col, term)
+    df = df.groupby(level="quotedate").apply(aux, col, term)
     # MS quotes don't have fwdspread so they end up as NA
     return df.dropna()
 
-def vol_var(percentile=0.975, index='IG', start_date=datetime.date(2014, 6, 11)):
+
+def vol_var(percentile=0.975, index="IG", start_date=datetime.date(2014, 6, 11)):
     """compute lo and hi percentiles of atm volatility daily change
 
     Probably overestimates:
@@ -108,16 +140,19 @@ def vol_var(percentile=0.975, index='IG', start_date=datetime.date(2014, 6, 11))
     - we should group it by series
     """
     df = atm_vol(index, start_date)
-    df = rolling_vol(df, term=[1,2,3])
+    df = rolling_vol(df, term=[1, 2, 3])
    df = df.sort_index()
     df = df.groupby(df.index.date).nth(-1)
-    return df.diff().quantile([1-percentile, percentile])
+    return df.diff().quantile([1 - percentile, percentile])
+
 
 def get_index_spread(index, series, date, conn):
     with conn.cursor() as c:
-        c.execute("SELECT closespread from index_quotes " \
-                  "WHERE index=%s and series=%s and date=%s and tenor='5yr'",
-                  (index, series, date))
+        c.execute(
+            "SELECT closespread from index_quotes "
+            "WHERE index=%s and series=%s and date=%s and tenor='5yr'",
+            (index, series, date),
+        )
         try:
             spread, = c.fetchone()
         except TypeError:
@@ -125,12 +160,15 @@
         conn.commit()
     return spread
 
+
 def get_index_ref(index, series, date, expiry, conn):
     with conn.cursor() as c:
-        c.execute("SELECT ref, fwdspread from swaption_ref_quotes " \
-                  "WHERE index=%s and series=%s and quotedate::date=%s "\
-                  "AND expiry=%s ORDER BY quotedate desc",
-                  (index, series, date, expiry))
+        c.execute(
+            "SELECT ref, fwdspread from swaption_ref_quotes "
+            "WHERE index=%s and series=%s and quotedate::date=%s "
+            "AND expiry=%s ORDER BY quotedate desc",
+            (index, series, date, expiry),
+        )
         try:
             ref, fwdspread = c.fetchone()
         except TypeError:
@@ -138,15 +176,19 @@
         conn.commit()
     return ref, fwdspread
 
+
 def get_option_pnl(strike, expiry, index, series, start_date, engine):
-    for s in [strike, strike+2.5, strike-2.5, strike+5]:
-        df = pd.read_sql_query("SELECT quotedate, (pay_bid+pay_offer)/2 AS pay_mid, " \
-                               "(rec_bid+rec_offer)/2 AS rec_mid FROM swaption_quotes " \
-                               "WHERE strike=%s and expiry=%s and index=%s and series=%s" \
-                               "and quotedate>=%s", engine,
-                               params=(s, expiry, index, series, start_date),
-                               index_col='quotedate',
-                               parse_dates=['quotedate'])
+    for s in [strike, strike + 2.5, strike - 2.5, strike + 5]:
+        df = pd.read_sql_query(
+            "SELECT quotedate, (pay_bid+pay_offer)/2 AS pay_mid, "
+            "(rec_bid+rec_offer)/2 AS rec_mid FROM swaption_quotes "
+            "WHERE strike=%s and expiry=%s and index=%s and series=%s "
+            "and quotedate>=%s",
+            engine,
+            params=(s, expiry, index, series, start_date),
+            index_col="quotedate",
+            parse_dates=["quotedate"],
+        )
         if not df.empty and df.index[0].date() == start_date:
             strike = s
             break
@@ -154,7 +196,7 @@
         raise ValueError("Couldn't find data starting from that date")
 
     if not pd.api.types.is_datetime64tz_dtype(df.index):
-        df.index = df.index.tz_localize('utc')
+        df.index = df.index.tz_localize("utc")
     df = df.groupby(df.index.normalize()).nth(-1)
 
     if expiry < datetime.date.today():
@@ -164,70 +206,86 @@
         pv = underlying.pv
         underlying.spread = strike
         if spread > strike:
-            pay_mid, rec_mid = pv-underlying.pv, 0
+            pay_mid, rec_mid = pv - underlying.pv, 0
         else:
             pay_mid, rec_mid = 0, underlying.pv - pv
             pv = underlying.pv
-        df = df.append(pd.DataFrame([[pay_mid, rec_mid]],
-                                    columns=['pay_mid', 'rec_mid'],
-                                    index=[pd.Timestamp(expiry, tz='UTC')]))
+        df = df.append(
+            pd.DataFrame(
+                [[pay_mid, rec_mid]],
+                columns=["pay_mid", "rec_mid"],
+                index=[pd.Timestamp(expiry, tz="UTC")],
+            )
+        )
     return df, strike
 
+
 def sell_vol_strategy(index="IG", months=3):
-    engine = dbengine('serenitasdb')
-    conn = engine.raw_connection()
+    conn = serenitasdb.raw_connection()
     with conn.cursor() as c1, conn.cursor() as c2:
-        c1.execute("SELECT DISTINCT series, expiry FROM " \
-                   "swaption_quotes ORDER BY expiry, series desc")
+        c1.execute(
+            "SELECT DISTINCT series, expiry FROM "
+            "swaption_quotes ORDER BY expiry, series desc"
+        )
         d = {}
         for series, expiry in c1:
             start_date = BDay().rollback(expiry - pd.DateOffset(months=months)).date()
             if start_date > datetime.date.today():
                 break
-            c2.execute("SELECT max(quotedate::date) FROM swaption_quotes WHERE " \
-                       "index=%s AND series=%s AND expiry=%s AND quotedate<=%s",
-                       (index, series, expiry, start_date))
+            c2.execute(
+                "SELECT max(quotedate::date) FROM swaption_quotes WHERE "
+                "index=%s AND series=%s AND expiry=%s AND quotedate<=%s",
+                (index, series, expiry, start_date),
+            )
             actual_start_date, = c2.fetchone()
             if actual_start_date is None or (start_date - actual_start_date).days > 5:
                 continue
-            ref, fwdspread = get_index_ref(index, series, actual_start_date, expiry, conn)
+            ref, fwdspread = get_index_ref(
+                index, series, actual_start_date, expiry, conn
+            )
            if fwdspread is None:
-                fwdspread = ref + months / 50 #TODO: use actual values
-            strike = round(fwdspread/2.5) * 2.5
-            pnl, strike = get_option_pnl(strike, expiry, index, series, actual_start_date, engine)
+                fwdspread = ref + months / 50  # TODO: use actual values
+            strike = round(fwdspread / 2.5) * 2.5
+            pnl, strike = get_option_pnl(
+                strike, expiry, index, series, actual_start_date, serenitasdb
+            )
             d[(series, strike, expiry)] = pnl
         conn.commit()
    return d
 
+
 def aggregate_trades(d):
     r = pd.Series()
     for v in d.values():
         r = r.add(-v.sum(1).diff().dropna(), fill_value=0)
     return r
 
+
 def compute_allocation(df):
     Sigma = df.cov().values
-    gamma = cvxpy.Parameter(sign='positive')
+    gamma = cvxpy.Parameter(sign="positive")
     mu = df.mean().values
     w = cvxpy.Variable(3)
-    ret = mu.T*w
+    ret = mu.T * w
     risk = cvxpy.quad_form(w, Sigma)
-    prob = cvxpy.Problem(cvxpy.Maximize(ret - gamma * risk),
-                         [cvxpy.sum_entries(w) == 1,
-                          w >= -2,
-                          w <= 2])
+    prob = cvxpy.Problem(
+        cvxpy.Maximize(ret - gamma * risk), [cvxpy.sum_entries(w) == 1, w >= -2, w <= 2]
+    )
     gamma_x = np.linspace(0, 0.02, 500)
     W = np.empty((3, gamma_x.size))
     for i, val in enumerate(gamma_x):
         gamma.value = val
         prob.solve()
-        W[:,i] = np.asarray(w.value).squeeze()
+        W[:, i] = np.asarray(w.value).squeeze()
     fund_return = mu @ W
-    fund_vol= np.array([math.sqrt(W[:,i] @ Sigma @W[:,i]) for i in range(gamma_x.size)])
+    fund_vol = np.array(
+        [math.sqrt(W[:, i] @ Sigma @ W[:, i]) for i in range(gamma_x.size)]
+    )
     return (W, fund_return, fund_vol)
 
+
 if __name__ == "__main__":
     # d1 = sell_vol_strategy(months=1)
     # d2 = sell_vol_strategy(months=2)
@@ -239,22 +297,23 @@ if __name__ == "__main__":
     #         add(all_tenors['3m'], fill_value=0))
     import datetime
     import statsmodels.formula.api as smf
+
     ## HY
     df = atm_vol("HY", datetime.date(2017, 3, 20))
-    df['forward_spread'] *= 1e-4
-    df['log_forward_spread'] = np.log(df['forward_spread'])
-    df['log_atm_vol'] = np.log(df['atm_vol'])
-    df_hy28 = df.xs(28, level='series')
-    results = smf.ols('log_atm_vol ~ log_forward_spread + T', data=df_hy28).fit()
+    df["forward_spread"] *= 1e-4
+    df["log_forward_spread"] = np.log(df["forward_spread"])
+    df["log_atm_vol"] = np.log(df["atm_vol"])
+    df_hy28 = df.xs(28, level="series")
+    results = smf.ols("log_atm_vol ~ log_forward_spread + T", data=df_hy28).fit()
     beta_hy28 = 1 + results.params.log_forward_spread
     print(results.summary())
     ## IG
     df = atm_vol("IG", datetime.date(2017, 3, 20))
-    df['forward_spread'] *= 1e-4
-    df['log_forward_spread'] = np.log(df['forward_spread'])
-    df['log_atm_vol'] = np.log(df['atm_vol'])
-    df_ig28 = df.xs(28, level='series')
-    results = smf.ols('log_atm_vol ~ log_forward_spread + T', data=df_ig28).fit()
+    df["forward_spread"] *= 1e-4
+    df["log_forward_spread"] = np.log(df["forward_spread"])
+    df["log_atm_vol"] = np.log(df["atm_vol"])
+    df_ig28 = df.xs(28, level="series")
+    results = smf.ols("log_atm_vol ~ log_forward_spread + T", data=df_ig28).fit()
    beta_ig28 = 1 + results.params.log_forward_spread
     print(results.summary())
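
Note on lr_var(): for a GARCH(1,1) process the unconditional (long-run) daily variance is omega / (1 - alpha - beta), which lr_var() annualizes with the same sqrt(252) convention used in realized_vol(). A minimal self-contained sketch of that calculation, using simulated returns in place of the index data (the simulated series and seed are placeholders, not real quotes):

    import math

    import numpy as np
    from arch import arch_model

    # Fit a constant-mean GARCH(1,1), as realized_vol() does, then recover
    # the annualized long-run volatility from the fitted parameters.
    rng = np.random.default_rng(42)
    returns = rng.standard_normal(2500)  # stand-in for daily spread returns

    res = arch_model(returns).fit(update_freq=0, disp="off")
    names = res.model.volatility.parameter_names()  # ['omega', 'alpha[1]', 'beta[1]']
    long_run_var = res.params[names[0]] / (1 - res.params[names[1:]].sum())
    print("annualized long-run vol:", math.sqrt(252 * long_run_var))

The .sum() over the alpha and beta parameters matches the comment in lr_var() ("names[0] is omega, rest is alpha[1],..., alpha[p], beta[1],...,beta[q]"): the long-run variance divides omega by one minus the total persistence, not by each parameter separately.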
