diff options
| -rw-r--r-- | python/exploration/beta_trade.py (renamed from python/beta_trade.py) | 29 | ||||
| -rw-r--r-- | python/exploration/option_trades.py (renamed from python/option_trades.py) | 113 | ||||
| -rw-r--r-- | python/index_data.py | 106 |
3 files changed, 129 insertions, 119 deletions
diff --git a/python/beta_trade.py b/python/exploration/beta_trade.py index 34058090..1e86a97f 100644 --- a/python/beta_trade.py +++ b/python/exploration/beta_trade.py @@ -1,16 +1,21 @@ +import math import pandas as pd import feather -from option_trades import index_price_returns +from index_data import index_returns from arch import arch_model +from statsmodels.graphics.tsaplots import plot_acf, plot_pacf -returns = pd.concat([index_price_returns(index=i) for i in ['IG', 'HY']], axis=1) -returns.columns = ['ig', 'hy'] -feather.write_dataframe(returns.reset_index(), "/home/share/CorpCDOs/data/index_returns.fth") -model = pd.ols(y = returns.hy, x=returns.ig) -beta = model.beta.x +returns = index_returns(index=['IG', 'HY'], tenor='5yr') +returns = (returns. + reset_index(level='series'). + groupby(level=['date','index']). + nth(-1)) ## lastest returns -am = arch_model(returns.ig.dropna()) -res = am.fit() +returns = (returns. + set_index('series', append=True)['price_return']. + unstack(level='index')) +returns.columns = [col.lower() for col in returns.columns] +feather.write_dataframe(returns.reset_index(), "/home/share/CorpCDOs/data/index_returns.fth") # three ways of computing the volatility # 20 days simple moving average @@ -28,8 +33,10 @@ vol = pd.concat([vol_sma, vol_ewma, vol_garch], axis=1) beta_ewma = (returns. ewm(span=20). cov(). - apply(lambda df:df.loc['ig', 'hy']/df.loc['ig', 'ig'], + apply(lambda df:df.at['ig', 'hy']/df.at['ig', 'ig'], ('minor', 'major'))) -feather.write_dataframe(beta_ewma.reset_index(), - "/home/share/CorpCDOs/data/beta.fth") +resids = returns.ig-beta_ewma*returns.hy + +# feather.write_dataframe(beta_ewma.reset_index(), +# "/home/share/CorpCDOs/data/beta.fth") diff --git a/python/option_trades.py b/python/exploration/option_trades.py index 7aae31ba..3edda8e4 100644 --- a/python/option_trades.py +++ b/python/exploration/option_trades.py @@ -9,105 +9,16 @@ from arch import arch_model from db import dbengine, dbconn from scipy.interpolate import interp1d from analytics import Index -from dates import bond_cal +from index_data import index_returns serenitasdb = dbengine('serenitasdb') -def get_daily_pnl(index, series, tenor, coupon=1): - sql_str = "SELECT date, adjcloseprice AS close, closespread AS spread, duration, theta FROM index_quotes " \ - "WHERE index=%s and series=%s and tenor = %s" - df = pd.read_sql_query(sql_str, serenitasdb, parse_dates=['date'], - index_col=['date'], params=(index, series, tenor)) - df.sort_index(inplace=True) - df['dt'] = df.index.to_series().diff().astype('timedelta64[D]') - df['pnl'] = df['close'].ffill().diff() + df.dt/360*coupon - return df - -def daily_spreads(index, series, tenor): - """computes daily spreads returns - - Parameters - ---------- - index : string - series : int - tenor : string - """ - sql_str = "SELECT date, closespread AS spread FROM index_quotes " \ - "WHERE index=%s and series=%s and tenor = %s" - df = pd.read_sql_query(sql_str, serenitasdb, parse_dates=['date'], index_col=['date'], - params=(index.upper(), series, tenor)) - df.sort_index(inplace=True) - return df.spread.pct_change().dropna() - -def insert_quotes(): - # backpopulate some version i+1 quotes one day before they start trading so that - # we get continuous time series in the returns - dates = pd.DatetimeIndex(['2014-05-21', '2015-02-19', '2015-03-05','2015-06-23']) - df = pd.read_sql_query("SELECT DISTINCT ON (date) * FROM index_quotes " \ - "WHERE index='HY' AND tenor='5yr' " \ - "ORDER BY date, series DESC, version DESC", - serenitasdb, parse_dates=['date'], index_col=['date']) - df = df.loc[dates] - for tup in df.itertuples(): - result = serenitasdb.execute("SELECT indexfactor, cumulativeloss FROM index_version " \ - "WHERE index = 'HY' AND series=%s AND version in (%s, %s)" \ - "ORDER BY version", - (tup.series, tup.version, tup.version+1)) - factor1, cumloss1 = result.fetchone() - factor2, cumloss2 = result.fetchone() - recovery = 1-(cumloss2-cumloss1) - version2_price = (factor1 * tup.closeprice - 100*recovery)/factor2 - print(version2_price) - serenitasdb.execute("INSERT INTO index_quotes(date, index, series, version, tenor, closeprice)" \ - "VALUES(%s, %s, %s, %s, %s, %s)", - (tup.Index, 'HY', tup.series, tup.version+1, tup.tenor, version2_price)) - -def index_returns(date=None, years=3, index="IG", tenor="5yr"): - """computes on the run spread returns""" - if date is None: - date = (pd.Timestamp.now() - pd.DateOffset(years=years)).date() - sql_str = "SELECT date, series, version, closespread AS spread FROM index_quotes " \ - "WHERE index=%s and date>=%s and tenor = %s" - df = pd.read_sql_query(sql_str, serenitasdb, parse_dates=['date'], - index_col=['date', 'series', 'version'], - params=(index.upper(), date, tenor)) - df.sort_index(inplace=True) - return (df.groupby(level=['series', 'version']). - transform(lambda x: x.pct_change()). - dropna(). - groupby(level='date'). - last()) - - -def index_price_returns(date=None, years=3, index="IG", tenor="5yr"): - """computes on the run price returns taking coupons into account""" - if date is None: - date = (pd.Timestamp.now() - pd.DateOffset(years=years)).date() - sql_str = "SELECT date, series, version, closeprice AS price FROM index_quotes " \ - "WHERE index=%s and date>=%s and tenor = %s" - df = pd.read_sql_query(sql_str, serenitasdb, parse_dates=['date'], - index_col=['date', 'series', 'version'], - params=(index.upper(), date, tenor)) - df.sort_index(inplace=True) - ## get rid of holidays - dates = df.index.levels[0] - holidays = bond_cal().holidays(start=dates[0], end=dates[-1]) - df = df.loc(axis=0)[dates.difference(holidays),:,:] - - def returns(df, coupon=1): - df['returns'] = df.price.pct_change() + \ - coupon * df.index.levels[0].to_series().diff().dt.days/360/df.price - return df - return (df.groupby(level=['series', 'version']). - apply(returns, (1 if index=='IG' else 5,)). - groupby(level='date')['returns'].last()) - -def realized_vol(index, series, tenor, date=None, years=None): +def realized_vol(index, series, tenor='5yr', date=None, years=None): """computes the realized spread volatility""" if date is None: if years is None: raise ValueError("need to provide at least one of date or years") date = (pd.Timestamp.now() - pd.DateOffset(years=years)).date() - returns = daily_spreads(index, series, tenor) + returns = index_returns(index=index, series=series, tenor=tenor, years=None) # GARCH(1,1) volatility process with constant mean am = arch_model(returns) res = am.fit(update_freq=0, disp='off') @@ -185,21 +96,9 @@ def vol_var(percentile=0.99, index='IG'): df = atm_vol_date("IG", datetime.date(2014, 6, 11)) df = rolling_vol(df, term=[1,2,3]) df = df.sort_index() - df = df.groupby(df.index.date).last() + df = df.groupby(df.index.date).nth(-1) return df.pct_change().quantile(percentile) -def index_rolling_returns(date=None, years=3, index="IG", tenor="5yr"): - """computes on the run returns""" - if date is None: - date = (pd.Timestamp.now() - pd.DateOffset(years=years)).date() - sql_str = "SELECT date, series, closespread AS spread FROM index_quotes " \ - "WHERE index=%s and date>=%s and tenor = %s" - df = pd.read_sql_query(sql_str, serenitasdb, parse_dates=['date'], index_col=['date', 'series'], - params=(index.upper(), date, tenor)) - df.sort_index(inplace=True) - df = df.groupby(level='series').pct_change() - return df.groupby(level='date').last() - def get_index_spread(index, series, date, conn): with conn.cursor() as c: c.execute("SELECT closespread from index_quotes " \ @@ -243,7 +142,7 @@ def get_option_pnl(strike, expiry, index, series, start_date, engine): if not pd.api.types.is_datetime64tz_dtype(df.index): df.index = df.index.tz_localize('utc') - df = df.groupby(df.index.normalize()).last() + df = df.groupby(df.index.normalize()).nth(-1) if expiry < datetime.date.today(): spread = get_index_spread(index, series, expiry, engine.raw_connection()) underlying = Index.from_name(index, series, "5yr", expiry, 1e4) @@ -292,7 +191,6 @@ def aggregate_trades(d): r = r.add(-v.sum(1).diff().dropna(), fill_value=0) return r - def compute_allocation(df): Sigma = df.cov().values gamma = cvxpy.Parameter(sign='positive') @@ -316,7 +214,6 @@ def compute_allocation(df): fund_vol= np.array([math.sqrt(W[:,i] @ Sigma @W[:,i]) for i in range(gamma_x.size)]) return (W, fund_return, fund_vol) - if __name__ == "__main__": d1 = sell_vol_strategy(months=1) d2 = sell_vol_strategy(months=2) diff --git a/python/index_data.py b/python/index_data.py new file mode 100644 index 00000000..bf772100 --- /dev/null +++ b/python/index_data.py @@ -0,0 +1,106 @@ +from db import dbengine, dbconn +from dates import bond_cal + +import datetime +import pandas as pd +serenitasdb = dbengine('serenitasdb') + +def insert_quotes(): + # backpopulate some version i+1 quotes one day before they start trading so that + # we get continuous time series in the rb + eturns + dates = pd.DatetimeIndex(['2014-05-21', '2015-02-19', '2015-03-05','2015-06-23']) + df = pd.read_sql_query("SELECT DISTINCT ON (date) * FROM index_quotes " \ + "WHERE index='HY' AND tenor='5yr' " \ + "ORDER BY date, series DESC, version DESC", + serenitasdb, parse_dates=['date'], index_col=['date']) + df = df.loc[dates] + for tup in df.itertuples(): + result = serenitasdb.execute("SELECT indexfactor, cumulativeloss FROM index_version " \ + "WHERE index = 'HY' AND series=%s AND version in (%s, %s)" \ + "ORDER BY version", + (tup.series, tup.version, tup.version+1)) + factor1, cumloss1 = result.fetchone() + factor2, cumloss2 = result.fetchone() + recovery = 1-(cumloss2-cumloss1) + version2_price = (factor1 * tup.closeprice - 100*recovery)/factor2 + print(version2_price) + serenitasdb.execute("INSERT INTO index_quotes(date, index, series, version, tenor, closeprice)" \ + "VALUES(%s, %s, %s, %s, %s, %s)", + (tup.Index, 'HY', tup.series, tup.version+1, tup.tenor, version2_price)) + +def get_index_quotes(index=None, series=None, tenor=None, date=None, years=3): + args = locals().copy() + if args['years'] is not None: + args['date'] = (pd.Timestamp.now() - pd.DateOffset(years=years)).date() + del args['years'] + + def make_str(key, val): + if isinstance(val, list): + op = "IN" + return "{} IN %({})s".format(key, key) + elif isinstance(val, datetime.date): + op = ">=" + else: + op = "=" + return "{} {} %({})s".format(key, op, key) + + where_clause = " AND ".join(make_str(k, v) + for k, v in args.items() if v is not None) + sql_str = "SELECT * FROM index_quotes" + if where_clause: + sql_str = " WHERE ".join([sql_str, where_clause]) + + def make_params(args): + return {k: tuple(v) if isinstance(v, list) else v + for k, v in args.items() if v is not None} + + df = pd.read_sql_query(sql_str, serenitasdb, parse_dates=['date'], + index_col=['date', 'index', 'series', 'version', 'tenor'], + params = make_params(args)) + df.sort_index(inplace=True) + ## get rid of US holidays + dates = df.index.levels[0] + if index in ['IG', 'HY']: + holidays = bond_cal().holidays(start=dates[0], end=dates[-1]) + df = df.loc(axis=0)[dates.difference(holidays),:,:] + return df + +def index_returns(df=None, index=None, series=None, tenor=None, date=None, years=3): + """computes daily spreads and price returns + + Parameters + ---------- + df : pandas.DataFrame + index : str or List[str], optional + index type, one of 'IG', 'HY', 'EU', 'XO' + series : int or List[int], optional + tenor : str or List[str], optional + tenor in years e.g: '3yr', '5yr' + date : datetime.date, optional + starting date + years : int, optional + limits many years do we go back starting from today. + + """ + if df is None: + df = get_index_quotes(index, series, tenor, date, years) + df = (df. + groupby(level=['index', 'series', 'version', 'tenor']) + [['closespread','closeprice']]. + pct_change()) + df.columns = ['spread_return', 'price_return'] + df = df.groupby(level=['date', 'index', 'series', 'tenor']).nth(-1) + coupon_data = pd.read_sql_query("SELECT index, series, tenor, coupon FROM " \ + "index_maturity WHERE coupon is NOT NULL", serenitasdb, + index_col=['index', 'series', 'tenor']) + def add_accrued(df): + coupon = coupon_data.loc[df.index[0][1:],'coupon'] * 1e-4 + accrued = (df.index.levels[0].to_series().diff(). + astype('timedelta64[D]')/360 * coupon) + return df + accrued + + df['price_return'] = (df. + groupby(level=['index', 'series', 'tenor'])['price_return']. + transform(add_accrued)) + return df |
