diff options
Diffstat (limited to 'python/index_data.py')
| -rw-r--r-- | python/index_data.py | 176 |
1 file changed, 0 insertions, 176 deletions
from db import dbengine, dbconn
from dates import bond_cal
import numpy as np

from analytics.utils import roll_date, previous_twentieth
from pandas.tseries.offsets import BDay
from pyisda.curve import SpreadCurve, fill_curve
from multiprocessing import Pool
from yieldcurve import get_curve

import datetime
import pandas as pd

# Shared engine for the serenitas database, used by every query below.
_serenitas_engine = dbengine('serenitasdb')


def insert_quotes():
    """
    backpopulate some version i+1 quotes one day before they start trading so that
    we get continuous time series when we compute returns.

    We can also do it in sql as follows:

    INSERT INTO index_quotes(date, index, series, version, tenor, closeprice)
    SELECT date, index, series, version+1, tenor, (factor1*closeprice-100*0.355)/factor2
    FROM index_quotes
    WHERE index='HY' and series=23 and date='2017-02-02'

    """
    # Hard-coded dates on which a version i+1 quote needs backfilling.
    dates = pd.DatetimeIndex(['2014-05-21', '2015-02-19', '2015-03-05', '2015-06-23'])
    df = pd.read_sql_query("SELECT DISTINCT ON (date) * FROM index_quotes "
                           "WHERE index='HY' AND tenor='5yr' "
                           "ORDER BY date, series DESC, version DESC",
                           _serenitas_engine, parse_dates=['date'], index_col=['date'])
    df = df.loc[dates]
    for tup in df.itertuples():
        # Factor and cumulative loss for versions i and i+1, ordered so the
        # first fetched row is version i.
        # BUG FIX: original referenced undefined name `serenitasdb`; also added
        # the missing space before ORDER BY in the concatenated SQL string.
        result = _serenitas_engine.execute(
            "SELECT indexfactor, cumulativeloss FROM index_version "
            "WHERE index = 'HY' AND series=%s AND version in (%s, %s) "
            "ORDER BY version",
            (tup.series, tup.version, tup.version + 1))
        factor1, cumloss1 = result.fetchone()
        factor2, cumloss2 = result.fetchone()
        # Recovery implied by the loss increment between the two versions.
        recovery = 1 - (cumloss2 - cumloss1)
        version2_price = (factor1 * tup.closeprice - 100 * recovery) / factor2
        print(version2_price)
        _serenitas_engine.execute(
            "INSERT INTO index_quotes(date, index, series, version, tenor, closeprice)"
            "VALUES(%s, %s, %s, %s, %s, %s)",
            (tup.Index, 'HY', tup.series, tup.version + 1, tup.tenor, version2_price))


def get_index_quotes(index=None, series=None, tenor=None, from_date=None, years=3):
    """Load index quotes into a frame indexed by (date, index, series, version, tenor).

    Parameters
    ----------
    index : str or List[str], optional
        index type, e.g. 'IG', 'HY', 'EU', 'XO'
    series : int or List[int], optional
    tenor : str or List[str], optional
        e.g. '3yr', '5yr'
    from_date : datetime.date, optional
        explicit start date; takes precedence over `years`
    years : int, optional
        how many years to go back from today when `from_date` is not given

    Returns
    -------
    pandas.DataFrame sorted by its MultiIndex, with US bond-market holidays
    dropped for the 'IG'/'HY' indices.
    """
    args = locals().copy()
    # Collapse years/from_date into a single 'date' entry that make_str turns
    # into a `date >= ...` filter.
    if args['years'] is not None:
        args['date'] = (pd.Timestamp.now() - pd.DateOffset(years=years)).date()
    del args['years']
    if args['from_date']:
        args['date'] = args['from_date']
    del args['from_date']

    def make_str(key, val):
        # One WHERE-clause term per argument, with a named placeholder.
        if isinstance(val, list):
            op = "IN"
        elif isinstance(val, datetime.date):
            op = ">="
        else:
            op = "="
        return "{} {} %({})s".format(key, op, key)

    where_clause = " AND ".join(make_str(k, v)
                                for k, v in args.items() if v is not None)
    sql_str = "SELECT * FROM index_quotes"
    if where_clause:
        sql_str = " WHERE ".join([sql_str, where_clause])

    def make_params(args):
        # DB driver wants tuples (not lists) for IN (...) parameters.
        return {k: tuple(v) if isinstance(v, list) else v
                for k, v in args.items() if v is not None}

    df = pd.read_sql_query(sql_str, _serenitas_engine, parse_dates=['date'],
                           index_col=['date', 'index', 'series', 'version'],
                           params=make_params(args))
    # BUG FIX: astype('category', categories=..., ordered=...) was removed in
    # modern pandas; use an explicit ordered CategoricalDtype instead.
    tenor_dtype = pd.CategoricalDtype(("3yr", "5yr", "7yr", "10yr"), ordered=True)
    df['tenor'] = df['tenor'].astype(tenor_dtype)
    df = df.set_index('tenor', append=True)
    df.sort_index(inplace=True)
    ## get rid of US holidays
    dates = df.index.levels[0]
    if index in ['IG', 'HY']:
        holidays = bond_cal().holidays(start=dates[0], end=dates[-1])
        df = df.loc(axis=0)[dates.difference(holidays), :, :]
    return df


def index_returns(df=None, index=None, series=None, tenor=None, from_date=None, years=3, per=1):
    """computes daily spreads and price returns

    Parameters
    ----------
    df : pandas.DataFrame, optional
        precomputed quotes; loaded via `get_index_quotes` when None
    index : str or List[str], optional
        index type, one of 'IG', 'HY', 'EU', 'XO'
    series : int or List[int], optional
    tenor : str or List[str], optional
        tenor in years e.g: '3yr', '5yr'
    from_date : datetime.date, optional
        starting date
    years : int, optional
        limits how many years we go back starting from today.
    per : int, optional
        calculate returns across different time frames

    Returns
    -------
    pandas.DataFrame with `spread_return` and carry-adjusted `price_return`.
    """
    if df is None:
        df = get_index_quotes(index, series, tenor, from_date, years)
    df = (df
          .groupby(level=['index', 'series', 'tenor', 'version'])
          [['closespread', 'closeprice']]
          .pct_change(periods=per))
    df.columns = ['spread_return', 'price_return']
    # On roll dates two versions coexist; keep a single row per
    # (date, index, series, tenor) by taking the first version.
    df = df.groupby(level=['date', 'index', 'series', 'tenor']).nth(0)
    coupon_data = pd.read_sql_query(
        "SELECT index, series, tenor, coupon * 1e-4 AS coupon, maturity FROM "
        "index_maturity WHERE coupon is NOT NULL", _serenitas_engine,
        index_col=['index', 'series', 'tenor'])

    df = df.reset_index('date').join(coupon_data)
    # Accrued carry between consecutive quote dates on an Act/360 basis.
    # BUG FIX: astype('timedelta64[D]') on a timedelta Series is no longer
    # supported in modern pandas; .dt.days is equivalent for whole-day diffs.
    df['day_frac'] = (df.groupby(level=['index', 'series', 'tenor'])['date']
                      .diff().dt.days / 360)
    df['price_return'] += df.day_frac * df.coupon
    del df['day_frac']
    return df


def get_singlenames_quotes(indexname, date):
    """Return the raw single-name curve quote rows for `indexname` on `date`."""
    conn = dbconn('serenitasdb')
    with conn.cursor() as c:
        c.execute("SELECT * FROM curve_quotes(%s, %s)", vars=(indexname, date))
        return [r for r in c]


def build_curve(r, today_date, yc, start_date, step_in_date, value_date, end_dates):
    """Bootstrap a SpreadCurve from one single-name quote row.

    Returns None (after printing the offending row key) when the curve
    cannot be built.
    """
    # Quotes are stored in bp (spreads) and percent (upfronts); scale to decimals.
    spread_curve = 1e-4 * np.array(r['spread_curve'], dtype='float')
    upfront_curve = 1e-2 * np.array(r['upfront_curve'], dtype='float')
    recovery_curve = np.array(r['recovery_curve'], dtype='float')
    try:
        sc = SpreadCurve(today_date, yc, start_date, step_in_date, value_date,
                         end_dates, spread_curve, upfront_curve, recovery_curve,
                         ticker=r['cds_ticker'])
        # Pad missing pillars so every curve covers all requested end dates.
        if len(sc) != end_dates.shape[0]:
            sc = fill_curve(sc, end_dates)
    except ValueError as e:
        print(r[0], e)
        return None
    return sc


def build_curves(quotes, args):
    """Sequentially build curves for every non-None quote row."""
    return [build_curve(q, *args) for q in quotes if q is not None]


def build_curves_dist(quotes, args, workers=4):
    """Build curves in parallel across `workers` processes.

    About twice as fast as the non-distributed version; curve building is not
    thread safe for some reason, hence a process pool rather than threads.
    """
    with Pool(workers) as pool:
        # chunksize=30 amortizes IPC overhead across tasks.
        r = pool.starmap(build_curve, [(q, *args) for q in quotes], 30)
    return r


def get_singlenames_curves(index_type, series, trade_date):
    """Bootstrap all single-name curves for an index series on `trade_date`.

    Returns (curves, args) where `args` is the tuple of shared curve-building
    parameters (trade date, yield curve, start/step-in/value dates, pillars).
    """
    end_dates = roll_date(trade_date, [0.5, 1, 2, 3, 4, 5, 7, 10], nd_array=True)
    sn_quotes = get_singlenames_quotes("{}{}".format(index_type.lower(), series),
                                       trade_date.date())
    currency = "EUR" if index_type in ["XO", "EU"] else "USD"
    jp_yc = get_curve(trade_date, currency)
    start_date = previous_twentieth(trade_date)
    step_in_date = trade_date + datetime.timedelta(days=1)
    # Standard T+3 business-day settlement.
    value_date = pd.Timestamp(trade_date) + 3 * BDay()
    args = (trade_date, jp_yc, start_date, step_in_date, value_date, end_dates)
    curves = build_curves_dist(sn_quotes, args)
    return curves, args


def get_tranche_quotes(index_type, series, tenor, date=None):
    """Fetch tranche quotes as a DataFrame.

    BUG FIX: the original default `date=datetime.date.today()` was evaluated
    once at import time, silently pinning the date for long-running processes.
    `None` now means "today", resolved at call time (backward-compatible).
    """
    if date is None:
        date = datetime.date.today()
    conn = dbconn('serenitasdb')
    with conn.cursor() as c:
        c.callproc("get_tranche_quotes", (index_type, series, tenor, date))
        return pd.DataFrame.from_records(dict(d) for d in c)
