aboutsummaryrefslogtreecommitdiffstats
path: root/python/index_data.py
diff options
context:
space:
mode:
Diffstat (limited to 'python/index_data.py')
-rw-r--r--python/index_data.py176
1 file changed, 0 insertions, 176 deletions
diff --git a/python/index_data.py b/python/index_data.py
deleted file mode 100644
index 730a4e8a..00000000
--- a/python/index_data.py
+++ /dev/null
@@ -1,176 +0,0 @@
-from db import dbengine, dbconn
-from dates import bond_cal
-import numpy as np
-
-from analytics.utils import roll_date, previous_twentieth
-from pandas.tseries.offsets import BDay
-from pyisda.curve import SpreadCurve, fill_curve
-from multiprocessing import Pool
-from yieldcurve import get_curve
-
-import datetime
-import pandas as pd
# Module-wide engine for the serenitas database, shared by every query below.
# NOTE(review): presumably a SQLAlchemy-style engine (it is passed to
# pd.read_sql_query and has .execute) — confirm against the db module.
_serenitas_engine = dbengine('serenitasdb')
-
def insert_quotes():
    """
    backpopulate some version i+1 quotes one day before they start trading so that
    we get continuous time series when we compute returns.

    We can also do it in sql as follows:

    INSERT INTO index_quotes(date, index, series, version, tenor, closeprice)
    SELECT date, index, series, version+1, tenor, (factor1*closeprice-100*0.355)/factor2
    FROM index_quotes
    WHERE index='HY' and series=23 and date='2017-02-02'

    """
    # Dates immediately preceding the first trading day of a new HY version.
    dates = pd.DatetimeIndex(['2014-05-21', '2015-02-19', '2015-03-05', '2015-06-23'])
    df = pd.read_sql_query("SELECT DISTINCT ON (date) * FROM index_quotes "
                           "WHERE index='HY' AND tenor='5yr' "
                           "ORDER BY date, series DESC, version DESC",
                           _serenitas_engine, parse_dates=['date'], index_col=['date'])
    df = df.loc[dates]
    for tup in df.itertuples():
        # BUG FIX: was `serenitasdb.execute` — an undefined name; use the
        # module-level engine.
        # BUG FIX: the concatenated SQL was missing a space before ORDER BY,
        # yielding "...(%s, %s)ORDER BY version".
        result = _serenitas_engine.execute(
            "SELECT indexfactor, cumulativeloss FROM index_version "
            "WHERE index = 'HY' AND series=%s AND version in (%s, %s) "
            "ORDER BY version",
            (tup.series, tup.version, tup.version + 1))
        factor1, cumloss1 = result.fetchone()
        factor2, cumloss2 = result.fetchone()
        # Recovery implied by the incremental loss between the two versions.
        recovery = 1 - (cumloss2 - cumloss1)
        version2_price = (factor1 * tup.closeprice - 100 * recovery) / factor2
        print(version2_price)
        # BUG FIX: the concatenated SQL was missing a space before VALUES,
        # yielding "...closeprice)VALUES(...)".
        _serenitas_engine.execute(
            "INSERT INTO index_quotes(date, index, series, version, tenor, closeprice) "
            "VALUES(%s, %s, %s, %s, %s, %s)",
            (tup.Index, 'HY', tup.series, tup.version + 1, tup.tenor, version2_price))
-
def get_index_quotes(index=None, series=None, tenor=None, from_date=None, years=3):
    """Fetch index quotes as a frame indexed by (date, index, series, version, tenor).

    Parameters
    ----------
    index : str or List[str], optional
        index type, e.g. 'IG', 'HY', 'EU', 'XO'
    series : int or List[int], optional
    tenor : str or List[str], optional
        e.g. '3yr', '5yr'
    from_date : datetime.date, optional
        starting date; takes precedence over `years`
    years : int, optional
        how many years to go back from today when `from_date` is not given

    Returns
    -------
    pandas.DataFrame sorted on its MultiIndex, with tenor as an ordered
    categorical level and (for IG/HY) US bond-market holidays dropped.
    """
    # Snapshot the arguments to drive WHERE-clause construction; must run
    # before any other local is bound.
    args = locals().copy()
    if args['years'] is not None:
        args['date'] = (pd.Timestamp.now() - pd.DateOffset(years=years)).date()
    del args['years']
    if args['from_date']:
        args['date'] = args['from_date']
    del args['from_date']

    def make_str(key, val):
        # Lists become IN filters, dates lower-bound the `date` column,
        # everything else is an equality filter.
        # BUG FIX: the list branch assigned `op = "IN"` and then ignored it
        # via an early hard-coded return; fold it into the single format call.
        if isinstance(val, list):
            op = "IN"
        elif isinstance(val, datetime.date):
            op = ">="
        else:
            op = "="
        return "{} {} %({})s".format(key, op, key)

    where_clause = " AND ".join(make_str(k, v)
                                for k, v in args.items() if v is not None)
    sql_str = "SELECT * FROM index_quotes"
    if where_clause:
        sql_str = " WHERE ".join([sql_str, where_clause])

    def make_params(args):
        # The DB driver needs tuples (not lists) for IN (...) parameters.
        return {k: tuple(v) if isinstance(v, list) else v
                for k, v in args.items() if v is not None}

    df = pd.read_sql_query(sql_str, _serenitas_engine, parse_dates=['date'],
                           index_col=['date', 'index', 'series', 'version'],
                           params=make_params(args))
    # BUG FIX: .astype('category', categories=..., ordered=...) was deprecated
    # in pandas 0.21 and removed in 0.25; use an explicit CategoricalDtype.
    tenor_type = pd.CategoricalDtype(("3yr", "5yr", "7yr", "10yr"), ordered=True)
    df.tenor = df.tenor.astype(tenor_type)
    df = df.set_index('tenor', append=True)
    df.sort_index(inplace=True)
    ## get rid of US holidays
    dates = df.index.levels[0]
    if index in ['IG', 'HY']:
        holidays = bond_cal().holidays(start=dates[0], end=dates[-1])
        df = df.loc(axis=0)[dates.difference(holidays), :, :]
    return df
-
def index_returns(df=None, index=None, series=None, tenor=None, from_date=None, years=3, per=1):
    """computes daily spreads and price returns

    Parameters
    ----------
    df : pandas.DataFrame, optional
        precomputed quotes as returned by `get_index_quotes`; fetched when None
    index : str or List[str], optional
        index type, one of 'IG', 'HY', 'EU', 'XO'
    series : int or List[int], optional
    tenor : str or List[str], optional
        tenor in years e.g: '3yr', '5yr'
    from_date : datetime.date, optional
        starting date
    years : int, optional
        limits how many years we go back starting from today.
    per: int, optional
        calculate returns across different time frames

    """
    if df is None:
        df = get_index_quotes(index, series, tenor, from_date, years)
    # Percentage changes within each (index, series, tenor, version) group so
    # that returns never straddle a version roll.
    df = (df.
          groupby(level=['index', 'series', 'tenor', 'version'])
          [['closespread','closeprice']].
          pct_change(periods=per))
    df.columns = ['spread_return', 'price_return']
    # Collapse the version level: keep a single row per (date, index, series, tenor).
    df = df.groupby(level=['date', 'index', 'series', 'tenor']).nth(0)
    # Coupons are stored in basis points; scale to a fraction in SQL.
    coupon_data = pd.read_sql_query("SELECT index, series, tenor, coupon * 1e-4 AS coupon, maturity FROM " \
                                    "index_maturity WHERE coupon is NOT NULL", _serenitas_engine,
                                    index_col=['index', 'series', 'tenor'])

    df = df.reset_index('date').join(coupon_data)
    # Accrued-coupon adjustment over the gap between quote dates, ACT/360.
    df['day_frac'] = (df.groupby(level=['index', 'series', 'tenor'])['date'].
                      diff().
                      astype('timedelta64[D]') / 360)
    df['price_return'] += df.day_frac * df.coupon
    del df['day_frac']
    return df
-
def get_singlenames_quotes(indexname, date):
    """Return the raw single-name curve quote rows for *indexname* on *date*.

    Calls the `curve_quotes` stored procedure and materializes its rows
    into a list.
    """
    conn = dbconn('serenitasdb')
    try:
        with conn.cursor() as c:
            c.execute("SELECT * FROM curve_quotes(%s, %s)", vars=(indexname, date))
            return list(c)
    finally:
        # BUG FIX: only the cursor was closed before; the connection leaked.
        conn.close()
-
def build_curve(r, today_date, yc, start_date, step_in_date, value_date, end_dates):
    """Bootstrap one single-name SpreadCurve from a quote record.

    Quotes arrive as basis-point spreads, percent upfronts and raw
    recoveries; they are rescaled to fractions before being handed to the
    pyisda builder. Returns None (after printing the offending record's
    first field and the error) when the curve cannot be bootstrapped.
    """
    spreads = np.array(r['spread_curve'], dtype='float') * 1e-4
    upfronts = np.array(r['upfront_curve'], dtype='float') * 1e-2
    recoveries = np.array(r['recovery_curve'], dtype='float')
    try:
        curve = SpreadCurve(today_date, yc, start_date, step_in_date, value_date,
                            end_dates, spreads, upfronts, recoveries,
                            ticker=r['cds_ticker'])
        # Pad the curve when fewer pillars bootstrapped than were requested.
        if len(curve) != end_dates.shape[0]:
            curve = fill_curve(curve, end_dates)
    except ValueError as err:
        print(r[0], err)
        return None
    return curve
-
def build_curves(quotes, args):
    """Serially build a curve for every non-None quote record."""
    curves = []
    for quote in quotes:
        if quote is not None:
            curves.append(build_curve(quote, *args))
    return curves
-
def build_curves_dist(quotes, args, workers=4):
    """Build curves in parallel across *workers* processes.

    About twice as fast as the serial version; the curve builder is not
    thread safe for some reason, hence a process pool rather than threads.
    """
    tasks = [(quote, *args) for quote in quotes]
    with Pool(workers) as pool:
        return pool.starmap(build_curve, tasks, 30)
-
def get_singlenames_curves(index_type, series, trade_date):
    """Bootstrap the single-name curves for one index series on *trade_date*.

    Returns the list of curves together with the argument tuple used to
    build them, so callers can rebuild individual curves with the same
    dates and yield curve.
    """
    # Standard CDS pillar maturities out to 10y, rolled from the trade date.
    pillar_dates = roll_date(trade_date, [0.5, 1, 2, 3, 4, 5, 7, 10], nd_array=True)
    index_name = "{}{}".format(index_type.lower(), series)
    quotes = get_singlenames_quotes(index_name, trade_date.date())
    ccy = "EUR" if index_type in ["XO", "EU"] else "USD"
    yield_curve = get_curve(trade_date, ccy)
    protection_start = previous_twentieth(trade_date)
    step_in = trade_date + datetime.timedelta(days=1)
    # Cash settlement: T+3 business days.
    settle = pd.Timestamp(trade_date) + 3 * BDay()
    curve_args = (trade_date, yield_curve, protection_start, step_in, settle, pillar_dates)
    return build_curves_dist(quotes, curve_args), curve_args
-
def get_tranche_quotes(index_type, series, tenor, date=None):
    """Fetch tranche quotes as a DataFrame via the `get_tranche_quotes` proc.

    Parameters
    ----------
    date : datetime.date, optional
        defaults to today's date, evaluated per call.
        BUG FIX: the default was `datetime.date.today()` in the signature,
        which is evaluated once at import time — a long-running process
        would silently keep querying a stale date.
    """
    if date is None:
        date = datetime.date.today()
    conn = dbconn('serenitasdb')
    try:
        with conn.cursor() as c:
            c.callproc("get_tranche_quotes", (index_type, series, tenor, date))
            return pd.DataFrame.from_records(dict(d) for d in c)
    finally:
        # BUG FIX: the connection was previously never closed.
        conn.close()