diff options
Diffstat (limited to 'python/analytics/index_data.py')
| -rw-r--r-- | python/analytics/index_data.py | 188 |
1 files changed, 112 insertions, 76 deletions
diff --git a/python/analytics/index_data.py b/python/analytics/index_data.py index beff0647..31fe8e4c 100644 --- a/python/analytics/index_data.py +++ b/python/analytics/index_data.py @@ -25,39 +25,52 @@ def insert_quotes(): WHERE index='HY' and series=23 and date='2017-02-02' """ - dates = pd.DatetimeIndex(['2014-05-21', '2015-02-19', '2015-03-05', '2015-06-23']) - df = pd.read_sql_query("SELECT DISTINCT ON (date) * FROM index_quotes " - "WHERE index='HY' AND tenor='5yr' " - "ORDER BY date, series DESC, version DESC", - _engine, parse_dates=['date'], index_col=['date']) + dates = pd.DatetimeIndex(["2014-05-21", "2015-02-19", "2015-03-05", "2015-06-23"]) + df = pd.read_sql_query( + "SELECT DISTINCT ON (date) * FROM index_quotes " + "WHERE index='HY' AND tenor='5yr' " + "ORDER BY date, series DESC, version DESC", + _engine, + parse_dates=["date"], + index_col=["date"], + ) df = df.loc[dates] for tup in df.itertuples(): result = serenitas_engine.execute( "SELECT indexfactor, cumulativeloss FROM index_version " "WHERE index = 'HY' AND series=%s AND version in (%s, %s)" "ORDER BY version", - (tup.series, tup.version, tup.version+1)) + (tup.series, tup.version, tup.version + 1), + ) factor1, cumloss1 = result.fetchone() factor2, cumloss2 = result.fetchone() - recovery = 1-(cumloss2-cumloss1) - version2_price = (factor1 * tup.closeprice - 100 * recovery)/factor2 + recovery = 1 - (cumloss2 - cumloss1) + version2_price = (factor1 * tup.closeprice - 100 * recovery) / factor2 print(version2_price) serenitas_engine.execute( "INSERT INTO index_quotes(date, index, series, version, tenor, closeprice)" "VALUES(%s, %s, %s, %s, %s, %s)", - (tup.Index, 'HY', tup.series, tup.version+1, tup.tenor, version2_price)) + (tup.Index, "HY", tup.series, tup.version + 1, tup.tenor, version2_price), + ) -def get_index_quotes(index=None, series=None, tenor=None, from_date=None, - years=3, remove_holidays=True, source='MKIT'): +def get_index_quotes( + index=None, + series=None, + tenor=None, + from_date=None, + years=3, + remove_holidays=True, + source="MKIT", +): args = locals().copy() - del args['remove_holidays'] - if args['years'] is not None: - args['date'] = (pd.Timestamp.now() - pd.DateOffset(years=years)).date() - del args['years'] - if args['from_date']: - args['date'] = args['from_date'] - del args['from_date'] + del args["remove_holidays"] + if args["years"] is not None: + args["date"] = (pd.Timestamp.now() - pd.DateOffset(years=years)).date() + del args["years"] + if args["from_date"]: + args["date"] = args["from_date"] + del args["from_date"] def make_str(key, val): if isinstance(val, list): @@ -69,33 +82,42 @@ def get_index_quotes(index=None, series=None, tenor=None, from_date=None, op = "=" return "{} {} %({})s".format(key, op, key) - where_clause = " AND ".join(make_str(k, v) - for k, v in args.items() if v is not None) + where_clause = " AND ".join( + make_str(k, v) for k, v in args.items() if v is not None + ) sql_str = "SELECT * FROM index_quotes_pre LEFT JOIN index_risk2 USING (id)" if where_clause: sql_str = " WHERE ".join([sql_str, where_clause]) def make_params(args): - return {k: tuple(v) if isinstance(v, list) else v - for k, v in args.items() if v is not None} + return { + k: tuple(v) if isinstance(v, list) else v + for k, v in args.items() + if v is not None + } - df = pd.read_sql_query(sql_str, serenitas_engine, parse_dates=['date'], - index_col=['date', 'index', 'series', 'version'], - params=make_params(args)) + df = pd.read_sql_query( + sql_str, + serenitas_engine, + parse_dates=["date"], + index_col=["date", "index", "series", "version"], + params=make_params(args), + ) df.tenor = df.tenor.astype(tenor_t) - df = df.set_index('tenor', append=True) + df = df.set_index("tenor", append=True) df.sort_index(inplace=True) # get rid of US holidays if remove_holidays: dates = df.index.levels[0] - if index in ['IG', 'HY']: + if index in ["IG", "HY"]: holidays = bond_cal().holidays(start=dates[0], end=dates[-1]) df = df.loc(axis=0)[dates.difference(holidays), :, :] return df -def index_returns(df=None, index=None, series=None, tenor=None, from_date=None, - years=3, per=1): +def index_returns( + df=None, index=None, series=None, tenor=None, from_date=None, years=3, per=1 +): """computes spreads and price returns Parameters @@ -116,59 +138,71 @@ def index_returns(df=None, index=None, series=None, tenor=None, from_date=None, """ if df is None: df = get_index_quotes(index, series, tenor, from_date, years) - spread_return = (df. - groupby(level=['index', 'series', 'tenor', 'version']). - close_spread. - pct_change(periods=per)) - price_return = (df. - groupby(level=['index', 'series', 'tenor', 'version']). - close_price. - diff() / 100) - df = pd.concat([spread_return, price_return], axis=1, - keys=['spread_return', 'price_return']) - df = df.groupby(level=['date', 'index', 'series', 'tenor']).nth(0) - coupon_data = pd.read_sql_query("SELECT index, series, tenor, coupon * 1e-4 AS coupon, " - "maturity FROM " - "index_maturity WHERE coupon is NOT NULL", - serenitas_engine, - index_col=['index', 'series', 'tenor']) - df = df.reset_index('date').join(coupon_data).reset_index('tenor') + spread_return = df.groupby( + level=["index", "series", "tenor", "version"] + ).close_spread.pct_change(periods=per) + price_return = ( + df.groupby(level=["index", "series", "tenor", "version"]).close_price.diff() + / 100 + ) + df = pd.concat( + [spread_return, price_return], axis=1, keys=["spread_return", "price_return"] + ) + df = df.groupby(level=["date", "index", "series", "tenor"]).nth(0) + coupon_data = pd.read_sql_query( + "SELECT index, series, tenor, coupon * 1e-4 AS coupon, " + "maturity FROM " + "index_maturity WHERE coupon is NOT NULL", + serenitas_engine, + index_col=["index", "series", "tenor"], + ) + df = df.reset_index("date").join(coupon_data).reset_index("tenor") # for some reason pandas doesn't keep the categories, so we have to # do this little dance df.tenor = df.tenor.astype(tenor_t) - df = df.set_index('tenor', append=True) - df['day_frac'] = (df.groupby(level=['index', 'series', 'tenor'])['date']. - transform(lambda s: s. - diff(). - astype('timedelta64[D]') / 360)) - df['price_return'] += df.day_frac * df.coupon - df = df.drop(['day_frac', 'coupon', 'maturity'], axis=1) - return df.set_index(['date'], append=True) + df = df.set_index("tenor", append=True) + df["day_frac"] = df.groupby(level=["index", "series", "tenor"])["date"].transform( + lambda s: s.diff().astype("timedelta64[D]") / 360 + ) + df["price_return"] += df.day_frac * df.coupon + df = df.drop(["day_frac", "coupon", "maturity"], axis=1) + return df.set_index(["date"], append=True) def get_singlenames_quotes(indexname, date, tenors): - r = serenitas_engine.execute("SELECT * FROM curve_quotes2(%s, %s, %s)", - (indexname, date, list(tenors))) + r = serenitas_engine.execute( + "SELECT * FROM curve_quotes2(%s, %s, %s)", (indexname, date, list(tenors)) + ) return list(r) def build_curve(r, tenors): - if r['date'] is None: + if r["date"] is None: raise ValueError(f"Curve for {r['cds_ticker']} is missing") - spread_curve = 1e-4 * np.array(r['spread_curve'], dtype='float') - upfront_curve = 1e-2 * np.array(r['upfront_curve'], dtype='float') - recovery_curve = np.array(r['recovery_curve'], dtype='float') - yc = get_curve(r['date'], r['currency']) + spread_curve = 1e-4 * np.array(r["spread_curve"], dtype="float") + upfront_curve = 1e-2 * np.array(r["upfront_curve"], dtype="float") + recovery_curve = np.array(r["recovery_curve"], dtype="float") + yc = get_curve(r["date"], r["currency"]) try: - sc = SpreadCurve(r['date'], yc, None, None, None, - tenors, spread_curve, upfront_curve, recovery_curve, - ticker=r['cds_ticker'], seniority=Seniority[r['seniority']], - doc_clause=DocClause[r['doc_clause']], - defaulted=r['event_date']) + sc = SpreadCurve( + r["date"], + yc, + None, + None, + None, + tenors, + spread_curve, + upfront_curve, + recovery_curve, + ticker=r["cds_ticker"], + seniority=Seniority[r["seniority"]], + doc_clause=DocClause[r["doc_clause"]], + defaulted=r["event_date"], + ) except ValueError as e: print(r[0], e) - return r['weight'], None - return r['weight'], sc + return r["weight"], None + return r["weight"], sc def build_curves(quotes, args): @@ -185,20 +219,22 @@ def build_curves_dist(quotes, args, workers=4): @lru_cache(maxsize=16) def _get_singlenames_curves(index_type, series, trade_date, tenors): - sn_quotes = get_singlenames_quotes(f"{index_type.lower()}{series}", - trade_date, tenors) - args = (np.array(tenors, dtype='float'),) + sn_quotes = get_singlenames_quotes( + f"{index_type.lower()}{series}", trade_date, tenors + ) + args = (np.array(tenors, dtype="float"),) return build_curves(sn_quotes, args) -def get_singlenames_curves(index_type, series, trade_date, - tenors=(0.5, 1, 2, 3, 4, 5, 7, 10)): +def get_singlenames_curves( + index_type, series, trade_date, tenors=(0.5, 1, 2, 3, 4, 5, 7, 10) +): # tenors need to be a subset of (0.5, 1, 2, 3, 4, 5, 7, 10) if isinstance(trade_date, pd.Timestamp): trade_date = trade_date.date() - return _get_singlenames_curves(index_type, series, - min(datetime.date.today(), trade_date), - tenors) + return _get_singlenames_curves( + index_type, series, min(datetime.date.today(), trade_date), tenors + ) def get_tranche_quotes(index_type, series, tenor, date=datetime.date.today()): |
