diff options
Diffstat (limited to 'python/globeop_reports.py')
| -rw-r--r-- | python/globeop_reports.py | 292 |
1 file changed, 174 insertions, 118 deletions
# NOTE(review): the source under review is a cgit-rendered unified diff of
# python/globeop_reports.py collapsed onto a few long lines.  This block
# reconstructs the post-diff ("+") side as properly formatted Python and
# applies the targeted fixes called out in the comments below.  The imports
# (pandas as pd, numpy as np, datetime, MonthEnd, DateOffset, dawn_engine,
# dbengine, and the QuantLib-style wrappers YieldTermStructure / YC /
# USDLibor / Period / Months / Date) live above the diffed region.


def get_monthly_pnl(group_by=None):
    """Return month-end MTD P&L totals grouped by date plus *group_by* keys.

    Parameters
    ----------
    group_by : sequence of str, optional
        Extra grouping columns appended to "date"; defaults to
        ``["identifier"]``.  (The original used a mutable list default,
        which is shared across calls.)
    """
    if group_by is None:  # avoid the shared-mutable-default pitfall
        group_by = ["identifier"]
    sql_string = "SELECT * FROM pnl_reports"
    df_pnl = pd.read_sql_query(
        sql_string, dawn_engine, parse_dates=["date"], index_col=["date"]
    )
    # "_A$" is a regex (strip a trailing "_A" suffix).  pandas >= 2.0 defaults
    # str.replace to regex=False, so state the intent explicitly.
    df_pnl["identifier"] = df_pnl.invid.str.replace("_A$", "", regex=True)
    pnl_cols = [
        "bookunrealmtm",
        "bookrealmtm",
        "bookrealincome",
        "bookunrealincome",
        "totalbookpl",
    ]
    # Keep only the last row of each calendar month (the month-end snapshot).
    monthend_pnl = df_pnl.groupby(pd.Grouper(freq="M"), group_keys=False).apply(
        lambda df: df.loc[df.index[-1]]
    )
    return monthend_pnl.groupby(["date"] + list(group_by))[
        ["mtd" + col for col in pnl_cols]
    ].sum()


def get_portfolio(report_date=None):
    """Load valuation_reports, optionally restricted to one period-end date."""
    # The two original branches differed only in WHERE clause / params, so
    # they are collapsed into a single parameterized query call.
    if report_date is not None:
        sql_string = "SELECT * FROM valuation_reports where periodenddate = %s"
        params = [report_date]
    else:
        sql_string = "SELECT * FROM valuation_reports"
        params = None
    df = pd.read_sql_query(
        sql_string,
        dawn_engine,
        parse_dates=["periodenddate"],
        index_col=["periodenddate"],
        params=params,
    )
    # Same trailing-"_A" strip as get_monthly_pnl; regex made explicit.
    df["identifier"] = df.invid.str.replace("_A$", "", regex=True)
    return df


def trade_performance():
    """Join sell trades of Subprime bonds with monthly P&L and derive
    per-position performance (percent gain, days held).

    Returns a DataFrame sorted by trade_date descending.
    """
    sql_string = "SELECT * FROM bonds"
    df_trades = pd.read_sql_query(
        sql_string,
        dawn_engine,
        parse_dates={"lastupdate": {"utc": True}, "trade_date": {}, "settle_date": {}},
    )
    df_trades = df_trades[df_trades["asset_class"] == "Subprime"]
    df_pnl = get_monthly_pnl()

    # Last sell / last buy per identifier.
    df_sell = df_trades[~df_trades.buysell].groupby("identifier").last().reset_index()
    df_sell.identifier = df_sell.identifier.str[:9]  # normalize to 9-char id
    df_sell["trade_pnl_date"] = df_sell.trade_date + MonthEnd(0)
    df_buy = df_trades[df_trades.buysell].groupby("identifier").last().reset_index()

    # Lifetime P&L per identifier, plus the MTD P&L of the sale month.
    df_all = df_sell.merge(
        df_pnl.groupby("identifier").sum().reset_index(), on=["identifier"]
    )
    df_all = df_all.merge(
        df_pnl.reset_index()[["date", "identifier", "mtdtotalbookpl"]],
        left_on=["trade_pnl_date", "identifier"],
        right_on=["date", "identifier"],
        suffixes=("", "_at_trade_month"),
    )
    df_all = df_all.drop(["date", "trade_pnl_date"], axis=1)

    # Initial investment = principal + accrued paid across all buys.
    g = df_buy.groupby("identifier").sum()
    init_inv = g.principal_payment + g.accrued_payment
    init_inv.name = "initialinvestment"

    first_buy_date = df_buy.groupby("identifier").first().trade_date
    first_buy_date.name = "firstbuydate"

    df_all = df_all.join(init_inv, on="identifier")
    df_all = df_all.join(first_buy_date, on="identifier")
    df_all["percent_gain"] = df_all.mtdtotalbookpl / df_all.initialinvestment
    df_all["days_held"] = df_all.trade_date - df_all.firstbuydate

    return df_all.sort_values("trade_date", ascending=False)


def get_net_navs():
    """Month-end fund NAV joined with subscription/fee flows.

    NOTE(review): reads a hard-coded local CSV path — confirm it still exists.
    """
    sql_string = "SELECT * FROM valuation_reports"
    df_val = pd.read_sql_query(sql_string, dawn_engine, parse_dates=["periodenddate"])
    nav = (
        df_val[df_val.fund == "SERCGMAST"].groupby("periodenddate")["endbooknav"].sum()
    )
    nav = nav.resample("M").last()
    df = pd.read_csv(
        "/home/serenitas/edwin/Python/subscription_fee_data.csv",
        parse_dates=["date"],
        index_col=["date"],
    )
    # Snap the CSV dates to month-end so they align with the NAV index.
    df.index = df.index.to_period("M").to_timestamp("M")
    df = df.join(nav)
    df["begbooknav"] = (df.endbooknav + df.net_flow).shift(1)
    # First month has no prior-month NAV; seed it with the inception capital.
    # (Original wrapped the row/col pair in a redundant tuple.)
    df.at[pd.Timestamp("2013-01-31"), "begbooknav"] = 12500000
    return df


def shift_cash(date, amount, df, strat):
    """Move *amount* out of strategy column *strat* into "Cash" at *date*,
    expressed as a fraction of that month's ending NAV.

    Mutates *df* in place and returns it.
    """
    nav = get_net_navs()
    frac = amount / nav.loc[date].endbooknav  # hoist the shared ratio
    df.loc[date, strat] -= frac
    df.loc[date, "Cash"] += frac
    return df


def calc_trade_performance_stats():
    """Summarize mean trade-performance figures by year.

    Returns the summary DataFrame.  The original built ``results`` and then
    dropped it (no return); it also computed unused ``win_per``/``loss_per``
    and a discarded trailing groupby expression — all removed/fixed here.
    """
    df = trade_performance().set_index("trade_date")
    df.days_held = df.days_held.dt.days
    df["winners"] = df.percent_gain > 0
    df["curr_face"] = df.principal_payment / (df.price / 100)

    # NOTE(review): these pre-seeded labels are strings, but the loop below
    # assigns rows keyed by integer years, so the seeded rows stay empty —
    # looks like a latent bug carried over from the original; confirm the
    # intended labels before changing them.
    index = ["All", "2017", "2016", "2015", "2014", "2013"]
    results = pd.DataFrame(index=index)
    for _is_winner, df1 in df.groupby("winners"):
        for year_end, df2 in df1.groupby(pd.Grouper(freq="A")):
            year = year_end.date().year
            results.loc[year] = df2[df2.days_held.notnull()].mean()[
                ["curr_face", "initialinvestment", "days_held"]
            ]
    return results


def hist_pos(asset_class="rmbs", dm=False):
    """Build the historical month-end position/risk frame from Jan 2013 on.

    Fixes vs. original: ``pd.datetime`` and ``DataFrame.append`` were both
    removed in pandas 2.0; replaced with ``pd.Timestamp.today()`` and
    ``pd.concat``.
    """
    end_date = pd.Timestamp.today() - MonthEnd(1)
    dates = pd.date_range(datetime.date(2013, 1, 31), end_date, freq="M")
    calc_df = pd.DataFrame()
    for d in dates:
        if asset_class == "rmbs":
            # A couple of month-ends have their model run pinned to an
            # explicit date; map those cases by hand.
            if d.date() == datetime.date(2018, 11, 30):
                d_1 = datetime.date(2018, 12, 3)
            elif d.date() == datetime.date(2016, 2, 29):
                d_1 = datetime.date(2016, 2, 29)
            else:
                d_1 = None
            calc_df = pd.concat([calc_df, rmbs_pos(d, d_1, dm)])
        else:
            # NOTE(review): the condition guarding this branch fell between
            # diff hunks in the reviewed source (likely ``else`` or
            # ``elif asset_class == 'clo'``) — confirm against the full file.
            calc_df = pd.concat([calc_df, clo_pos(d)], sort=True)
    return calc_df


def rmbs_pos(date, model_date=None, dm=False):
    """Month-end RMBS positions joined with model risk numbers.

    Parameters
    ----------
    date : datetime.date or pd.Timestamp
        Month-end valuation date.
    model_date : datetime.date, optional
        Model-run date; defaults to the latest run in the 15 days before
        *date*.
    dm : bool
        If true, quote ``b_yield`` net of the 1M USD Libor fixing.
    """
    date = date.date() if isinstance(date, pd.Timestamp) else date
    # Just non-zero-factor bonds for now; factor handling still to do.
    pos = get_portfolio(date)
    pos = pos[
        (pos.port == "MORTGAGES")
        & (pos.endbookmv > 0)
        & (pos.custacctname == "V0NSCLMAMB")
        & (pos["invid"].str.len() == 9)
    ]
    pos = pos[["endbookmv", "endlocalmarketprice", "identifier"]]

    # Most recent model run in the 15 days strictly before *date*.
    sql_string = (
        "SELECT distinct timestamp FROM priced where "
        "normalization = 'current_notional' and "
        "model_version = 1 and "
        "date(timestamp) < %s and date(timestamp) > %s "
        "order by timestamp desc"
    )
    timestamps = pd.read_sql_query(
        sql_string,
        dawn_engine,
        parse_dates=["timestamp"],
        params=[date, date - DateOffset(15, "D")],
    )
    if model_date is None:
        # iloc instead of the original chained .loc[0][0] lookup.
        model_date = timestamps.iloc[0, 0].date()
    yc = YieldTermStructure()
    yc.link_to(YC(evaluation_date=model_date))

    sql_string = (
        "SELECT date(timestamp) as timestamp, cusip, model_version, "
        "pv, moddur, delta_yield, delta_ir "
        "FROM priced where date(timestamp) = %s "
        "and model_version <> 2"
    )
    params_list = (model_date,)
    if date > datetime.date(2017, 9, 30):
        # From Oct 2017 on, pin the query to the latest simulation id.
        r = dawn_engine.execute("SELECT latest_sim FROM latest_sim(%s)", model_date)
        (model_id,) = next(r)
        sql_string += " AND model_id_sub = %s"
        params_list += (model_id,)
    model = pd.read_sql_query(
        sql_string, dawn_engine, parse_dates=["timestamp"], params=params_list
    )
    model = model[model["pv"] != 0]

    # Current notional implied by book MV and price (price quoted per 100).
    pos = pos.assign(curr_ntl=pos.endbookmv / pos.endlocalmarketprice * 100)
    comb_g = pos.groupby("identifier").agg({"endbookmv": "sum", "curr_ntl": "sum"})
    comb_g["date"] = pd.to_datetime(date)
    model = pd.merge(comb_g, model, left_on="identifier", right_on="cusip")
    positions = model.set_index(["cusip", "model_version"]).unstack(1).dropna()
    v1 = positions.xs(1, level="model_version", axis=1)
    v3 = positions.xs(3, level="model_version", axis=1)

    if dm:  # "is True" comparison dropped
        libor1m = USDLibor(Period(1, Months), yc)
        libor = libor1m.fixing(
            libor1m.fixing_calendar.adjust(Date.from_datetime(model_date))
        )
        v3 = v3.assign(
            b_yield=v3.moddur.apply(lambda x: float(yc.zero_rate(x))) - libor
        )
    else:
        v3 = v3.assign(b_yield=v3.moddur.apply(lambda x: float(yc.zero_rate(x))))
    # Presumably a model-implied annualized return term, capped at 1 —
    # TODO confirm the financial interpretation with the model owner.
    v3.b_yield += np.minimum(
        (v1.pv * v1.curr_ntl / v1.endbookmv) ** (1 / v1.moddur) - 1, 1
    ).dropna()
    v3.delta_yield *= v3.endbookmv / v3.pv
    v3.delta_ir *= (
        np.minimum(1, 1 / v3.moddur)
        * (v3.endbookmv / v3.curr_ntl)
        / v3.pv
        * v3.curr_ntl
    )
    return v3.reset_index().set_index("date")


def clo_pos(date):
    """Month-end CLO positions joined with historical cusip risk numbers."""
    date = date.date() if isinstance(date, pd.Timestamp) else date
    df = get_portfolio(date)
    df = df[
        (df.port == "CLO")
        & (df.endbookmv > 0)
        & (df.custacctname == "V0NSCLMAMB")
        & (df["invid"].str.len() >= 9)
    ]
    df = df[["endbookmv", "endlocalmarketprice", "identifier"]]
    if df.empty:  # early-return guard; "is True" comparison dropped
        return df

    sql_string = (
        "select distinct cusip, identifier from bonds where asset_class = 'CLO'"
    )
    cusip_map = {
        identifier: cusip for cusip, identifier in dawn_engine.execute(sql_string)
    }
    df["cusip"] = df["identifier"].replace(cusip_map)
    cusips = df.loc[[df.index[-1]], "cusip"]
    # One placeholder for the date plus one per cusip.
    placeholders = ",".join(["%s"] * (1 + len(cusips)))
    sql_string = f"SELECT * FROM historical_cusip_risk({placeholders})"
    etengine = dbengine("etdb")
    model = pd.read_sql_query(
        sql_string, etengine, parse_dates=["pricingdate"], params=(date, *cusips)
    )
    model.index = cusips
    calc_df = df.loc[[df.index[-1]]].set_index("cusip").join(model)
    calc_df["hy_equiv"] = calc_df.delta * calc_df.endbookmv
    calc_df["date"] = date
    return calc_df.set_index("date")
