"""Load GlobeOp daily report CSVs (valuation, PnL, CDS, swaption), clean them
up and push them into the database."""
from glob import iglob
import os
from pathlib import Path
from itertools import chain

import pandas as pd

from dates import bus_day
from utils.db import dbengine


def get_globs(fname, years=["2013", "2014", "2015", "2016", "2017"]):
    """Return glob iterators over the per-day report directories for `fname`."""
    basedir = "/home/serenitas/Daily"
    globs = [
        iglob(
            os.path.join(
                basedir, year, "{0}_*/{0}*/Reports/{1}.csv".format(year, fname)
            )
        )
        for year in years
    ]
    # More recent years also use flat YYYY-MM-DD directories directly under basedir.
    for year in years[-2:]:
        globs.append(
            iglob(os.path.join(basedir, "{0}-*/Reports/{1}.csv".format(year, fname)))
        )
    return globs


def read_valuation_report(f):
    """Read one Valuation_Report.csv and normalise its columns."""
    f = Path(f)  # glob yields plain strings; Path gives access to .parts
    date = pd.Timestamp(f.parts[4])
    if date >= pd.Timestamp("2013-02-06"):
        df = pd.read_csv(f, parse_dates=["KnowledgeDate", "PeriodEndDate"])
    else:
        # Older reports lack the date columns; derive them from the directory name.
        df = pd.read_csv(f)
        df["KnowledgeDate"] = date
        df["PeriodEndDate"] = date - bus_day
    df["row"] = df.index
    if "AccountingPeriod" in df:
        del df["AccountingPeriod"]
    if "CounterParty" in df:
        del df["CounterParty"]
    df = df.rename(columns={"CounterPartyCode": "counterparty"})
    df.counterparty = df.counterparty.str.replace("BARC$", "BARCNY")
    if "Strat" in df:
        df.Strat = df.Strat.str.replace("^(SERCGMAST__){1,2}(M_|SER_)?", "", 1)
        df = df.replace({"Strat": {"TCDSCSH": "TCSH", "MTG_CRT_LD": "CRT_LD"}})
    if "Port" in df:
        df.Port = df.Port.str.replace("^(SERCGMAST__){1,2}(SERG__|SERG_)?", "", 1)
    df.columns = df.columns.str.lower()
    return df


def valuation_reports():
    """Load all valuation reports and append them to the valuation_reports table."""
    df = pd.concat(
        read_valuation_report(f)
        for f in chain.from_iterable(get_globs("Valuation_Report"))
    )
    # There can be duplicates in case of holidays
    df = df.sort_values(["periodenddate", "row", "knowledgedate"])
    df = df.drop_duplicates(["periodenddate", "row"], keep="last")
    df.to_sql("valuation_reports", dbengine("dawndb"), if_exists="append", index=False)


def read_pnl_report(f):
    """Read one PnL report and normalise strategy/portfolio names."""
    df = pd.read_csv(f)
    df.Strat = df.Strat.str.replace("^(SERCGMAST__){1,2}(M_|SER_)?", "", 1)
    df = df.replace({"Strat": {"TCDSCSH": "TCSH", "MTG_CRT_LD": "CRT_LD"}})
    df.Port = df.Port.str.replace("^(SERCGMAST__){1,2}(SERG__|SERG_)?", "", 1)
    df["LongShortIndicator"] = df["LongShortIndicator"].str.strip()
    df.columns = df.columns.str.lower().str.replace(" ", "")
    return df


def pnl_reports():
    """Load all PnL reports and append them to the pnl_reports table."""
    df = {}
    for f in chain.from_iterable(get_globs("Pnl*")):
        # Keep only the plain Pnl.csv / Pnl_Report.csv files, not other Pnl* variants.
        if not (f.endswith("Pnl.csv") or f.endswith("Pnl_Report.csv")):
            continue
        date = pd.Timestamp(f.rsplit("/", 3)[1])
        date = date - bus_day
        df[date] = read_pnl_report(f)
    df = pd.concat(df, names=["date", "row"]).reset_index()
    df.to_sql("pnl_reports", dbengine("dawndb"), if_exists="append", index=False)


def read_cds_report(f, old_report=False):
    """Read a CDS_Report.csv, join it with the matching All_Report.csv and clean it."""
    # `old_report` is computed by cds_reports() for pre-March-2017 files; it is not
    # used by the current parsing logic but is accepted so the call signature matches.
    f = Path(f)  # needed for .parent below
    df = pd.read_csv(f)
    df2 = pd.read_csv(f.parent / "All_Report.csv")

    def drop_zero_count(df):
        # Drop columns that contain no values at all; iterate over a copy of the
        # column labels because we delete columns as we go.
        for col in list(df.columns):
            if len(df[col].value_counts()) == 0:
                del df[col]

    drop_zero_count(df)
    drop_zero_count(df2)
    # Default unrecognised contractual definitions to ISDA2014.
    contract = df["Contractual Definition"]
    contract = contract.where(contract.isin(["ISDA2014", "ISDA2003Cred"]), "ISDA2014")
    df["Contractual Definition"] = contract
    to_drop = [
        "Bloomberg Yellow key",
        "Created User",
        "Last Modified User",
        "Last Modified Date",
        "Fund Long Name",
        "Instrument Sub Type",
        "Netting Id",
        "Client",
        "Trade Status",
        "Position Status",
        "Clearing Broker",
        "Settle Mode",
        "Off Price",
        "On Price",
        "Price Ccy",
        "VAT",
        "SEC Fee",
        "Clearing Fee",
        "Trading Notional",
        "BBGID",
        "Business Unit",
        "Option Type",
    ]
    df = df.drop(to_drop, axis=1, errors="ignore")
    df2 = df2.drop(to_drop, axis=1, errors="ignore")
    df.columns = df.columns.str.lower().str.replace(" ", "_")
    df2.columns = df2.columns.str.lower().str.replace(" ", "_")
    df.calendar = df.calendar.str.replace(" ", "")
    df = df.rename(columns={"direction": "buy/sell"})
    df.roll_convention = df.roll_convention.str.title()
    df = df[df.strategy != "SER_TEST"]
    df.loc[df.strategy == "SERCGMAST__MBSCDS", "strategy"] = "MBSCDS"
    df.strategy = df.strategy.str.replace("SER_", "")
    df["buy/sell"] = df["buy/sell"].astype("category")
    df["buy/sell"].cat.categories = ["Buyer", "Seller"]
    del df["independent_%"]
    df2 = df2.rename(columns={"independent_%": "independent_perc"})
    df.prime_broker = df.prime_broker.where(df.prime_broker != "NONE")
    # Pull in the columns that only exist in All_Report.csv.
    return (
        df.set_index("gtid")
        .join(df2.set_index("gtid")[df2.columns.difference(df.columns)])
        .reset_index()
    )


def read_swaption_report(f):
    """Read a swaption report and join in the Geneva ID from All_Report.csv."""
    f = Path(f)  # needed for .parent below
    df = pd.read_csv(f)
    df2 = pd.read_csv(f.parent / "All_Report.csv")

    def drop_zero_count(df):
        # Drop empty columns; iterate over a copy of the labels since we delete as we go.
        for k in list(df.columns):
            if len(df[k].value_counts()) == 0:
                del df[k]

    drop_zero_count(df)
    drop_zero_count(df2)
    # df2 = df2[df2["Product Sub Type"] == "CD_INDEX_OPTION"]
    # df = df[df["Product Sub Type"] == "CD_INDEX_OPTION"]
    df = df.set_index("GTID").join(df2.set_index("GTID")[["Geneva ID"]])
    for key in [
        "Created User",
        "Last Modified User",
        "Last Modified Date",
        "Trade Status",
        "Position Status",
        "Client",
        "External Trade ID",
        "Fund",
        "Fund Long Name",
        "Prime Broker",
        "Transaction Status",
        "Created Date",
        "Comments",
        "Trade Type",
    ]:
        del df[key]
    # Parse all date columns except the "End Date" ones.
    for k, v in df.iteritems():
        if "Date" in k and "End Date" not in k:
            df[k] = pd.to_datetime(v)
    return df


def cds_reports():
    """Load all CDS reports into a single frame indexed by date and row."""
    df = {}
    for f in chain.from_iterable(get_globs("CDS_Report")):
        date = pd.Timestamp(f.rsplit("/", 3)[1])
        old_report = date <= pd.Timestamp("2017-02-28") or date == pd.Timestamp(
            "2017-03-02"
        )
        date = date - bus_day
        df[date] = read_cds_report(f, old_report)
    df = pd.concat(df, names=["date", "row"]).reset_index()
    return df


def monthly_pnl_bycusip(df, strats):
    """Month-end MTD P&L by cusip (invid) for the given strategies."""
    df = df[(df.strat.isin(strats)) & (df.custacctname == "V0NSCLMAMB")]
    pnl_cols = [
        "bookunrealmtm",
        "bookrealmtm",
        "bookrealincome",
        "bookunrealincome",
        "totalbookpl",
    ]
    return df.groupby("invid").resample("M").last()[["mtd" + col for col in pnl_cols]]


if __name__ == "__main__":
    valuation_reports()
    pnl_reports()
    df_val = pd.read_hdf("globeop.hdf", "valuation_report")
    df_pnl = pd.read_hdf("globeop.hdf", "pnl")
    nav = (
        df_val[df_val.Fund == "SERCGMAST"].groupby("PeriodEndDate")["EndBookNAV"].sum()
    )
    subprime_strats = ["MTG_GOOD", "MTG_RW", "MTG_IO", "MTG_THRU", "MTG_B4PR"]
    clo_strats = ["CLO_BBB", "CLO_AAA", "CLO_BB20"]
    ## daily pnl by cusip
    # subprime_daily_pnl = daily_pnl_bycusip(df_pnl, subprime_strats)
    df_monthly = monthly_pnl_bycusip(df_pnl, subprime_strats)
    # df_monthly.loc[idx[ts('2015-01-01'):ts('2015-01-31'),:],:]
    # clo = df_pnl[df_pnl.Strat.isin(clo_strats)]
    # clo_monthly_pnl = clo.groupby(level=0).sum()['MTD TotalBookPL'].resample('M').last()
    # clo.groupby(level=0).sum()['2015-12-01':'2015-12-31']
    df_val.set_index(["custacctname", "periodenddate", "invid", "strat"])