import os import os.path import pandas as pd from pandas.tseries.offsets import BDay, DateOffset import numpy as np from intex_scenarios import dealname_from_cusip from zipfile import ZipFile import re, datetime, yaml from db import query_db, conn from quantlib.time.api import ( Schedule, Actual360, Period, Months, Calendar, Unadjusted, ModifiedFollowing, Date, ) from quantlib.util.converter import pydate_to_qldate, qldate_to_pydate from quantlib.settings import Settings from yieldcurve import YC from optimization import KLfit import rpy2.robjects as ro import sys def sanitize_float(string): try: string = string.replace(",", "") if "(" in string: return -float(string[1:-1]) else: return float(string) except AttributeError: return string def processzipfiles(tradedate=datetime.date.today()): pricesdir = os.path.join( os.environ["BASE_DIR"], "Scenarios", "Prices_" + str(tradedate) ) zipfiles = [ os.path.join(pricesdir, f) for f in os.listdir(pricesdir) if f.endswith(".zip") ] zipfiles = sorted(zipfiles, key=lambda x: os.stat(x).st_ctime) cusips_dict = {} dealnames_dict = {} for i, zf in enumerate(zipfiles): with ZipFile(zf) as myzip: allfiles = set([f.filename.split("-")[0] for f in myzip.infolist()]) allfiles = allfiles - {"Total"} dealnames = set( [ f.filename.split("-")[0] for f in myzip.infolist() if re.search("COLLAT.*Scen100", f.filename) ] ) cusips = allfiles - dealnames dealnames = [d.lower() for d in dealnames] cusips_dict.update((cusip, i) for cusip in cusips) dealnames_dict.update((dealname, i) for dealname in dealnames) return {"dealnames": dealnames_dict, "cusips": cusips_dict, "zipfiles": zipfiles} def get_configfile(dealname, tradedate): configfile = os.path.join( os.environ["BASE_DIR"], "Scenarios", "Intex curves_" + str(tradedate), "csv", dealname + ".config", ) try: with open(configfile) as fh: config = yaml.load(fh, Loader=yaml.FullLoader) except FileNotFoundError: config = {"reinvflag": True} return config def get_dist(date): distfile = os.path.join( os.environ["BASE_DIR"], "Scenarios", "Calibration", "marketdata-{0:%Y-%m-%d}.RData".format(date), ) dist = ro.r.load(distfile) return {"L": np.array(dist[0]), "R": np.array(dist[1])} def get_dealdata(dealname, tradedate): sqlstr = ( 'select "Curr Collat Bal" AS currbal, reinv_end_date, first_pay_date,' 'maturity, "Principal Bal", pay_day from historical_clo_universe(%s, %s)' ) data = {k: v for k, v in query_db(sqlstr, (dealname, tradedate)).items()} data["mv"] = query_db( "select marketvalue from latest_deal_model_numbers where dealname = %s", (dealname,), )[0] return data def get_cusipdata(cusip, tradedate): sqlstring = "select curr_balance, spread from historical_cusip_universe(%s, %s)" sqldata = query_db(sqlstr, (cusip, str(tradedate))) return sqldata def get_dealschedule(dealdata, freq="1Mo", adj=Unadjusted): us_cal = Calendar.from_name("USA") if ( not dealdata["Pay Day"] or abs((dealdata["Deal Next Pay Date"] - dealdata["Pay Day"]).days - 90) > 10 ): dealdata["Pay Day"] = dealdata["Deal Next Pay Date"] - DateOffset(months=3) return Schedule( pydate_to_qldate(dealdata["Pay Day"]), pydate_to_qldate(dealdata["maturity"]), Period(freq), us_cal, adj, adj, ) def dealname_from_cusip(conn, cusips): with conn.cursor() as c: c.callproc("dealname_from_cusip", cusips) dealnames = [d[0] for d in c] return dealnames def discounts(tradedate): calibration_date = tradedate - BDay(1) m = YC(calibration_date) alldates = pd.date_range( pd.to_datetime(m.reference_date, format="%d/%m/%Y"), pd.to_datetime(m.max_date, format="%d/%m/%Y"), ) alldates_ql = [Date(d.day, d.month, d.year) for d in alldates] df = [m.discount(day) for day in alldates_ql] yearfrac = alldates.to_series() - alldates[0] yearfrac = yearfrac.astype("timedelta64[D]") / 365 return pd.DataFrame({"yearfrac": yearfrac, "df": df}, index=alldates) def getdealcf(dealnames, zipfiles, tradedate=datetime.date.today()): n_scenarios = 100 cfdata = {} discounts_table = discounts(tradedate) fields = ["Cashflow", "Principal", "Interest"] for dealname, i in dealnames.items(): zipfile = zipfiles[i] dealdata = get_dealdata(dealname, tradedate) cfdata[dealname] = {k: dealdata[k] for k in ["mv", "currbal"]} config = get_configfile(dealname, tradedate) if not dealdata["reinv_end_date"] or not config["reinvflag"]: tranches = ["COLLAT"] else: tranches = ["COLLAT_INITIAL", "COLLAT_REINVEST"] cf = {} with ZipFile(zipfile) as myzip: for tranche in tranches: scen = np.zeros((100, 3)) for j in range(n_scenarios): filename = "{0}-{1}-CF-Scen{2}.txt".format( dealname.upper(), tranche, j + 1 ) data = pd.read_table( myzip.open(filename), skiprows=[1, 2], parse_dates=[0], thousands=",", date_parser=lambda x: datetime.datetime.strptime( x, "%b %d, %Y" ), index_col=0, ) for c in fields: if data.dtypes[c] != np.dtype("float64"): data[c] = data[c].apply(sanitize_float) data = data[fields].join(discounts_table) scen[j, :] = np.dot(data.df, data[fields]) cf[tranche] = pd.DataFrame(scen, columns=fields) cfdata[dealname]["panel"] = pd.concat(cf) cf = cfdata[dealname]["panel"].Cashflow.sum(level=1) cfdata[dealname]["wapbasis"] = (cf.mean() - dealdata["mv"]) / dealdata["mv"] program = KLfit( cf.values / 1e8, np.ones(n_scenarios) / n_scenarios, dealdata["mv"] / 1e8 ) cfdata[dealname]["weight"] = program["weight"] print(dealname) return cfdata def getcusipcf(params, cfdata, tradedate): calibration_date = tradedate - BDay(1) dist = get_dist(calibration_date) cusipdata = {} n_scenarios = 100 intexfields = [ "Cashflow", "Principal", "Interest", "Balance", "Accum Interest Shortfall", ] fields = ["Cashflow", "Principal", "Interest"] cusips = list(params["cusips"].keys()) dealnames = dealname_from_cusip(conn, cusips) cusips = [c for c, d in zip(cusips, dealnames) if d in cfdata.keys()] discounts_table = discounts(tradedate) for i, cusip in enumerate(cusips): zipfile = params["zipfiles"][params["cusips"][cusip]] dealname = dealnames[i] dealdata = get_dealdata(dealname, tradedate) with ZipFile(zipfile) as myzip: scen = np.zeros((n_scenarios, 5)) for j in range(n_scenarios): filename = "{0}-CF-Scen{1}.txt".format(cusip, j + 1) data = pd.read_table( myzip.open(filename), skiprows=[1, 2], parse_dates=[0], thousands=",", date_parser=lambda x: datetime.datetime.strptime(x, "%b %d, %Y"), index_col=0, usecols=range(6), ) for c in intexfields: if data.dtypes[c] != np.dtype("float64"): data[c] = data[c].apply(sanitize_float) data["Balance"] = np.maximum( data.Balance - data["Accum Interest Shortfall"], 0 ) data = data.join(discounts_table) scen[j, :3] = data.df.dot(data[fields]) scen[j, 3] = ( data.Balance.diff()[1:].dot(data.yearfrac.shift()[1:]) / dealdata["currbal"] ) scen[j, 4] = ( np.dot(data.Cashflow, data.df * data.yearfrac) / scen[j, 0] if scen[j, 0] else 0 ) def compute_delta(dist, dealweight, cusip_pv, tradedate, K1=0, K2=1): Ngrid, nT = dist["L"].shape scenariosl = np.zeros((dealweight.size, nT)) scenariosr = np.zeros((dealweight.size, nT)) for t in range(nT): scenariosl[:, t] = interpvalues( dist["L"][:, t], np.linspace(0, 1, Ngrid), dealweight ) scenariosr[:, t] = interpvalues( dist["R"][:, t], np.linspace(0, 1, Ngrid), dealweight ) if __name__ == "__main__": if len(sys.argv) > 1: tradedate = datetime.datetime.strptime(sys.argv[1], "%Y-%m-%d").date() else: tradedate = datetime.date.today() params = processzipfiles(tradedate) cfdata = getdealcf(params["dealnames"], params["zipfiles"], tradedate) cusipdata = getcusipcf(params, cfdata, tradedate)