import os import os.path import pandas as pd from pandas.tseries.offsets import BDay, DateOffset import numpy as np from intex_scenarios import dealname_from_cusip from zipfile import ZipFile import re, datetime, yaml from db import query_db, conn from quantlib.time.api import Schedule, Actual360, Period, Months, Calendar, Unadjusted, ModifiedFollowing, Date from quantlib.util.converter import pydate_to_qldate, qldate_to_pydate from quantlib.settings import Settings from yieldcurve import YC from optimization import KLfit import rpy2.robjects as ro import sys def sanitize_float(string): try: string = string.replace(",","") if "(" in string: return - float(string[1:-1]) else: return float(string) except AttributeError: return string def processzipfiles(tradedate=datetime.date.today()): pricesdir = os.path.join(os.environ['BASE_DIR'], "Scenarios", "Prices_" + str(tradedate)) zipfiles = [os.path.join(pricesdir, f) for f in os.listdir(pricesdir) \ if f.endswith(".zip")] zipfiles = sorted(zipfiles, key=lambda x: os.stat(x).st_ctime) cusips_dict = {} dealnames_dict = {} for i, zf in enumerate(zipfiles): with ZipFile(zf) as myzip: allfiles = set([f.filename.split("-")[0] for f in myzip.infolist()]) allfiles = allfiles - {"Total"} dealnames = set([f.filename.split("-")[0] for f in myzip.infolist() \ if re.search("COLLAT.*Scen100", f.filename)]) cusips = allfiles - dealnames dealnames = [d.lower() for d in dealnames] cusips_dict.update((cusip, i) for cusip in cusips) dealnames_dict.update((dealname, i) for dealname in dealnames) return {"dealnames": dealnames_dict, "cusips":cusips_dict, "zipfiles": zipfiles} def get_configfile(dealname, tradedate): configfile = os.path.join(os.environ['BASE_DIR'], "Scenarios", "Intex curves_" + str(tradedate), "csv", dealname + ".config") try: with open(configfile) as fh: config = yaml.load(fh) except FileNotFoundError: config = {"reinvflag": True} return config def get_dist(date): distfile = os.path.join(os.environ['BASE_DIR'], "Scenarios", "Calibration", "marketdata-{0:%Y-%m-%d}.RData".format(date)) dist = ro.r.load(distfile) return {"L": np.array(dist[0]),"R": np.array(dist[1])} def get_dealdata(dealname, tradedate): sqlstr = "select \"Curr Collat Bal\" AS currbal, reinv_end_date, first_pay_date," \ "maturity, \"Principal Bal\", pay_day from historical_clo_universe(%s, %s)" data = {k: v for k, v in query_db(sqlstr, (dealname, tradedate)).items()} data["mv"] = query_db("select marketvalue from latest_deal_model_numbers where dealname = %s", (dealname,))[0] return data def get_cusipdata(cusip, tradedate): sqlstring = "select curr_balance, spread from historical_cusip_universe(%s, %s)" sqldata = query_db(sqlstr, (cusip, str(tradedate))) return sqldata def get_dealschedule(dealdata, freq='1Mo', adj=Unadjusted): us_cal = Calendar.from_name('USA') if not dealdata["Pay Day"] or abs((dealdata["Deal Next Pay Date"]- dealdata["Pay Day"]).days - 90)>10: dealdata["Pay Day"] = dealdata["Deal Next Pay Date"] - DateOffset(months=3) return Schedule(pydate_to_qldate(dealdata["Pay Day"]), pydate_to_qldate(dealdata["maturity"]), Period(freq), us_cal, adj, adj) def dealname_from_cusip(conn, cusips): with conn.cursor() as c: c.callproc("dealname_from_cusip", cusips) dealnames = [d[0] for d in c] return dealnames def discounts(tradedate): calibration_date = tradedate-BDay(1) m = YC(calibration_date) alldates = pd.date_range(pd.to_datetime(m.reference_date, format='%d/%m/%Y'), pd.to_datetime(m.max_date, format='%d/%m/%Y')) alldates_ql = [Date(d.day, d.month, d.year) for d in alldates] df = [m.discount(day) for day in alldates_ql] yearfrac = alldates.to_series()-alldates[0] yearfrac = yearfrac.astype('timedelta64[D]')/365 return pd.DataFrame({'yearfrac':yearfrac, 'df': df}, index=alldates) def getdealcf(dealnames, zipfiles, tradedate = datetime.date.today()): n_scenarios = 100 cfdata = {} discounts_table= discounts(tradedate) fields = ["Cashflow", "Principal", "Interest"] for dealname, i in dealnames.items(): zipfile = zipfiles[i] dealdata = get_dealdata(dealname, tradedate) cfdata[dealname] = {k: dealdata[k] for k in ["mv", "currbal"]} config = get_configfile(dealname, tradedate) if (not dealdata["reinv_end_date"] or not config["reinvflag"]): tranches = ["COLLAT"] else: tranches = ["COLLAT_INITIAL", "COLLAT_REINVEST"] cf = {} with ZipFile(zipfile) as myzip: for tranche in tranches: scen = np.zeros((100,3)) for j in range(n_scenarios): filename = "{0}-{1}-CF-Scen{2}.txt".format(dealname.upper(), tranche, j+1) data = pd.read_table(myzip.open(filename), skiprows=[1, 2], parse_dates=[0], thousands=",", date_parser = lambda x: datetime.datetime.strptime(x, "%b %d, %Y"), index_col=0) for c in fields: if data.dtypes[c] != np.dtype('float64'): data[c] = data[c].apply(sanitize_float) data = data[fields].join(discounts_table) scen[j,:] = np.dot(data.df, data[fields]) cf[tranche] = pd.DataFrame(scen, columns=fields) cfdata[dealname]["panel"] = pd.concat(cf) cf = cfdata[dealname]["panel"].Cashflow.sum(level=1) cfdata[dealname]["wapbasis"] = (cf.mean() - dealdata["mv"])/dealdata["mv"] program = KLfit(cf.values/1e8, np.ones(n_scenarios)/n_scenarios, dealdata["mv"]/1e8) cfdata[dealname]["weight"] = program["weight"] print(dealname) return cfdata def getcusipcf(params, cfdata, tradedate): calibration_date = tradedate-BDay(1) dist = get_dist(calibration_date) cusipdata = {} n_scenarios = 100 intexfields = ["Cashflow", "Principal", "Interest", "Balance", "Accum Interest Shortfall"] fields = ["Cashflow", "Principal", "Interest"] cusips = list(params["cusips"].keys()) dealnames = dealname_from_cusip(conn, cusips) cusips = [c for c, d in zip(cusips, dealnames) if d in cfdata.keys()] discounts_table = discounts(tradedate) for i, cusip in enumerate(cusips): zipfile = params["zipfiles"][params["cusips"][cusip]] dealname = dealnames[i] dealdata = get_dealdata(dealname, tradedate) with ZipFile(zipfile) as myzip: scen = np.zeros((n_scenarios, 5)) for j in range(n_scenarios): filename = "{0}-CF-Scen{1}.txt".format(cusip, j+1) data = pd.read_table(myzip.open(filename), skiprows=[1, 2], parse_dates=[0], thousands = ",", date_parser = lambda x: datetime.datetime.strptime(x, "%b %d, %Y"), index_col = 0, usecols=range(6)) for c in intexfields: if data.dtypes[c] != np.dtype('float64'): data[c] = data[c].apply(sanitize_float) data["Balance"] = np.maximum(data.Balance - data["Accum Interest Shortfall"], 0) data = data.join(discounts_table) scen[j,:3] = data.df.dot(data[fields]) scen[j,3] = data.Balance.diff()[1:].dot(data.yearfrac.shift()[1:])/dealdata["currbal"] scen[j,4] = np.dot(data.Cashflow, data.df * data.yearfrac)/scen[j,0] if scen[j,0] else 0 def compute_delta(dist, dealweight, cusip_pv, tradedate, K1 = 0, K2 = 1): Ngrid, nT = dist["L"].shape scenariosl = np.zeros((dealweight.size, nT)) scenariosr = np.zeros((dealweight.size, nT)) for t in range(nT): scenariosl[:,t] = interpvalues(dist["L"][:,t], np.linspace(0, 1, Ngrid), dealweight) scenariosr[:,t] = interpvalues(dist["R"][:,t], np.linspace(0, 1, Ngrid), dealweight) if __name__=="__main__": if len(sys.argv)>1: tradedate = datetime.datetime.strptime(sys.argv[1], "%Y-%m-%d").date() else: tradedate = datetime.date.today() params = processzipfiles(tradedate) cfdata = getdealcf(params["dealnames"], params["zipfiles"], tradedate) cusipdata = getcusipcf(params, cfdata, tradedate)