import os import os.path import pandas as pd from intex_scenarios import dealname_from_cusip from zipfile import ZipFile import re import datetime from dateutil.relativedelta import relativedelta from db import query_db import yaml from quantlib.time.api import Schedule, Actual360, Period, Months, Calendar, Unadjusted from quantlib.util.converter import pydate_to_qldate, qldate_to_pydate from quantlib.settings import Settings from yieldcurve import YC import numpy as np from optimization import KLfit import pdb import ryp2.robjects as ro if os.name =='nt': root = "//WDsentinel/share/CorpCDOs" elif os.name == 'posix': root = '/home/share/CorpCDOs' def sanitize_float(string): try: string = string.replace(",","") if "(" in string: return - float(string[1:-1]) else: return float(string) except AttributeError: return string def processzipfiles(workdate=datetime.date.today()): pricesdir = os.path.join(root, "Scenarios", "Prices_" + str(workdate)) zipfiles = [os.path.join(pricesdir, f) for f in os.listdir(pricesdir) \ if f.endswith(".zip")] zipfiles = sorted(zipfiles, key=lambda x: os.stat(x).st_ctime) cusips_dict = {} dealnames_dict = {} for i, zip in enumerate(zipfiles): with ZipFile(zip) as myzip: allfiles = set([f.filename.split("-")[0] for f in myzip.infolist()]) allfiles = allfiles - {"Total"} dealnames = set([f.filename.split("-")[0] for f in myzip.infolist() \ if re.search("COLLAT.*Scen100", f.filename)]) cusips = allfiles - dealnames dealnames = [d.lower() for d in dealnames] cusips_dict.update((cusip, i) for cusip in cusips) dealnames_dict.update((dealname, i) for dealname in dealnames) return {"dealnames": dealnames_dict, "cusips":cusips_dict, "zipfiles": zipfiles} def get_configfile(dealname, workdate): configfile = os.path.join(root, "Scenarios", "_".join(["Intex curves", str(workdate)]), "csv", dealname + ".config") try: with open(configfile) as fh: config = yaml.load(fh) except FileNotFoundError: config = {"reinvflag": True} return config def get_dist(workdate): distfile = os.path.join(root, "Scenarios", "marketdata-{0}.RData".format(str(workdate))) dist = ro.r.load(distfile) return {"L": np.array(dist[0]),"R": np.array(dist[1])} def get_dealdata(dealname, workdate): sqlstr = "select \"Curr Collat Bal\", \"Reinv End Date\", \"Deal Next Pay Date\"," \ "maturity, \"Principal Bal\", \"Pay Day\" from historical_clo_universe(%s, %s)" sqldata = query_db(sqlstr, (dealname, str(workdate))) return sqldata def get_cusipdata(cusip, workdate): sqlstring = "select curr_balance, spread from historical_cusip_universe(%s, %s)" sqldata = query_db(sqlstr, (cusip, str(workdate))) return sqldata def get_dealschedule(dealdata, freq='1Mo'): us_cal = Calendar.from_name('USA') if not dealdata["Pay Day"]: dealdata["Pay Day"] =dealdata["Deal Next Pay Date"] + relativedelta(months=-3) if abs((dealdata["Deal Next Pay Date"]- dealdata["Pay Day"]).days - 90)>10: dealdata["Pay Day"] = dealdata["Deal Next Pay Date"] + relativedelta(months=-6) return Schedule(pydate_to_qldate(dealdata["Pay Day"]), pydate_to_qldate(dealdata["maturity"]), Period(freq), us_cal, Unadjusted, Unadjusted) def getdealcf(dealnames, zipfiles, workdate = datetime.date.today()): fiels = ["Cashflow", "Principal", "Interest"] n_scenarios = 100 cfdata = {} m = YC(workdate) fields = ["Cashflow", "Principal", "Interest"] for dealname, i in dealnames.items(): zip = zipfiles[i] cfdata[dealname]={} mv = query_db("select marketvalue from latest_deal_model_numbers where dealname = %s", (dealname,))[0] dealdata = get_dealdata(dealname, workdate) cfdata[dealname] = {"mv":mv, "currbal": dealdata["Curr Collat Bal"]} config = get_configfile(dealname, workdate) sched = get_dealschedule(dealdata) pd_sched = [pd.to_datetime(str(day), format="%d/%m/%Y") for day in sched] discounts = pd.Series([m.discount(day) if day > m.settle_date else 1 for day in sched], index = pd_sched, name='df') if (not sqldata["Reinv End Date"] or not config["reinvflag"]): tranches = ["COLLAT"] else: tranches = ["COLLAT_INITIAL", "COLLAT_REINVEST"] cf = {} with ZipFile(zip) as myzip: for tranche in tranches: scen = {} for j in range(1, n_scenarios+1): filename = "{0}-{1}-CF-Scen{2}.txt".format(dealname.upper(), tranche, j) data = pd.read_table(myzip.open(filename), skiprows=[1, 2], parse_dates=[0], thousands=",", date_parser = lambda x: datetime.datetime.strptime(x, "%b %d, %Y"), index_col=0) for c in fields: if data.dtypes[c] != np.dtype('float64'): data[c] = data[c].apply(sanitize_float) data = data[fields].join(discounts) scen[j] = np.dot(data["df"], data[fields]) cf[tranche] = pd.DataFrame(scen).T cf[tranche].columns = fields cfdata[dealname]["panel"] = pd.Panel(cf) cf = cfdata[dealname]["panel"].minor_xs("Cashflow").T.sum() cfdata[dealname]["wapbasis"] = (cf.mean() - mv)/mv program = KLfit(cf.values/1e8, np.ones(n_scenarios)/n_scenarios, mv/1e8) cfdata[dealname]["weight"] = program["weight"] print(dealname) return cfdata def getcusipcf(dealnames, cusip, cfdata, workdate): dist = get_dist(workdate) cusipdata = {} n_scenarios = 100 m = YC(workdate) for i, cusip in enumerate(cusips): zip = zipfiles[i] dealname = dealnames[i] dealdata = getdealdata(dealname) sched = getdealschedule(dealdata) pd_sched = [pd.to_datetime(str(day), format="%d/%m/%Y") for day in sched] discounts = pd.Series([m.discount(day) if day > m.settle_date else 1 for day in sched], index = pd_sched, name='df') for j in range(1, n_scenarios): if __name__=="__main__": #workdate = datetime.date.today() workdate = datetime.datetime.strptime("2014-03-19", "%Y-%m-%d").date() params = processzipfiles(workdate) pomme = getdealcf(params["dealnames"], params["zipfiles"], workdate) pdb.set_trace()