aboutsummaryrefslogtreecommitdiffstats
path: root/python/load_cf.py
blob: cdbb4b2ee72abacdc1e212baea885056e9b3fad5 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
import os
import os.path
import pandas as pd
from pandas.tseries.offsets import BDay, DateOffset
import numpy as np

from intex_scenarios import dealname_from_cusip
from zipfile import ZipFile
import re, datetime, yaml

from db import query_db, conn
from quantlib.time.api import Schedule, Actual360, Period, Months, Calendar, Unadjusted, ModifiedFollowing, Date
from quantlib.util.converter import pydate_to_qldate, qldate_to_pydate
from quantlib.settings import Settings
from yieldcurve import YC
from optimization import KLfit
import rpy2.robjects as ro
import sys

def sanitize_float(string):
    try:
        string = string.replace(",","")
        if "(" in string:
            return - float(string[1:-1])
        else:
            return float(string)
    except AttributeError:
        return string


def processzipfiles(tradedate=datetime.date.today()):
    pricesdir = os.path.join(os.environ['BASE_DIR'], "Scenarios", "Prices_" + str(tradedate))
    zipfiles = [os.path.join(pricesdir, f) for f in os.listdir(pricesdir) \
                if f.endswith(".zip")]
    zipfiles = sorted(zipfiles, key=lambda x: os.stat(x).st_ctime)
    cusips_dict = {}
    dealnames_dict = {}
    for i, zip in enumerate(zipfiles):
        with ZipFile(zip) as myzip:
            allfiles = set([f.filename.split("-")[0] for f in myzip.infolist()])
            allfiles = allfiles - {"Total"}
            dealnames = set([f.filename.split("-")[0] for f in myzip.infolist() \
                             if re.search("COLLAT.*Scen100", f.filename)])
            cusips = allfiles - dealnames
            dealnames = [d.lower() for d in dealnames]
            cusips_dict.update((cusip, i) for cusip in cusips)
            dealnames_dict.update((dealname, i) for dealname in dealnames)
    return {"dealnames": dealnames_dict, "cusips":cusips_dict, "zipfiles": zipfiles}


def get_configfile(dealname, tradedate):
    configfile = os.path.join(os.environ['BASE_DIR'], "Scenarios", "Intex curves_" + str(tradedate),
                              "csv", dealname + ".config")
    try:
        with open(configfile) as fh:
            config = yaml.load(fh)
    except FileNotFoundError:
        config = {"reinvflag": True}
    return config

def get_dist(date):
    distfile = os.path.join(os.environ['BASE_DIR'], "Scenarios", "Calibration",
                            "marketdata-{0:%Y-%m-%d}.RData".format(date))
    dist = ro.r.load(distfile)
    return {"L": np.array(dist[0]),"R": np.array(dist[1])}

def get_dealdata(dealname, tradedate):
    sqlstr = "select \"Curr Collat Bal\" AS currbal, reinv_end_date, first_pay_date," \
             "maturity, \"Principal Bal\", pay_day  from historical_clo_universe(%s, %s)"
    data = {k: v for k, v in query_db(sqlstr, (dealname, tradedate)).items()}
    data["mv"] = query_db("select marketvalue from latest_deal_model_numbers where dealname = %s",
                  (dealname,))[0]
    return data

def get_cusipdata(cusip, tradedate):
    sqlstring = "select curr_balance, spread from historical_cusip_universe(%s, %s)"
    sqldata = query_db(sqlstr, (cusip, str(tradedate)))
    return sqldata

def get_dealschedule(dealdata, freq='1Mo', adj=Unadjusted):
    us_cal = Calendar.from_name('USA')
    if not dealdata["Pay Day"] or abs((dealdata["Deal Next Pay Date"]- dealdata["Pay Day"]).days - 90)>10:
        dealdata["Pay Day"] = dealdata["Deal Next Pay Date"] - DateOffset(months=3)
    return Schedule(pydate_to_qldate(dealdata["Pay Day"]), pydate_to_qldate(dealdata["maturity"]),
                     Period(freq), us_cal, adj, adj)

def dealname_from_cusip(conn, cusips):
    with conn.cursor() as c:
        c.callproc("dealname_from_cusip", cusips)
        dealnames = [d[0] for d in c]
    return dealnames

def discounts(tradedate):
    calibration_date = tradedate-BDay(1)
    m = YC(calibration_date)
    alldates = pd.date_range(pd.to_datetime(m.reference_date, format='%d/%m/%Y'),
                             pd.to_datetime(m.max_date, format='%d/%m/%Y'))
    alldates_ql = [Date(d.day, d.month, d.year) for d in alldates]
    df = [m.discount(day) for day in alldates_ql]
    yearfrac = alldates.to_series()-alldates[0]
    yearfrac = yearfrac.astype('timedelta64[D]')/365
    return pd.DataFrame({'yearfrac':yearfrac, 'df': df}, index=alldates)

def getdealcf(dealnames, zipfiles, tradedate = datetime.date.today()):
    n_scenarios = 100
    cfdata = {}
    discounts_table= discounts(tradedate)
    fields = ["Cashflow", "Principal", "Interest"]
    for dealname, i in dealnames.items():
        zipfile = zipfiles[i]
        dealdata = get_dealdata(dealname, tradedate)
        cfdata[dealname] = {k: dealdata[k] for k in ["mv", "currbal"]}
        config = get_configfile(dealname, tradedate)

        if (not dealdata["reinv_end_date"] or not config["reinvflag"]):
             tranches = ["COLLAT"]
        else:
            tranches = ["COLLAT_INITIAL", "COLLAT_REINVEST"]

        cf = {}
        with ZipFile(zipfile) as myzip:
            for tranche in tranches:
                scen = np.zeros((100,3))
                for j in range(n_scenarios):
                    filename = "{0}-{1}-CF-Scen{2}.txt".format(dealname.upper(), tranche, j+1)
                    data = pd.read_table(myzip.open(filename), skiprows=[1, 2], parse_dates=[0],
                                         thousands=",",
                                         date_parser = lambda x: datetime.datetime.strptime(x, "%b %d, %Y"),
                                         index_col=0)

                    for c in fields:
                        if data.dtypes[c] != np.dtype('float64'):
                            data[c] = data[c].apply(sanitize_float)
                    data = data[fields].join(discounts_table)
                    scen[j,:] = np.dot(data.df, data[fields])
                cf[tranche] = pd.DataFrame(scen, columns=fields)
        cfdata[dealname]["panel"] = pd.concat(cf)
        cf = cfdata[dealname]["panel"].Cashflow.sum(level=1)
        cfdata[dealname]["wapbasis"] = (cf.mean() - dealdata["mv"])/dealdata["mv"]
        program = KLfit(cf.values/1e8, np.ones(n_scenarios)/n_scenarios, dealdata["mv"]/1e8)
        cfdata[dealname]["weight"] = program["weight"]
        print(dealname)
    return cfdata

def getcusipcf(params, cfdata, tradedate):
    calibration_date = tradedate-BDay(1)
    dist = get_dist(calibration_date)
    cusipdata = {}
    n_scenarios = 100
    intexfields = ["Cashflow", "Principal", "Interest", "Balance", "Accum Interest Shortfall"]
    fields = ["Cashflow", "Principal", "Interest"]
    cusips = list(params["cusips"].keys())
    dealnames = dealname_from_cusip(conn, cusips)
    cusips = [c for c, d in zip(cusips, dealnames) if d in cfdata.keys()]
    discounts_table = discounts(tradedate)
    for i, cusip in enumerate(cusips):
        zipfile = params["zipfiles"][params["cusips"][cusip]]
        dealname = dealnames[i]
        dealdata = get_dealdata(dealname, tradedate)
        with ZipFile(zipfile) as myzip:
            scen = np.zeros((n_scenarios, 5))
            for j in range(n_scenarios):
                filename = "{0}-CF-Scen{1}.txt".format(cusip, j+1)
                data = pd.read_table(myzip.open(filename), skiprows=[1, 2], parse_dates=[0],
                                     thousands = ",",
                                     date_parser = lambda x: datetime.datetime.strptime(x, "%b %d, %Y"),
                                     index_col = 0,
                                     usecols=range(6))
                for c in intexfields:
                    if data.dtypes[c] != np.dtype('float64'):
                        data[c] = data[c].apply(sanitize_float)
                data["Balance"] = np.maximum(data.Balance - data["Accum Interest Shortfall"], 0)
                data = data.join(discounts_table)
                scen[j,:3] = data.df.dot(data[fields])
                scen[j,3] = data.Balance.diff()[1:].dot(data.yearfrac.shift()[1:])/dealdata["currbal"]
                scen[j,4] = np.dot(data.Cashflow, data.df * data.yearfrac)/scen[j,0] if scen[j,0]  else 0

def compute_delta(dist, dealweight, cusip_pv, tradedate, K1 = 0, K2 = 1):
    Ngrid, nT = dist["L"].shape
    scenariosl = np.zeros((dealweight.size, nT))
    scenariosr = np.zeros((dealweight.size, nT))
    for t in range(nT):
        scenariosl[:,t] = interpvalues(dist["L"][:,t], np.linspace(0, 1, Ngrid), dealweight)
        scenariosr[:,t] = interpvalues(dist["R"][:,t], np.linspace(0, 1, Ngrid), dealweight)


if __name__=="__main__":
    if len(sys.argv)>1:
        tradedate = datetime.datetime.strptime(sys.argv[1], "%Y-%m-%d").date()
    else:
        tradedate = datetime.date.today()
    params = processzipfiles(tradedate)
    cfdata = getdealcf(params["dealnames"], params["zipfiles"], tradedate)
    cusipdata = getcusipcf(params, cfdata, tradedate)