aboutsummaryrefslogtreecommitdiffstats
path: root/python/load_cf.py
blob: e198b7e7164a3f3e5e9a02912256f64fe7bd27de (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
import os
import os.path
import pandas as pd
from pandas.tseries.offsets import BDay, DateOffset
import numpy as np

from intex_scenarios import dealname_from_cusip
from zipfile import ZipFile
import re, datetime, yaml

from db import query_db, conn
from quantlib.time.api import (
    Schedule,
    Actual360,
    Period,
    Months,
    Calendar,
    Unadjusted,
    ModifiedFollowing,
    Date,
)
from quantlib.util.converter import pydate_to_qldate, qldate_to_pydate
from quantlib.settings import Settings
from yieldcurve import YC
from optimization import KLfit
import rpy2.robjects as ro
import sys


def sanitize_float(string):
    try:
        string = string.replace(",", "")
        if "(" in string:
            return -float(string[1:-1])
        else:
            return float(string)
    except AttributeError:
        return string


def processzipfiles(tradedate=datetime.date.today()):
    pricesdir = os.path.join(
        os.environ["BASE_DIR"], "Scenarios", "Prices_" + str(tradedate)
    )
    zipfiles = [
        os.path.join(pricesdir, f) for f in os.listdir(pricesdir) if f.endswith(".zip")
    ]
    zipfiles = sorted(zipfiles, key=lambda x: os.stat(x).st_ctime)
    cusips_dict = {}
    dealnames_dict = {}
    for i, zf in enumerate(zipfiles):
        with ZipFile(zf) as myzip:
            allfiles = set([f.filename.split("-")[0] for f in myzip.infolist()])
            allfiles = allfiles - {"Total"}
            dealnames = set(
                [
                    f.filename.split("-")[0]
                    for f in myzip.infolist()
                    if re.search("COLLAT.*Scen100", f.filename)
                ]
            )
            cusips = allfiles - dealnames
            dealnames = [d.lower() for d in dealnames]
            cusips_dict.update((cusip, i) for cusip in cusips)
            dealnames_dict.update((dealname, i) for dealname in dealnames)
    return {"dealnames": dealnames_dict, "cusips": cusips_dict, "zipfiles": zipfiles}


def get_configfile(dealname, tradedate):
    configfile = os.path.join(
        os.environ["BASE_DIR"],
        "Scenarios",
        "Intex curves_" + str(tradedate),
        "csv",
        dealname + ".config",
    )
    try:
        with open(configfile) as fh:
            config = yaml.load(fh, Loader=yaml.FullLoader)
    except FileNotFoundError:
        config = {"reinvflag": True}
    return config


def get_dist(date):
    distfile = os.path.join(
        os.environ["BASE_DIR"],
        "Scenarios",
        "Calibration",
        "marketdata-{0:%Y-%m-%d}.RData".format(date),
    )
    dist = ro.r.load(distfile)
    return {"L": np.array(dist[0]), "R": np.array(dist[1])}


def get_dealdata(dealname, tradedate):
    sqlstr = (
        'select "Curr Collat Bal" AS currbal, reinv_end_date, first_pay_date,'
        'maturity, "Principal Bal", pay_day  from historical_clo_universe(%s, %s)'
    )
    data = {k: v for k, v in query_db(sqlstr, (dealname, tradedate)).items()}
    data["mv"] = query_db(
        "select marketvalue from latest_deal_model_numbers where dealname = %s",
        (dealname,),
    )[0]
    return data


def get_cusipdata(cusip, tradedate):
    sqlstring = "select curr_balance, spread from historical_cusip_universe(%s, %s)"
    sqldata = query_db(sqlstr, (cusip, str(tradedate)))
    return sqldata


def get_dealschedule(dealdata, freq="1Mo", adj=Unadjusted):
    us_cal = Calendar.from_name("USA")
    if (
        not dealdata["Pay Day"]
        or abs((dealdata["Deal Next Pay Date"] - dealdata["Pay Day"]).days - 90) > 10
    ):
        dealdata["Pay Day"] = dealdata["Deal Next Pay Date"] - DateOffset(months=3)
    return Schedule(
        pydate_to_qldate(dealdata["Pay Day"]),
        pydate_to_qldate(dealdata["maturity"]),
        Period(freq),
        us_cal,
        adj,
        adj,
    )


def dealname_from_cusip(conn, cusips):
    with conn.cursor() as c:
        c.callproc("dealname_from_cusip", cusips)
        dealnames = [d[0] for d in c]
    return dealnames


def discounts(tradedate):
    calibration_date = tradedate - BDay(1)
    m = YC(calibration_date)
    alldates = pd.date_range(
        pd.to_datetime(m.reference_date, format="%d/%m/%Y"),
        pd.to_datetime(m.max_date, format="%d/%m/%Y"),
    )
    alldates_ql = [Date(d.day, d.month, d.year) for d in alldates]
    df = [m.discount(day) for day in alldates_ql]
    yearfrac = alldates.to_series() - alldates[0]
    yearfrac = yearfrac.astype("timedelta64[D]") / 365
    return pd.DataFrame({"yearfrac": yearfrac, "df": df}, index=alldates)


def getdealcf(dealnames, zipfiles, tradedate=datetime.date.today()):
    n_scenarios = 100
    cfdata = {}
    discounts_table = discounts(tradedate)
    fields = ["Cashflow", "Principal", "Interest"]
    for dealname, i in dealnames.items():
        zipfile = zipfiles[i]
        dealdata = get_dealdata(dealname, tradedate)
        cfdata[dealname] = {k: dealdata[k] for k in ["mv", "currbal"]}
        config = get_configfile(dealname, tradedate)

        if not dealdata["reinv_end_date"] or not config["reinvflag"]:
            tranches = ["COLLAT"]
        else:
            tranches = ["COLLAT_INITIAL", "COLLAT_REINVEST"]

        cf = {}
        with ZipFile(zipfile) as myzip:
            for tranche in tranches:
                scen = np.zeros((100, 3))
                for j in range(n_scenarios):
                    filename = "{0}-{1}-CF-Scen{2}.txt".format(
                        dealname.upper(), tranche, j + 1
                    )
                    data = pd.read_table(
                        myzip.open(filename),
                        skiprows=[1, 2],
                        parse_dates=[0],
                        thousands=",",
                        date_parser=lambda x: datetime.datetime.strptime(
                            x, "%b %d, %Y"
                        ),
                        index_col=0,
                    )

                    for c in fields:
                        if data.dtypes[c] != np.dtype("float64"):
                            data[c] = data[c].apply(sanitize_float)
                    data = data[fields].join(discounts_table)
                    scen[j, :] = np.dot(data.df, data[fields])
                cf[tranche] = pd.DataFrame(scen, columns=fields)
        cfdata[dealname]["panel"] = pd.concat(cf)
        cf = cfdata[dealname]["panel"].Cashflow.sum(level=1)
        cfdata[dealname]["wapbasis"] = (cf.mean() - dealdata["mv"]) / dealdata["mv"]
        program = KLfit(
            cf.values / 1e8, np.ones(n_scenarios) / n_scenarios, dealdata["mv"] / 1e8
        )
        cfdata[dealname]["weight"] = program["weight"]
        print(dealname)
    return cfdata


def getcusipcf(params, cfdata, tradedate):
    calibration_date = tradedate - BDay(1)
    dist = get_dist(calibration_date)
    cusipdata = {}
    n_scenarios = 100
    intexfields = [
        "Cashflow",
        "Principal",
        "Interest",
        "Balance",
        "Accum Interest Shortfall",
    ]
    fields = ["Cashflow", "Principal", "Interest"]
    cusips = list(params["cusips"].keys())
    dealnames = dealname_from_cusip(conn, cusips)
    cusips = [c for c, d in zip(cusips, dealnames) if d in cfdata.keys()]
    discounts_table = discounts(tradedate)
    for i, cusip in enumerate(cusips):
        zipfile = params["zipfiles"][params["cusips"][cusip]]
        dealname = dealnames[i]
        dealdata = get_dealdata(dealname, tradedate)
        with ZipFile(zipfile) as myzip:
            scen = np.zeros((n_scenarios, 5))
            for j in range(n_scenarios):
                filename = "{0}-CF-Scen{1}.txt".format(cusip, j + 1)
                data = pd.read_table(
                    myzip.open(filename),
                    skiprows=[1, 2],
                    parse_dates=[0],
                    thousands=",",
                    date_parser=lambda x: datetime.datetime.strptime(x, "%b %d, %Y"),
                    index_col=0,
                    usecols=range(6),
                )
                for c in intexfields:
                    if data.dtypes[c] != np.dtype("float64"):
                        data[c] = data[c].apply(sanitize_float)
                data["Balance"] = np.maximum(
                    data.Balance - data["Accum Interest Shortfall"], 0
                )
                data = data.join(discounts_table)
                scen[j, :3] = data.df.dot(data[fields])
                scen[j, 3] = (
                    data.Balance.diff()[1:].dot(data.yearfrac.shift()[1:])
                    / dealdata["currbal"]
                )
                scen[j, 4] = (
                    np.dot(data.Cashflow, data.df * data.yearfrac) / scen[j, 0]
                    if scen[j, 0]
                    else 0
                )


def compute_delta(dist, dealweight, cusip_pv, tradedate, K1=0, K2=1):
    Ngrid, nT = dist["L"].shape
    scenariosl = np.zeros((dealweight.size, nT))
    scenariosr = np.zeros((dealweight.size, nT))
    for t in range(nT):
        scenariosl[:, t] = interpvalues(
            dist["L"][:, t], np.linspace(0, 1, Ngrid), dealweight
        )
        scenariosr[:, t] = interpvalues(
            dist["R"][:, t], np.linspace(0, 1, Ngrid), dealweight
        )


if __name__ == "__main__":
    if len(sys.argv) > 1:
        tradedate = datetime.datetime.strptime(sys.argv[1], "%Y-%m-%d").date()
    else:
        tradedate = datetime.date.today()
    params = processzipfiles(tradedate)
    cfdata = getdealcf(params["dealnames"], params["zipfiles"], tradedate)
    cusipdata = getcusipcf(params, cfdata, tradedate)