1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
|
import os
import os.path
import pandas as pd
from pandas.tseries.offsets import BDay, DateOffset
import numpy as np
from intex_scenarios import dealname_from_cusip
from zipfile import ZipFile
import re, datetime, yaml
from db import query_db, conn
from quantlib.time.api import Schedule, Actual360, Period, Months, Calendar, Unadjusted, ModifiedFollowing, Date
from quantlib.util.converter import pydate_to_qldate, qldate_to_pydate
from quantlib.settings import Settings
from yieldcurve import YC
from optimization import KLfit
import rpy2.robjects as ro
import sys
def sanitize_float(string):
try:
string = string.replace(",","")
if "(" in string:
return - float(string[1:-1])
else:
return float(string)
except AttributeError:
return string
def processzipfiles(tradedate=datetime.date.today()):
pricesdir = os.path.join(os.environ['BASE_DIR'], "Scenarios", "Prices_" + str(tradedate))
zipfiles = [os.path.join(pricesdir, f) for f in os.listdir(pricesdir) \
if f.endswith(".zip")]
zipfiles = sorted(zipfiles, key=lambda x: os.stat(x).st_ctime)
cusips_dict = {}
dealnames_dict = {}
for i, zf in enumerate(zipfiles):
with ZipFile(zf) as myzip:
allfiles = set([f.filename.split("-")[0] for f in myzip.infolist()])
allfiles = allfiles - {"Total"}
dealnames = set([f.filename.split("-")[0] for f in myzip.infolist() \
if re.search("COLLAT.*Scen100", f.filename)])
cusips = allfiles - dealnames
dealnames = [d.lower() for d in dealnames]
cusips_dict.update((cusip, i) for cusip in cusips)
dealnames_dict.update((dealname, i) for dealname in dealnames)
return {"dealnames": dealnames_dict, "cusips":cusips_dict, "zipfiles": zipfiles}
def get_configfile(dealname, tradedate):
configfile = os.path.join(os.environ['BASE_DIR'], "Scenarios", "Intex curves_" + str(tradedate),
"csv", dealname + ".config")
try:
with open(configfile) as fh:
config = yaml.load(fh, Loader=yaml.FullLoader)
except FileNotFoundError:
config = {"reinvflag": True}
return config
def get_dist(date):
distfile = os.path.join(os.environ['BASE_DIR'], "Scenarios", "Calibration",
"marketdata-{0:%Y-%m-%d}.RData".format(date))
dist = ro.r.load(distfile)
return {"L": np.array(dist[0]),"R": np.array(dist[1])}
def get_dealdata(dealname, tradedate):
sqlstr = "select \"Curr Collat Bal\" AS currbal, reinv_end_date, first_pay_date," \
"maturity, \"Principal Bal\", pay_day from historical_clo_universe(%s, %s)"
data = {k: v for k, v in query_db(sqlstr, (dealname, tradedate)).items()}
data["mv"] = query_db("select marketvalue from latest_deal_model_numbers where dealname = %s",
(dealname,))[0]
return data
def get_cusipdata(cusip, tradedate):
sqlstring = "select curr_balance, spread from historical_cusip_universe(%s, %s)"
sqldata = query_db(sqlstr, (cusip, str(tradedate)))
return sqldata
def get_dealschedule(dealdata, freq='1Mo', adj=Unadjusted):
us_cal = Calendar.from_name('USA')
if not dealdata["Pay Day"] or abs((dealdata["Deal Next Pay Date"]- dealdata["Pay Day"]).days - 90)>10:
dealdata["Pay Day"] = dealdata["Deal Next Pay Date"] - DateOffset(months=3)
return Schedule(pydate_to_qldate(dealdata["Pay Day"]), pydate_to_qldate(dealdata["maturity"]),
Period(freq), us_cal, adj, adj)
def dealname_from_cusip(conn, cusips):
with conn.cursor() as c:
c.callproc("dealname_from_cusip", cusips)
dealnames = [d[0] for d in c]
return dealnames
def discounts(tradedate):
calibration_date = tradedate-BDay(1)
m = YC(calibration_date)
alldates = pd.date_range(pd.to_datetime(m.reference_date, format='%d/%m/%Y'),
pd.to_datetime(m.max_date, format='%d/%m/%Y'))
alldates_ql = [Date(d.day, d.month, d.year) for d in alldates]
df = [m.discount(day) for day in alldates_ql]
yearfrac = alldates.to_series()-alldates[0]
yearfrac = yearfrac.astype('timedelta64[D]')/365
return pd.DataFrame({'yearfrac':yearfrac, 'df': df}, index=alldates)
def getdealcf(dealnames, zipfiles, tradedate = datetime.date.today()):
n_scenarios = 100
cfdata = {}
discounts_table= discounts(tradedate)
fields = ["Cashflow", "Principal", "Interest"]
for dealname, i in dealnames.items():
zipfile = zipfiles[i]
dealdata = get_dealdata(dealname, tradedate)
cfdata[dealname] = {k: dealdata[k] for k in ["mv", "currbal"]}
config = get_configfile(dealname, tradedate)
if (not dealdata["reinv_end_date"] or not config["reinvflag"]):
tranches = ["COLLAT"]
else:
tranches = ["COLLAT_INITIAL", "COLLAT_REINVEST"]
cf = {}
with ZipFile(zipfile) as myzip:
for tranche in tranches:
scen = np.zeros((100,3))
for j in range(n_scenarios):
filename = "{0}-{1}-CF-Scen{2}.txt".format(dealname.upper(), tranche, j+1)
data = pd.read_table(myzip.open(filename), skiprows=[1, 2], parse_dates=[0],
thousands=",",
date_parser = lambda x: datetime.datetime.strptime(x, "%b %d, %Y"),
index_col=0)
for c in fields:
if data.dtypes[c] != np.dtype('float64'):
data[c] = data[c].apply(sanitize_float)
data = data[fields].join(discounts_table)
scen[j,:] = np.dot(data.df, data[fields])
cf[tranche] = pd.DataFrame(scen, columns=fields)
cfdata[dealname]["panel"] = pd.concat(cf)
cf = cfdata[dealname]["panel"].Cashflow.sum(level=1)
cfdata[dealname]["wapbasis"] = (cf.mean() - dealdata["mv"])/dealdata["mv"]
program = KLfit(cf.values/1e8, np.ones(n_scenarios)/n_scenarios, dealdata["mv"]/1e8)
cfdata[dealname]["weight"] = program["weight"]
print(dealname)
return cfdata
def getcusipcf(params, cfdata, tradedate):
calibration_date = tradedate-BDay(1)
dist = get_dist(calibration_date)
cusipdata = {}
n_scenarios = 100
intexfields = ["Cashflow", "Principal", "Interest", "Balance", "Accum Interest Shortfall"]
fields = ["Cashflow", "Principal", "Interest"]
cusips = list(params["cusips"].keys())
dealnames = dealname_from_cusip(conn, cusips)
cusips = [c for c, d in zip(cusips, dealnames) if d in cfdata.keys()]
discounts_table = discounts(tradedate)
for i, cusip in enumerate(cusips):
zipfile = params["zipfiles"][params["cusips"][cusip]]
dealname = dealnames[i]
dealdata = get_dealdata(dealname, tradedate)
with ZipFile(zipfile) as myzip:
scen = np.zeros((n_scenarios, 5))
for j in range(n_scenarios):
filename = "{0}-CF-Scen{1}.txt".format(cusip, j+1)
data = pd.read_table(myzip.open(filename), skiprows=[1, 2], parse_dates=[0],
thousands = ",",
date_parser = lambda x: datetime.datetime.strptime(x, "%b %d, %Y"),
index_col = 0,
usecols=range(6))
for c in intexfields:
if data.dtypes[c] != np.dtype('float64'):
data[c] = data[c].apply(sanitize_float)
data["Balance"] = np.maximum(data.Balance - data["Accum Interest Shortfall"], 0)
data = data.join(discounts_table)
scen[j,:3] = data.df.dot(data[fields])
scen[j,3] = data.Balance.diff()[1:].dot(data.yearfrac.shift()[1:])/dealdata["currbal"]
scen[j,4] = np.dot(data.Cashflow, data.df * data.yearfrac)/scen[j,0] if scen[j,0] else 0
def compute_delta(dist, dealweight, cusip_pv, tradedate, K1 = 0, K2 = 1):
Ngrid, nT = dist["L"].shape
scenariosl = np.zeros((dealweight.size, nT))
scenariosr = np.zeros((dealweight.size, nT))
for t in range(nT):
scenariosl[:,t] = interpvalues(dist["L"][:,t], np.linspace(0, 1, Ngrid), dealweight)
scenariosr[:,t] = interpvalues(dist["R"][:,t], np.linspace(0, 1, Ngrid), dealweight)
if __name__=="__main__":
if len(sys.argv)>1:
tradedate = datetime.datetime.strptime(sys.argv[1], "%Y-%m-%d").date()
else:
tradedate = datetime.date.today()
params = processzipfiles(tradedate)
cfdata = getdealcf(params["dealnames"], params["zipfiles"], tradedate)
cusipdata = getcusipcf(params, cfdata, tradedate)
|