python/load_cf.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184

import os
import os.path
import pandas as pd
from intex_scenarios import dealname_from_cusip
from zipfile import ZipFile
import re
import datetime
from dateutil.relativedelta import relativedelta
from db import query_db
import yaml
from quantlib.time.api import Schedule, Actual360, Period, Months, Calendar, Unadjusted, ModifiedFollowing
from quantlib.util.converter import pydate_to_qldate, qldate_to_pydate
from quantlib.settings import Settings
from yieldcurve import YC
import numpy as np
from optimization import KLfit
import rpy2.robjects as ro
import pdb

if os.name =='nt':
    root = "//WDsentinel/share/CorpCDOs"
elif os.name == 'posix':
    root = '/home/share/CorpCDOs'

def sanitize_float(string):
    try:
        string = string.replace(",","")
        if "(" in string:
            return - float(string[1:-1])
        else:
            return float(string)
    except AttributeError:
        return string


def processzipfiles(tradedate=datetime.date.today()):
    pricesdir = os.path.join(root, "Scenarios", "Prices_" + str(tradedate))
    zipfiles = [os.path.join(pricesdir, f) for f in os.listdir(pricesdir) \
                if f.endswith(".zip")]
    zipfiles = sorted(zipfiles, key=lambda x: os.stat(x).st_ctime)
    cusips_dict = {}
    dealnames_dict = {}
    for i, zip in enumerate(zipfiles):
        with ZipFile(zip) as myzip:
            allfiles = set([f.filename.split("-")[0] for f in myzip.infolist()])
            allfiles = allfiles - {"Total"}
            dealnames = set([f.filename.split("-")[0] for f in myzip.infolist() \
                             if re.search("COLLAT.*Scen100", f.filename)])
            cusips = allfiles - dealnames
            dealnames = [d.lower() for d in dealnames]
            cusips_dict.update((cusip, i) for cusip in cusips)
            dealnames_dict.update((dealname, i) for dealname in dealnames)
    return {"dealnames": dealnames_dict, "cusips":cusips_dict, "zipfiles": zipfiles}


def get_configfile(dealname, tradedate):
    configfile = os.path.join(root, "Scenarios", "Intex curves_" + str(tradedate),
                              "csv", dealname + ".config")
    try:
        with open(configfile) as fh:
            config = yaml.load(fh)
    except FileNotFoundError:
        config = {"reinvflag": True}
    return config

def get_dist(tradedate):
    distfile = os.path.join(root, "Scenarios", "marketdata-{0}.RData".format(str(tradedate)))
    dist = ro.r.load(distfile)
    return {"L": np.array(dist[0]),"R": np.array(dist[1])}

def get_dealdata(dealname, tradedate):
    sqlstr = "select \"Curr Collat Bal\", \"Reinv End Date\", \"Deal Next Pay Date\"," \
             "maturity, \"Principal Bal\", \"Pay Day\"  from historical_clo_universe(%s, %s)"
    sqldata = query_db(sqlstr, (dealname, str(tradedate)))
    return sqldata

def get_cusipdata(cusip, tradedate):
    sqlstring = "select curr_balance, spread from historical_cusip_universe(%s, %s)"
    sqldata = query_db(sqlstr, (cusip, str(tradedate)))
    return sqldata

def get_dealschedule(dealdata, freq='1Mo', adj=Unadjusted):
    us_cal = Calendar.from_name('USA')
    if not dealdata["Pay Day"] or abs((dealdata["Deal Next Pay Date"]- dealdata["Pay Day"]).days - 90)>10:
        dealdata["Pay Day"] = dealdata["Deal Next Pay Date"] + relativedelta(months=-3)
    return Schedule(pydate_to_qldate(dealdata["Pay Day"]), pydate_to_qldate(dealdata["maturity"]),
                     Period(freq), us_cal, adj, adj)

def getdealcf(dealnames, zipfiles, tradedate = datetime.date.today()):
    fiels = ["Cashflow", "Principal", "Interest"]
    n_scenarios = 100
    cfdata = {}
    m = YC(tradedate)
    fields = ["Cashflow", "Principal", "Interest"]
    for dealname, i in dealnames.items():
        zip = zipfiles[i]
        cfdata[dealname]={}
        mv = query_db("select marketvalue from latest_deal_model_numbers where dealname = %s",
                      (dealname,))[0]

        dealdata = get_dealdata(dealname, tradedate)
        cfdata[dealname] = {"mv":mv, "currbal": dealdata["Curr Collat Bal"]}
        config = get_configfile(dealname, tradedate)
        sched = get_dealschedule(dealdata)
        pd_sched = [pd.to_datetime(str(day), format="%d/%m/%Y") for day in sched]
        discounts = pd.Series([m.discount(day) if day > m.settle_date else 1 for day in sched],
                              index = pd_sched, name='df')

        if (not dealdata["Reinv End Date"] or not config["reinvflag"]):
             tranches = ["COLLAT"]
        else:
            tranches = ["COLLAT_INITIAL", "COLLAT_REINVEST"]

        cf = {}
        with ZipFile(zip) as myzip:
            for tranche in tranches:
                scen = np.zeros((100,3))
                for j in range(n_scenarios):
                    filename = "{0}-{1}-CF-Scen{2}.txt".format(dealname.upper(), tranche, j+1)
                    data = pd.read_table(myzip.open(filename), skiprows=[1, 2], parse_dates=[0],
                                         thousands=",",
                                         date_parser = lambda x: datetime.datetime.strptime(x, "%b %d, %Y"),
                                         index_col=0)

                    for c in fields:
                        if data.dtypes[c] != np.dtype('float64'):
                            data[c] = data[c].apply(sanitize_float)
                    data = data[fields].join(discounts)
                    scen[j,:] = np.dot(data["df"], data[fields])
                cf[tranche] = pd.DataFrame(scen, columns=fields)
        cfdata[dealname]["panel"] = pd.concat(cf)
        cf = cfdata[dealname]["panel"].Cashflow.sum(level=1)
        cfdata[dealname]["wapbasis"] = (cf.mean() - mv)/mv
        program = KLfit(cf.values/1e8, np.ones(n_scenarios)/n_scenarios, mv/1e8)
        cfdata[dealname]["weight"] = program["weight"]
        print(dealname)
    return cfdata

def getcusipcf(dealnames, cusip, cfdata, tradedate):
    dist = get_dist(tradedate)
    cusipdata = {}
    n_scenarios = 100
    m = YC(tradedate)
    intexfields = ["Cashflow", "Principal", "Interest", "Balance", "Accum Interest Shortfall"]
    fields = ["Cashflow", "Principal", "Interest", "wal", "duration"]

    for i, cusip in enumerate(cusips):
        zip = zipfiles[i]
        dealname = dealnames[i]
        dealdata = getdealdata(dealname)
        sched = getdealschedule(dealdata, ModifiedFollowing)
        pd_sched = [pd.to_datetime(str(day), format="%d/%m/%Y") for day in sched]
        discounts = pd.Series([m.discount(day) if day > m.settle_date else 1 for day in sched],
                              index = pd_sched, name='df')
        with ZipFile(zip) as myzip:
            for j in range(n_scenarios):
                filename = "{0}-CF-Scen{1}.txt".format(cusip, j+1)
                data = pd.read_table(myzip.open(filename), skiprows=[1, 2], parse_dates=[0],
                                     thousands = ",",
                                     date_parser = lambda x: datetime.datetime.strptime(x, "%b %d, %Y"),
                                     index_col = 0)
                for c in intexfields:
                    if data.dtypes[c] != np.dtype('float64'):
                        data[c] = data[c].apply(sanitize_float)
                data["Balance"] = np.maximum(data["Balance"]- data["Accum Interest Shortfall"], 0)
                data = data[fields].join(discounts)
                scen[j,:2] = np.dot(data["df"], data[fields])
                scen[j,3] = np.dot(np.diff(data.Balance), T[1:])/deldata["curr_balance"]
                scen[j,4] = if scen[j,0] np.dot(data.Cashflow, data.df * T)/scen[j,0] else 0

def compute_delta(dist, dealweight, cusip_pv, tradedate, K1 = 0, K2 = 1):
    Ngrid, nT = dist["L"].shape
    scenariosl = np.zeros((dealweight.size, nT))
    scenariosr = np.zeros((dealweight.size, nT))
    for t in range(nT):
        scenariosl[:,t] = interpvalues(dist["L"][:,t], np.linspace(0, 1, Ngrid), dealweight)
        scenariosr[:,t] = interpvalues(dist["R"][:,t], np.linspace(0, 1, Ngrid), dealweight)


if __name__=="__main__":
    tradedate = datetime.date.today()
    #tradedate = datetime.datetime.strptime("2014-03-19", "%Y-%m-%d").date()
    params = processzipfiles(tradedate)
    pomme = getdealcf(params["dealnames"], params["zipfiles"], tradedate)