1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
|
import pandas as pd
from db import dbengine
import numpy as np
import statsmodels.api as sm
from statsmodels.formula.api import gls
from pandas.tseries.offsets import MonthEnd
import datetime
from PyQt4 import QtGui #has conflict with matplotlib
#import matplotlib
#matplotlib.use("Q4Agg") #to prevent conflict with PyQT4
def nearestDate(base, dates):
    """Return the element of *dates* whose timestamp is closest to *base*.

    Parameters
    ----------
    base : datetime-like exposing ``.timestamp()``
    dates : non-empty iterable of datetime-like objects

    Returns
    -------
    The nearest date. Ties resolve to the first equidistant candidate
    encountered (the previous dict-based version silently kept only the
    last of any equidistant pair).
    """
    # min() with a key avoids materializing a {distance: date} dict and
    # cannot collide/drop entries for equidistant dates.
    return min(dates, key=lambda d: abs(base.timestamp() - d.timestamp()))
# df = all external marks from the rmbs_model DB, sorted by date.
df = (pd.read_sql_table('external_marks_mapped',dbengine('rmbs_model'),
      parse_dates=['date']).
      sort_values('date'))
# df1 = external marks with dates re-coerced and duplicate
# (date, identifier, source) rows collapsed to their mean mark.
# NOTE(review): "df1 = df" is an alias, not a copy — the to_datetime
# assignment below mutates df as well, and df is reused to build df2
# further down, so it appears to depend on the coerced dates. Confirm
# before switching this to df.copy().
df1 = df
df1['date'] = pd.to_datetime(df['date'], errors= 'coerce')
df1 = df1.sort_values('date')
df1 = df1.set_index(['date','identifier','source'])
df1 = df1.groupby(level=['date','identifier','source'])['mark'].mean()
df1 = df1.reset_index()
df1 = df1.set_index(['date'])
# df2 = mapping from each external-mark date to the nearest available
# model run date in dawndb's priced table.
mapping = pd.read_sql_query("select distinct date(timestamp) as model_date from priced",dbengine('dawndb'), parse_dates = ['model_date']).sort_values('model_date')
df2 = df.set_index(['date'])
df2 = df2.groupby(level=['date'])['mark'].mean()
# Overwrite each date's (unused) mean mark with its nearest model date;
# the index itself is not modified, so assigning while iterating is safe.
for dt in df2.index:
    df2[dt]= nearestDate(dt, mapping['model_date'])
df2 = pd.DataFrame(df2)
df2.columns = ['model_date']
# Merge the mapped model dates back onto the external marks
# (both frames are indexed by the external-mark date here).
df1 = df1.join(df2)
# df3 = all model_version-3 runs (cusip, modified duration) for each of
# the mapped model dates.
df3=pd.DataFrame()
sql_str = "select timestamp as date, cusip, moddur from priced where date(timestamp) = %s and model_version = 3"
# NOTE(review): DataFrame.append in a loop is quadratic and was removed
# in pandas >= 2.0; collecting frames in a list and pd.concat-ing once
# is the modern equivalent.
for date in df2['model_date']:
    df3= df3.append(pd.read_sql_query(sql_str,dbengine('dawndb'), parse_dates = ['date'], params=(date, )))
df3= df3.rename(columns = {"cusip": "identifier"})
df3 = df3.sort_values('date')
# Truncate timestamps to calendar dates so they line up with model_date.
# NOTE(review): this leaves df3's 'date' level as datetime.date objects
# while df1's model dates look like Timestamps — verify the index merge
# below actually matches rows rather than producing an empty result.
df3['date'] = df3['date'].dt.date
df3 = df3.set_index(['date', 'identifier'])
# Join external month-end marks with the model runs on
# (model date, identifier); the original mark date is retained as
# monthend_date.
df1 = df1.reset_index()
df1= df1.rename(columns = {"model_date": "date", "date": "monthend_date"})
df1 = df1.set_index(['date','identifier'])
df1 = df1.merge(df3, left_index = True, right_index= True)
# Spread proxy: discount to par divided by modified duration.
df1['spread'] = (100-df1['mark'])/df1['moddur']
df1 = df1.reset_index()
df1 = df1.set_index(['identifier','source','monthend_date'])
# difference = month-over-month change in spread per (identifier, source).
difference = df1.groupby(level=['identifier','source']).diff()
def readtenorSpread1(index="IG", tenor="5yr"):
    """Month-over-month changes in closing spread for a CDS index.

    Queries the last four years of quotes for the given index/tenor from
    serenitasdb, resamples to month-end, diffs month over month, and for
    each date keeps the latest (highest) series quoted.

    Parameters
    ----------
    index : str
        Index family, e.g. "IG" or "HY" (upper-cased before querying).
    tenor : str
        Quote tenor, e.g. "5yr".

    Returns
    -------
    pandas.DataFrame indexed by date with a 'spread' column of
    month-over-month spread changes.
    """
    start_date = (pd.Timestamp.now() - pd.DateOffset(years=4)).date()
    sql_str = ("SELECT date, series, closespread AS spread FROM index_quotes "
               "WHERE index=%s and date>=%s and tenor = %s")
    # BUG FIX: the tenor argument was previously ignored — "5yr" was
    # hard-coded in params, so non-default tenors silently queried 5yr
    # data. Pass the caller's tenor through instead.
    quotes = pd.read_sql_query(sql_str, dbengine('serenitasdb'),
                               parse_dates=['date'],
                               index_col=['date', 'series'],
                               params=(index.upper(), start_date, tenor))
    # One column per series -> month-end sample -> month-over-month diff.
    monthly_diff = quotes.unstack(level=1).resample('1m').last().diff()
    # Stack series back and keep, per date, the last (on-the-run) series.
    return monthly_diff.stack(level=1).groupby(level='date').last()
# NOTE: dead code below — an earlier, abandoned attempt at computing
# month-over-month mark differences; kept for reference, safe to delete.
#df1 = df
#df2 = df
#df1 = df1.set_index(['date','identifier','source'])
#df2['prevdate'] = df2['date']- MonthEnd(n=1)
#df2 = df2.rename(columns={'date': 'nextdate', 'prevdate':'date'})
#df2 = df2.set_index(['date','identifier','source'])
#import pdb; pdb.set_trace()
#df3 = df1.mark - df2.mark
#now let's get HY spreads
# index = 'IG'
# def readtenorSpreadold(index):
# serenitasdb = dbengine('serenitasdb')
# maxindexnumber = pd.read_sql_query("select max(series) from index_version where index= %s",serenitasdb, params=(index,))['max'][0]
# d={}
# seriesstart = {}
# df = pd.DataFrame()
# for i in range(maxindexnumber-8, maxindexnumber+1):
# #import pdb; pdb.set_trace()
# d[i]= pd.read_sql_query("select date, closeprice, closespread from index_quotes where index= %s and series = %s and tenor = '5yr' order by date desc",serenitasdb, parse_dates = ['date'], params=(index,i))
# seriesstart[i] = d[i].min(axis=0).date
# seriesstart[maxindexnumber+1]= pd.Timestamp(2030,1,1)
# for i in range(maxindexnumber-8, maxindexnumber+1):
# df = df.append(d[i][(d[i].date>seriesstart[i]) & (d[i].date<seriesstart[i+1])])
# df = df.sort_values('date')
# df = df.set_index('date')
# df = df.diff()
# return df.dropna()
|