import pandas as pd from db import dbengine import numpy as np import statsmodels.api as sm from statsmodels.formula.api import gls from pandas.tseries.offsets import MonthEnd import datetime from PyQt4 import QtGui #has conflict with matplotlib #import matplotlib #matplotlib.use("Q4Agg") #to prevent conflict with PyQT4 def nearestDate(base, dates): nearness = { abs(base.timestamp() - date.timestamp()) : date for date in dates } return nearness[min(nearness.keys())] #Df = Read all external Marks df = (pd.read_sql_table('external_marks_mapped',dbengine('rmbs_model'), parse_dates=['date']). sort_values('date')) #Ddf1 = reformat dates, get rid of duplicates of external marks df1 = df df1['date'] = pd.to_datetime(df['date'], errors= 'coerce') df1 = df1.sort_values('date') df1 = df1.set_index(['date','identifier','source']) df1 = df1.groupby(level=['date','identifier','source'])['mark'].mean() df1 = df1.reset_index() df1 = df1.set_index(['date']) #df2 = mapping of monthend to the closest available model date mapping = pd.read_sql_query("select distinct date(timestamp) as model_date from priced",dbengine('dawndb'), parse_dates = ['model_date']).sort_values('model_date') df2 = df.set_index(['date']) df2 = df2.groupby(level=['date'])['mark'].mean() for dt in df2.index: df2[dt]= nearestDate(dt, mapping['model_date']) df2 = pd.DataFrame(df2) df2.columns = ['model_date'] #merge the mapped model dates back to the external marks table df1 = df1.join(df2) #Df3 = all the model runs on the available model dates df3=pd.DataFrame() sql_str = "select timestamp as date, cusip, moddur from priced where date(timestamp) = %s and model_version = 3" for date in df2['model_date']: df3= df3.append(pd.read_sql_query(sql_str,dbengine('dawndb'), parse_dates = ['date'], params=(date, ))) df3= df3.rename(columns = {"cusip": "identifier"}) df3 = df3.sort_values('date') df3['date'] = df3['date'].dt.date df3 = df3.set_index(['date', 'identifier']) #Now merge external monthend marks with the available model dates df1 = df1.reset_index() df1= df1.rename(columns = {"model_date": "date", "date": "monthend_date"}) df1 = df1.set_index(['date','identifier']) df1 = df1.merge(df3, left_index = True, right_index= True) df1['spread'] = (100-df1['mark'])/df1['moddur'] df1 = df1.reset_index() df1 = df1.set_index(['identifier','source','monthend_date']) #df5 = difference in spread difference = df1.groupby(level=['identifier','source']).diff() def readtenorSpread1(index="IG", tenor="5yr"): date = (pd.Timestamp.now() - pd.DateOffset(years=4)).date() sql_str = "SELECT date, series, closespread AS spread FROM index_quotes " \ "WHERE index=%s and date>=%s and tenor = %s" df = pd.read_sql_query(sql_str, dbengine('serenitasdb'), parse_dates=['date'], index_col=['date', 'series'], params=(index.upper(), date, "5yr")) df1 = df.unstack(level=1).resample('1m').last().diff() df2 = df1.stack(level = 1).groupby(level = 'date').last() return df2 #failed code #df1 = df #df2 = df #df1 = df1.set_index(['date','identifier','source']) #df2['prevdate'] = df2['date']- MonthEnd(n=1) #df2 = df2.rename(columns={'date': 'nextdate', 'prevdate':'date'}) #df2 = df2.set_index(['date','identifier','source']) #import pdb; pdb.set_trace() #df3 = df1.mark - df2.mark #now let's get HY spreads # index = 'IG' # def readtenorSpreadold(index): # serenitasdb = dbengine('serenitasdb') # maxindexnumber = pd.read_sql_query("select max(series) from index_version where index= %s",serenitasdb, params=(index,))['max'][0] # d={} # seriesstart = {} # df = pd.DataFrame() # for i in range(maxindexnumber-8, maxindexnumber+1): # #import pdb; pdb.set_trace() # d[i]= pd.read_sql_query("select date, closeprice, closespread from index_quotes where index= %s and series = %s and tenor = '5yr' order by date desc",serenitasdb, parse_dates = ['date'], params=(index,i)) # seriesstart[i] = d[i].min(axis=0).date # seriesstart[maxindexnumber+1]= pd.Timestamp(2030,1,1) # for i in range(maxindexnumber-8, maxindexnumber+1): # df = df.append(d[i][(d[i].date>seriesstart[i]) & (d[i].date