# Compare external marks against traded prices.
#
# Loads external marks and bond trades from the 'dawndb' database, attaches to
# each trade the most recent mark dated on or before the trade date (as-of
# merge, per identifier), plots mark vs. traded price by source, and fits one
# GLS regression of price on mark for each source.
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sb
import statsmodels.api as sm
from statsmodels.formula.api import gls

from db import dbengine

# External marks in long format: one row per (identifier, date, source).
df = pd.read_sql_table('external_marks_mapped', dbengine('dawndb'),
                       parse_dates=['date'])
# Rows without a source cannot be attributed to anyone, so drop them.
df = df[df.source.notnull()]

# Wide format: one column of marks per source, one row per (identifier, date).
# merge_asof requires its key column to be sorted, hence the sort on 'date'.
df_wide = (
    pd.pivot_table(df, values='mark', index=['identifier', 'date'],
                   columns='source')
    .reset_index()
    .sort_values('date')
)

# Trades selected by buysell = 'f'.
# NOTE(review): presumably 'f' flags sells -- confirm against the bonds schema.
df_trades = pd.read_sql_query(
    "select trade_date, identifier, principal_payment, price "
    "from bonds where buysell = 'f'",
    dbengine('dawndb'),
    parse_dates=['trade_date'],
)
df_trades = df_trades.sort_values('trade_date')

# For each trade, pick the latest marks dated on or before the trade date.
df_wide = pd.merge_asof(df_trades, df_wide,
                        left_on='trade_date', right_on='date',
                        by='identifier')

# Same as-of join against the long-format marks. The right-hand frame must be
# sorted on its key too; `df` comes straight from the database, so sort it
# here rather than relying on the table's row order.
df_long = pd.merge_asof(df_trades, df.sort_values('date'),
                        left_on='trade_date', right_on='date',
                        by='identifier')

# pretty plot
# seaborn is imported as `sb` in this file (the conventional `sns` alias is
# not defined here).
sb.lmplot(x='mark', y='price', hue='source', data=df_long, fit_reg=False)

# fit all the models at once: one price ~ mark regression per source.
# The result has one row per source holding the fitted coefficients.
# NOTE(review): the original expression was cut off before its closing
# parenthesis; it is closed here on the assumption that nothing else followed.
params = (
    df_long
    .groupby('source')
    .apply(lambda group: gls('price~mark', group).fit().params)
)