from db import dbengine import pandas as pd import numpy as np def calc_mark_diff(): df = pd.read_sql_query("select * from external_marks_mapped where mark < 1000", dbengine('dawndb'), parse_dates=['date'], index_col=['date', 'identifier']) df_manager = df[df.source == 'MANAGER'] df_ext = df[df.source != 'MANAGER'] def closest(s): temp = abs(s - df_manager.loc[s.index].mark).values idx = np.argmin(temp) if np.isnan(idx): return np.NaN else: return s.iat[idx] def avg_minus_maxmin(s): if s.count() >= 3: return (s.sum() - s.max() - s.min()) / (s.count() - 2) else: return s.mean() df_scheme = (df_ext.groupby(['date', 'identifier']). agg({'mark': ['mean', 'median', avg_minus_maxmin, closest]})) return df_scheme.apply(lambda x: x - df_manager.mark)