blob: 4b174288099361c364f1ce5d356ef1d33997fa22 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
|
from db import dbengine
import pandas as pd
import numpy as np
def calc_mark_diff():
df = pd.read_sql_query("select * from external_marks_mapped where mark < 1000",
dbengine('dawndb'), parse_dates=['date'],
index_col=['date', 'identifier'])
df_manager = df[df.source == 'MANAGER']
df_ext = df[df.source != 'MANAGER']
def closest(s):
temp = abs(s - df_manager.loc[s.index].mark).values
idx = np.argmin(temp)
if np.isnan(idx):
return np.NaN
else:
return s.iat[idx]
def avg_minus_maxmin(s):
if s.count() >= 3:
return (s.sum() - s.max() - s.min()) / (s.count() - 2)
else:
return s.mean()
df_scheme = (df_ext.groupby(['date', 'identifier']).
agg({'mark': ['mean', 'median', avg_minus_maxmin, closest]}))
return df_scheme.apply(lambda x: x - df_manager.mark)
|