aboutsummaryrefslogtreecommitdiffstats
path: root/python/exploration/backtest.py
blob: 4b174288099361c364f1ce5d356ef1d33997fa22 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
from db import dbengine
import pandas as pd
import numpy as np

def calc_mark_diff():
    df = pd.read_sql_query("select * from external_marks_mapped where mark < 1000",
                           dbengine('dawndb'), parse_dates=['date'],
                           index_col=['date', 'identifier'])
    df_manager = df[df.source == 'MANAGER']
    df_ext = df[df.source != 'MANAGER']

    def closest(s):
        temp = abs(s - df_manager.loc[s.index].mark).values
        idx = np.argmin(temp)
        if np.isnan(idx):
            return np.NaN
        else:
            return s.iat[idx]

    def avg_minus_maxmin(s):
        if s.count() >= 3:
            return (s.sum() - s.max() - s.min()) / (s.count() - 2)
        else:
            return s.mean()

    df_scheme = (df_ext.groupby(['date', 'identifier']).
                 agg({'mark': ['mean', 'median', avg_minus_maxmin, closest]}))
    return df_scheme.apply(lambda x: x - df_manager.mark)