diff options
Diffstat (limited to 'python/load_globeop_report.py')
| -rw-r--r-- | python/load_globeop_report.py | 138 |
1 files changed, 63 insertions, 75 deletions
diff --git a/python/load_globeop_report.py b/python/load_globeop_report.py index 68c1aa9d..377f2746 100644 --- a/python/load_globeop_report.py +++ b/python/load_globeop_report.py @@ -17,35 +17,42 @@ def get_globs(fname, years=['2013', '2014', '2015', '2016', '2017']): fname)))) return globs -def valuation_reports(): - df = pd.DataFrame() - for f in chain.from_iterable(get_globs('Valuation_Report')): - try: - date = pd.Timestamp(f.split('/')[6]) - except ValueError: - date = pd.Timestamp(f.split('/')[4]) +def read_valuation_report(f): + try: + date = pd.Timestamp(f.split('/')[6]) + except ValueError: + date = pd.Timestamp(f.split('/')[4]) + if date >= pd.Timestamp('2013-02-06'): + df = pd.read_csv(f, parse_dates=['KnowledgeDate','PeriodEndDate']) + else: + df = pd.read_csv(f) + df['KnowledgeDate'] = date + df['PeriodEndDate'] = date - bus_day + df['row'] = df.index + if 'AccountingPeriod' in df: + del df['AccountingPeriod'] + if "Strat" in df: + df.Strat = df.Strat.str.replace("^(SERCGMAST__){1,2}(M_|SER_)?", "", 1) + if "Port" in df: + df.Port = df.Port.str.replace("^(SERCGMAST__){1,2}(SERG__|SERG_)?", "", 1) + df.columns = df.columns.str.lower() + return df - if date >= pd.Timestamp('2013-02-06'): - newdf = pd.read_csv(f, parse_dates=['KnowledgeDate','PeriodEndDate']) - else: - newdf = pd.read_csv(f) - newdf['KnowledgeDate'] = date - newdf['PeriodEndDate'] = date - bus_day - newdf['row'] = newdf.index - if newdf.empty or ('PeriodEndDate' in df and \ - not df[df.PeriodEndDate == newdf.PeriodEndDate.iat[0]].empty): - continue - df = df.append(newdf) - del df['AccountingPeriod'] +def valuation_reports(): + df = pd.concat(read_valuation_report(f) for f in + chain.from_iterable(get_globs('Valuation_Report'))) + # There can be duplicates in case of holidays + df = df.sort_values(['periodenddate', 'row', 'knowledgedate']) + df = df.drop_duplicates(['periodenddate', 'row'], 'last') + df.to_sql('valuation_reports', dbengine('dawndb'), if_exists='append', index=False) - ## cleanups +def read_pnl_report(f): + df = pd.read_csv(f) df.Strat = df.Strat.str.replace("^(SERCGMAST__){1,2}(M_|SER_)?", "", 1) df.Port = df.Port.str.replace("^(SERCGMAST__){1,2}(SERG__|SERG_)?", "", 1) - for col in ['Strat', 'InvCcy', 'Fund', 'Port']: - df[col] = df[col].astype('category') - df.columns = df.columns.str.lower() - - df.to_sql('val_reports', dbengine('dawndb'), if_exists='append', index=False) + df['LongShortIndicator'] = df['LongShortIndicator'].str.strip() + df.columns = df.columns.str.lower().str.replace(" ", "") + return df def pnl_reports(): df = {} @@ -55,73 +62,54 @@ def pnl_reports(): except ValueError: date = pd.Timestamp(f.split('/')[4]) date = date - bus_day - df[date] = pd.read_csv(f) - df[date]['row'] = df[date].index - df = pd.concat(df, names=['date', 'to_drop']) - df.reset_index(level='to_drop', drop=True, inplace=True) - df.Strat = df.Strat.str.replace("^(SERCGMAST__){1,2}(M_|SER_)?", "", 1) - df.Port = df.Port.str.replace("^(SERCGMAST__){1,2}(SERG__|SERG_)?", "", 1) - for col in ['Fund', 'Strat', 'Port', 'LongShortIndicator', 'InvCcy']: - df[col] = df[col].astype('category') - - ## cleanups - df = df.reset_index() - df.columns = df.columns.str.lower() - df['longshortindicator'] = df['longshortindicator'].str.strip() - df.columns = [c.replace(" ", "") for c in df.columns] - + df[date] = read_pnl_report(f) + df = pd.concat(df, names=['date', 'row']).reset_index() df.to_sql('pnl_reports', dbengine('dawndb'), if_exists='append', index=False) -def cds_reports(): - df = {} - for f in chain.from_iterable(get_globs('CDS_Report')): - try: - date = pd.Timestamp(f.split('/')[6]) - except ValueError: - date = pd.Timestamp(f.split('/')[4]) - date = date - bus_day - df[date] = pd.read_csv(f) - df[date]['row'] = df[date].index - df = pd.concat(df, names=['date', 'to_drop']) - df.reset_index(level='to_drop', drop=True, inplace=True) - for col in ['Buy/Sell', 'Counterparty', 'CCP', 'Ccy', 'Direction', 'Price Ccy', - 'Period End Date', 'Basis', 'Roll Convention', 'Settle Mode', - 'Strategy', 'Trade Type', 'Trade Status', 'Prime Broker']: - df[col] = df[col].astype('category') - for col in df.columns: - if 'Date' in col and col != 'Period End Date': - df[col] = pd.to_datetime(df[col]) +def read_cds_report(f, old_report=False): + df = pd.read_csv(f) for col in df.columns: vc = len(df[col].value_counts()) if vc == 0: del df[col] continue - if df[col].dtype == 'object' and vc < 20: - df[col] = df[col].astype('category') - contract = df['Contractual Definition'] - contract = contract.where(contract.isin(['ISDA2014', 'ISDA2003Cred']), 'ISDA2014').astype('category') - df['Contractual Definition'] = contract + if 'Contractual Definition' in df: + contract = df['Contractual Definition'] + contract = contract.where(contract.isin(['ISDA2014', 'ISDA2003Cred']), 'ISDA2014') + df['Contractual Definition'] = contract df = df.drop(['Bloomberg Yellow key', 'Created User', 'Last Modified User', 'Last Modified Date', 'Fund Long Name', 'Instrument Sub Type', 'Netting Id', 'Client', 'Trade Status', 'Position Status', 'Clearing Broker', 'Settle Mode', 'Off Price', 'On Price', - 'Price Ccy'], - axis=1) - df.columns = df.columns.str.lower() - df.columns = df.columns.str.replace(" ", "_") - df.roll_convention = df.roll_convention.str.title() + 'Price Ccy'], axis=1, errors='ignore') + df.columns = df.columns.str.lower().str.replace(" ", "_") + if old_report: + df.calendar = df.calendar.str.replace(" ", "") + df = df.rename(columns={'direction': 'buy/sell'}) + df.roll_convention = df.roll_convention.str.title() df = df[df.strategy != 'SER_TEST'] df.loc[df.strategy == 'SERCGMAST__MBSCDS', 'strategy'] = 'MBSCDS' df.strategy = df.strategy.str.replace("SER_","") - df.loc[df['buy/sell'].isnull(), 'buy/sell'] = df.loc[df['buy/sell'].isnull(), 'direction'] + df['buy/sell'] = df['buy/sell'].astype('category') df['buy/sell'].cat.categories = ['Buyer', 'Seller'] - del df['direction'] - df.prime_broker = df.prime_broker.cat.remove_categories('NONE') - df.calendar = df.calendar.str.replace(" ", "") - df['executing_broker'] = df['executing_broker'].astype('object') + df.prime_broker = df.prime_broker.where(df.prime_broker != 'NONE') df.loc[df.executing_broker.isnull(),'executing_broker'] = df[df.executing_broker.isnull()].counterparty del df['counterparty'] - df = df.rename(columns={'executing_broker': 'counterparty'}) + df = df.rename(columns={'executing_broker': 'counterparty', + 'independent_%':'independent_perc'}) + return df + +def cds_reports(): + df = {} + for f in chain.from_iterable(get_globs('CDS_Report')): + try: + date = pd.Timestamp(f.split('/')[6]) + except ValueError: + date = pd.Timestamp(f.split('/')[4]) + old_report = date <= pd.Timestamp('2017-02-28') or date == pd.Timestamp('2017-03-02') + date = date - bus_day + df[date] = read_cds_report(f, old_report) + df = pd.concat(df, names=['date', 'row']).reset_index() return df def monthly_pnl_bycusip(df, strats): |
