"""Consolidate daily report CSVs into the ``globeop.hdf`` HDF5 store."""
from glob import iglob
from itertools import chain
import os
import pdb

import pandas as pd
from pandas.tseries.offsets import BDay


def daily_reports(fname, years=('2013', '2014', '2015')):
    """Merge every daily ``<fname>.csv`` report into one table of ``globeop.hdf``.

    Two directory layouts under ``/home/share/Daily`` are scanned:

    * ``<year>/<year>_*/<year>*/Reports/<fname>.csv`` for each entry of
      ``years``;
    * ``<last year>-*/Reports/<fname>.csv`` directly under the base
      directory, for the last entry of ``years`` only.

    The report date is parsed from the directory name found in the path.
    Files dated on or after 2013-02-06 are read with ``KnowledgeDate`` and
    ``PeriodEndDate`` parsed from the CSV; for earlier files those two
    columns are set from the directory date (``PeriodEndDate`` being the
    previous business day).

    A file is skipped when it is empty or when the ``PeriodEndDate`` of its
    first row has already been loaded from an earlier file.

    The combined frame loses its ``AccountingPeriod`` column, has ``Strat``,
    ``InvCcy``, ``Fund`` and ``Port`` converted to categoricals, and is
    written to ``globeop.hdf`` under the key ``fname.lower()``.

    Args:
        fname: Report file name without the ``.csv`` extension.
        years: Sequence of year strings to scan; must be non-empty.

    Raises:
        KeyError: If no non-empty report was found (kept as ``KeyError``
            because the previous implementation failed that way on the
            missing ``AccountingPeriod`` column), or if an expected column
            is absent from the combined data.
    """
    basedir = '/home/share/Daily'
    # First date from which the CSVs carry their own date columns.
    cutoff = pd.Timestamp('2013-02-06')

    globs = [
        iglob(os.path.join(
            basedir, year,
            "{0}_*/{0}*/Reports/{1}.csv".format(year, fname)))
        for year in years
    ]
    globs.append(iglob(os.path.join(
        basedir, '{0}-*/Reports/{1}.csv'.format(years[-1], fname))))

    frames = []
    # PeriodEndDate values already loaded.  A set lookup replaces rescanning
    # the accumulated frame for every file.
    loaded_period_ends = set()

    for path in chain.from_iterable(globs):
        parts = path.split('/')
        try:
            # Nested layout: the dated directory is the 7th path component.
            date = pd.Timestamp(parts[6])
        except ValueError:
            # Flat layout: the dated directory sits right under basedir.
            date = pd.Timestamp(parts[4])

        if date >= cutoff:
            newdf = pd.read_csv(
                path, parse_dates=['KnowledgeDate', 'PeriodEndDate'])
        else:
            newdf = pd.read_csv(path)
            newdf['KnowledgeDate'] = date
            newdf['PeriodEndDate'] = date - BDay(1)

        if newdf.empty:
            continue
        if newdf['PeriodEndDate'].iat[0] in loaded_period_ends:
            continue

        frames.append(newdf)
        # NaT is dropped so that it never matches, mirroring the ``==``
        # comparison used previously (NaT != NaT).
        loaded_period_ends.update(newdf['PeriodEndDate'].dropna().tolist())

    if not frames:
        raise KeyError(
            "no non-empty '{0}' report found under {1}".format(fname, basedir))

    # DataFrame.append was removed in pandas 2.0; a single concat also avoids
    # recopying the growing frame once per file.
    df = pd.concat(frames)

    del df['AccountingPeriod']
    for col in ['Strat', 'InvCcy', 'Fund', 'Port']:
        df[col] = df[col].astype('category')
    df.to_hdf('globeop.hdf', key=fname.lower(), format='table',
              complib='blosc')


if __name__ == '__main__':
    # daily_reports('Pnl')
    daily_reports('Valuation_Report')
    df = pd.read_hdf('globeop.hdf', 'valuation_report')
    # Total end-of-period book NAV of the SERCGMAST fund per period end date.
    nav = (df[df.Fund == 'SERCGMAST']
           .groupby('PeriodEndDate')['EndBookNAV']
           .sum())