diff options
Diffstat (limited to 'python')
| -rw-r--r-- | python/externalmarksbackfill.py | 54 |
1 files changed, 21 insertions, 33 deletions
diff --git a/python/externalmarksbackfill.py b/python/externalmarksbackfill.py index cbcf57c5..f6e4fbec 100644 --- a/python/externalmarksbackfill.py +++ b/python/externalmarksbackfill.py @@ -10,52 +10,40 @@ from itertools import chain from glob import glob, iglob from pandas.tseries.offsets import MonthEnd -#engine = create_engine('postgresql://dawn_user@debian/dawndb') - def runAllFill(): for f in get_globs(): if settings[os.path.basename(f)][2] == "N": runSingleFill(f) def runSingleFill(f): - - conn = dbconn('dawndb') - sql_str = "INSERT INTO external_marks VALUES(%(identifier)s, %(date)s, %(mark)s, %(source)s)" - c = conn.cursor() - - (range_name, sheet_name, done) = settings[os.path.basename(f)] - dirstructure = os.path.dirname(f).split('/') + range_name, sheet_name, done = settings[os.path.basename(f)] + dirstructure = os.path.dirname(f).split(os.path.sep) markdate = (pd.Timestamp(dirstructure[5].replace("_","-"))+MonthEnd()).date() marks = pd.read_excel(f, sheet_name, skiprows=13, parse_cols=range_name) + df = pd.DataFrame() for s in ['1st', '2nd', '3rd'] +[str(i)+'th' for i in range(4, 12)]: if s+' Source Price' not in marks: break - if s == '1st': - temp = marks.loc[:, ['CUSIP', '1st Source Price', '1st Source']] - temp.columns = ['identifier', 'mark', 'source'] - else: - temp1 = marks.loc[:, ['CUSIP', s+' Source Price', s+' Source']] - temp1.columns = ['identifier', 'mark', 'source'] - temp = temp.append(temp1) + temp = marks[['CUSIP', s+' Source Price', s+' Source']] + temp.columns = ['identifier', 'mark', 'source'] + df = df.append(temp) + df = df.dropna() + df = df[df.mark != 0] + df['date'] = markdate + df['source'] = df['source'].str.upper() - for columnname in temp.columns: - temp.dropna(axis=0, subset=[columnname], inplace=True) - - temp = temp[temp.mark != 0] - temp.loc[:, 'date'] = markdate - temp['source']= temp['source'].str.upper() - - for r in temp.to_dict(orient='record'): - try: - #import pdb; pdb.set_trace() - c.execute(sql_str, r) - except (psycopg2.DataError, psycopg2.IntegrityError) as detail: - print(detail) - conn.rollback() - finally: - conn.commit() - c.close() + conn = dbconn('dawndb') + sql_str = "INSERT INTO external_marks VALUES(%(identifier)s, %(date)s, %(mark)s, %(source)s)" + with conn.cursor() as c: + for r in df.to_dict(orient='record'): + try: + c.execute(sql_str, r) + except (psycopg2.DataError, psycopg2.IntegrityError) as detail: + print(detail) + conn.rollback() + finally: + conn.commit() def get_globs(): basedir = '/home/serenitas/Daily' |
