aboutsummaryrefslogtreecommitdiffstats
path: root/python/externalmarksbackfill.py
diff options
context:
space:
mode:
Diffstat (limited to 'python/externalmarksbackfill.py')
-rw-r--r--python/externalmarksbackfill.py54
1 files changed, 21 insertions, 33 deletions
diff --git a/python/externalmarksbackfill.py b/python/externalmarksbackfill.py
index cbcf57c5..f6e4fbec 100644
--- a/python/externalmarksbackfill.py
+++ b/python/externalmarksbackfill.py
@@ -10,52 +10,40 @@ from itertools import chain
from glob import glob, iglob
from pandas.tseries.offsets import MonthEnd
-#engine = create_engine('postgresql://dawn_user@debian/dawndb')
-
def runAllFill():
for f in get_globs():
if settings[os.path.basename(f)][2] == "N":
runSingleFill(f)
def runSingleFill(f):
-
- conn = dbconn('dawndb')
- sql_str = "INSERT INTO external_marks VALUES(%(identifier)s, %(date)s, %(mark)s, %(source)s)"
- c = conn.cursor()
-
- (range_name, sheet_name, done) = settings[os.path.basename(f)]
- dirstructure = os.path.dirname(f).split('/')
+ range_name, sheet_name, done = settings[os.path.basename(f)]
+ dirstructure = os.path.dirname(f).split(os.path.sep)
markdate = (pd.Timestamp(dirstructure[5].replace("_","-"))+MonthEnd()).date()
marks = pd.read_excel(f, sheet_name, skiprows=13, parse_cols=range_name)
+ df = pd.DataFrame()
for s in ['1st', '2nd', '3rd'] +[str(i)+'th' for i in range(4, 12)]:
if s+' Source Price' not in marks:
break
- if s == '1st':
- temp = marks.loc[:, ['CUSIP', '1st Source Price', '1st Source']]
- temp.columns = ['identifier', 'mark', 'source']
- else:
- temp1 = marks.loc[:, ['CUSIP', s+' Source Price', s+' Source']]
- temp1.columns = ['identifier', 'mark', 'source']
- temp = temp.append(temp1)
+ temp = marks[['CUSIP', s+' Source Price', s+' Source']]
+ temp.columns = ['identifier', 'mark', 'source']
+ df = df.append(temp)
+ df = df.dropna()
+ df = df[df.mark != 0]
+ df['date'] = markdate
+ df['source'] = df['source'].str.upper()
- for columnname in temp.columns:
- temp.dropna(axis=0, subset=[columnname], inplace=True)
-
- temp = temp[temp.mark != 0]
- temp.loc[:, 'date'] = markdate
- temp['source']= temp['source'].str.upper()
-
- for r in temp.to_dict(orient='record'):
- try:
- #import pdb; pdb.set_trace()
- c.execute(sql_str, r)
- except (psycopg2.DataError, psycopg2.IntegrityError) as detail:
- print(detail)
- conn.rollback()
- finally:
- conn.commit()
- c.close()
+ conn = dbconn('dawndb')
+ sql_str = "INSERT INTO external_marks VALUES(%(identifier)s, %(date)s, %(mark)s, %(source)s)"
+ with conn.cursor() as c:
+ for r in df.to_dict(orient='record'):
+ try:
+ c.execute(sql_str, r)
+ except (psycopg2.DataError, psycopg2.IntegrityError) as detail:
+ print(detail)
+ conn.rollback()
+ finally:
+ conn.commit()
def get_globs():
basedir = '/home/serenitas/Daily'