python/load_intex_collateral.py | 157
1 file changed, 83 insertions, 74 deletions
diff --git a/python/load_intex_collateral.py b/python/load_intex_collateral.py
index 633dfb74..7da4b13d 100644
--- a/python/load_intex_collateral.py
+++ b/python/load_intex_collateral.py
@@ -1,8 +1,5 @@
 import psycopg2
-import os
-import re
-import csv
-import datetime
+import os, csv, datetime
 import pdb
 from common import root, sanitize_float
 from db import conn, query_db
@@ -20,70 +17,83 @@ fields = ['Asset Name', 'Issuer', 'Contributed Balance', 'Maturity Date', \
 def convertToNone(s):
     return None if s=='' else s
 
-def upload_data(conn, dealnames, workdate):
+def windows1252_encoder(fh):
+    for line in fh:
+        yield line.decode('windows-1252').encode('utf-8')
+
+def upload_data(conn, dealnames, workdate, logger=None):
     for dealname in dealnames:
         #dealname, updatedate = line.rstrip().split()
         # updatedate = datetime.datetime.strptime(updatedate, '%m/%d/%Y')
         #dealname = dealname.upper() + ",AD.txt
-        with open( os.path.join(root, "data", "Collaterals_" + workdate, dealname.upper() + "_AD.txt"),
-                  "r", encoding='windows-1252') as fh:
-            dr = csv.DictReader(fh, dialect = 'excel-tab')
-            missingfields = set(fields).union({'Gross Margin'}) - set(dr.fieldnames)
-            if "LoanX ID" in missingfields:
-                print("LoanX ID column is missing. Probably an error in exporting from intex")
-                pdb.set_trace()
-            data = {}
-            for line in dr:
-                for f in missingfields:
-                    line[f] = None
-                try:
-                    line['Fixed or Float'] = line['Fixed or Float'].upper()
-                    line['LoanX ID'] = line['LoanX ID'].upper()
-                    line['CUSIP'] = line['CUSIP'].upper()
-                    line['Asset Subtype'] = line['Asset Subtype'].replace("Reinvest ", "")
-                    line['Asset Subtype'] = line['Asset Subtype'].replace("Reinv ", "")
-                except AttributeError:
-                    pass
-                else:
-                    #sanity checks for loanxid and cusip
-                    if len(line['LoanX ID']) > 8:
-                        print("dubious id found: {0}".format(line['LoanX ID']))
-                        line['LoanX ID'] = line['LoanX ID'][:8]
-                    if len(line['CUSIP']) > 9:
-                        print("dubious CUSIP found: {0}".format(line['CUSIP']))
-                        line['CUSIP'] = line['CUSIP'][:9]
-                    if len(line['Asset Subtype'])>10:
-                        line['Asset Subtype'] = line['Asset Subtype'][:9]
+        basedir = os.path.join(root, "data", "Collaterals_" + workdate)
+        if sys.version_info[0]==2:
+            fh = open(os.path.join(basedir, dealname.upper() + "_AD.txt"))
+            fh = windows1252_encoder(fh)
+        else:
+            fh = open(os.path.join(basedir, dealname.upper() + "_AD.txt"), encoding="windows-1252")
 
-                if 'Reinvest Collat' not in missingfields and \
-                   line['Reinvest Collat'].upper() == 'Y' or line['Issuer'] == '':
-                    # assume it's a reinvestment asset
-                    line['Reinvest Collat'] = True
-                    line['Issuer'] = line['ID Number']
-                    if not line['Spread']:
-                        line['Spread'] = line['Gross Margin']
+        dr = csv.DictReader(fh, dialect = 'excel-tab')
+        missingfields = set(fields).union({'Gross Margin'}) - set(dr.fieldnames)
+        if "LoanX ID" in missingfields:
+            msg = "{0}: LoanX ID column is missing. Probably an error in exporting from intex".format(dealname)
+            if logger:
+                logger.info(msg)
+            else:
+                print(msg)
+        data = {}
+        for line in dr:
+            for f in missingfields:
+                line[f] = None
+            try:
+                line['Fixed or Float'] = line['Fixed or Float'].upper()
+                line['LoanX ID'] = line['LoanX ID'].upper()
+                line['CUSIP'] = line['CUSIP'].upper()
+                line['Asset Subtype'] = line['Asset Subtype'].replace("Reinvest ", "")
+                line['Asset Subtype'] = line['Asset Subtype'].replace("Reinv ", "")
+            except AttributeError:
+                pass
+            else:
+                #sanity checks for loanxid and cusip
+                if len(line['LoanX ID']) > 8:
+                    print("dubious id found: {0}".format(line['LoanX ID']))
+                    line['LoanX ID'] = line['LoanX ID'][:8]
+                if len(line['CUSIP']) > 9:
+                    print("dubious CUSIP found: {0}".format(line['CUSIP']))
+                    line['CUSIP'] = line['CUSIP'][:9]
+                if len(line['Asset Subtype'])>10:
+                    line['Asset Subtype'] = line['Asset Subtype'][:9]
 
-                for field in ['Spread', 'Gross Coupon', 'Market Price', 'Contributed Balance']:
-                    line[field] = sanitize_float(line[field])
-                if line['Market Price'] == 0:
-                    line['Market Price'] = None
-                #we store the Libor Floor in the database, so Life Floor is really Libor Floor
-                if line['Life Floor'] == "No limit":
-                    line['Life Floor'] = 0
-                elif line['Life Floor']:
-                    try:
-                        line['Life Floor'] = float(line['Life Floor']) - float(line['Spread'])
-                    except ValueError:
-                        line['Life Floor'] = float('Nan')
+            if 'Reinvest Collat' not in missingfields and \
+               line['Reinvest Collat'].upper() == 'Y' or line['Issuer'] == '':
+                # assume it's a reinvestment asset
+                line['Reinvest Collat'] = True
+                line['Issuer'] = line['ID Number']
+                if not line['Spread']:
+                    line['Spread'] = line['Gross Margin']
+
+            for field in ['Spread', 'Gross Coupon', 'Market Price', 'Contributed Balance']:
+                line[field] = sanitize_float(line[field])
+            if line['Market Price'] == 0:
+                line['Market Price'] = None
+            #we store the Libor Floor in the database, so Life Floor is really Libor Floor
+            if line['Life Floor'] == "No limit":
+                line['Life Floor'] = 0
+            elif line['Life Floor']:
+                try:
+                    line['Life Floor'] = float(line['Life Floor']) - float(line['Spread'])
+                except ValueError:
+                    line['Life Floor'] = float('Nan')
 
-                # we take care of reinvestment asset lines
-                if not line['Asset Name']:
-                    line['Asset Name'] = 'Reinv'
-                r = [convertToNone(line[field]) for field in fields]
-                #sometimes the Asset Name is not unique (we add random tag in this case)
-                if r[0] in data:
-                    r[0] = r[0] + str(uuid.uuid4())[:3]
-                data[r[0]] = r[1:]
+            # we take care of reinvestment asset lines
+            if not line['Asset Name']:
+                line['Asset Name'] = 'Reinv'
+            r = [convertToNone(line[field]) for field in fields]
+            #sometimes the Asset Name is not unique (we add random tag in this case)
+            if r[0] in data:
+                r[0] = r[0] + str(uuid.uuid4())[:3]
+            data[r[0]] = r[1:]
+        fh.close()
 
         sqlstr = "select distinct(updatedate) from et_collateral where dealname= %s"
         old_update_dates = [date[0] for date in query_db(sqlstr, params=(dealname,), one=False)]
@@ -120,18 +130,11 @@ def upload_data(conn, dealnames, workdate):
             pdb.set_trace()
         conn.commit()
 
-if __name__ == "__main__":
-    if len(sys.argv) > 1:
-        workdate = sys.argv[1]
-    else:
-        workdate = str(datetime.date.today())
-    if len(sys.argv) > 2:
-        dealnames = sys.argv[2:]
-    else:
-        dealnames = [re.sub("_AD.txt", "", d).lower() for d in
-                     os.listdir(os.path.join(root, "data", "Collaterals_" + workdate))]
-    files = [os.path.join(root, "data", "Indicative_" + workdate, f) for f in
-             os.listdir(os.path.join(root, "data", "Indicative_" + workdate))]
+def intex_data(conn, workdate, logger=None):
+    dealnames = [d.replace("_AD.txt", "").lower() for d in
+                 os.listdir(os.path.join(root, "data", "Collaterals_" + workdate))]
+    basedir = os.path.join(root, "data", "Indicative_" + workdate)
+    files = [os.path.join(basedir, f) for f in os.listdir(basedir)]
     cusip_files = [f for f in files if "TrInfo" in f]
     deal_files = [f for f in files if "TrInfo" not in f]
     #first load deal data
@@ -140,7 +143,13 @@ if __name__ == "__main__":
     #then load tranche data
     for cusip_file in cusip_files:
        upload_cusip_data(cusip_file)
-    upload_data(conn, dealnames, workdate)
+    upload_data(conn, dealnames, workdate, logger)
 
+if __name__ == "__main__":
+    if len(sys.argv) > 1:
+        workdate = sys.argv[1]
+    else:
+        workdate = str(datetime.date.today())
+    intex_data(conn, workdate)
     conn.close()
     print("done")
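The core change above is the file-opening branch: under Python 2 the export is opened in binary mode and wrapped in the new windows1252_encoder generator, which re-encodes each windows-1252 line as UTF-8 bytes because Python 2's csv module only accepts byte strings, while Python 3 keeps the plain open(..., encoding="windows-1252"). A minimal, self-contained sketch of that idea follows; the in-memory sample buffer and its contents are made up for illustration and are not part of the commit.

import csv
import io
import sys

def windows1252_encoder(fh):
    # Python 2's csv module wants byte strings, so re-encode each raw
    # windows-1252 line as UTF-8 bytes instead of handing it unicode.
    for line in fh:
        yield line.decode('windows-1252').encode('utf-8')

# Hypothetical stand-in for a tab-separated Intex "<DEAL>_AD.txt" export.
sample = u'Asset Name\tIssuer\nTerm Loan B\tSoci\u00e9t\u00e9 G\u00e9n\u00e9rale\n'.encode('windows-1252')

if sys.version_info[0] == 2:
    fh = windows1252_encoder(io.BytesIO(sample))
else:
    # On Python 3 the reader can consume decoded text directly.
    fh = io.TextIOWrapper(io.BytesIO(sample), encoding='windows-1252')

for row in csv.DictReader(fh, dialect='excel-tab'):
    print(row['Issuer'])

The same commit also factors the directory scanning out of __main__ into intex_data(conn, workdate, logger=None), so other scripts can trigger a load directly and pass their own logger instead of relying on print.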
