-rw-r--r--  python/load_intex_collateral.py | 69
1 file changed, 28 insertions(+), 41 deletions(-)
diff --git a/python/load_intex_collateral.py b/python/load_intex_collateral.py
index cfa4d71f..b530d246 100644
--- a/python/load_intex_collateral.py
+++ b/python/load_intex_collateral.py
@@ -7,9 +7,10 @@ import pdb
 from common import root
 from db import conn, query_db
 import sys
+import uuid
 
 fields = ['Asset Name', 'Issuer', 'Contributed Balance', 'Maturity Date', \
-          'Asset Subtype', 'Asset Type', 'Gross Coupon', 'Gross Margin', 'Spread', \
+          'Asset Subtype', 'Asset Type', 'Gross Coupon', 'Spread', \
          'Frequency', 'Next Paydate', 'Second Lien', 'LoanX ID', 'CUSIP',
          'Market Price', 'Market Price Source', 'Price Date', 'Fixed or Float', \
          'Defaulted Flag', 'Security Sub-Category', 'Structured Finance Security', \
@@ -21,13 +22,12 @@ def convertToNone(s):
 def sanitize_float(intex_float):
     try:
         intex_float = intex_float.replace(",", "")
-    except AttributeError:
-        return intex_float
-    else:
         if "(" in intex_float:
             return - float(intex_float[1:-1])
         else:
             return float(intex_float)
+    except (AttributeError, ValueError):
+        return intex_float
 
 def upload_data(conn, dealnames, workdate):
     for dealname in dealnames:
@@ -37,11 +37,11 @@ def upload_data(conn, dealnames, workdate):
         with open( os.path.join(root, "data", "Collaterals_" + workdate,
                                 dealname.upper() + "_AD.txt"), "r", encoding='windows-1252') as fh:
             dr = csv.DictReader(fh, dialect = 'excel-tab')
-            missingfields = set(fields) - set(dr.fieldnames)
+            missingfields = set(fields).union({'Gross Margin'}) - set(dr.fieldnames)
             if "LoanX ID" in missingfields:
                 print("LoanX ID column is missing. Probably an error in exporting from intex")
                 pdb.set_trace()
-            data = []
+            data = {}
             for line in dr:
                 for f in missingfields:
                     line[f] = None
@@ -49,12 +49,19 @@ def upload_data(conn, dealnames, workdate):
                     line['Fixed or Float'] = line['Fixed or Float'].upper()
                     line['LoanX ID'] = line['LoanX ID'].upper()
                     line['CUSIP'] = line['CUSIP'].upper()
+                    line['Asset Subtype'] = line['Asset Subtype'].replace("Reinvest ", "")
                 except AttributeError:
                     pass
                 else:
+                    # sanity checks for loanxid and cusip
                     if len(line['LoanX ID']) > 8:
                         print("dubious id found: {0}".format(line['LoanX ID']))
                         line['LoanX ID'] = line['LoanX ID'][:8]
+                    if len(line['CUSIP']) > 9:
+                        print("dubious CUSIP found: {0}".format(line['CUSIP']))
+                        line['CUSIP'] = line['CUSIP'][:9]
+                    if len(line['Asset Subtype']) > 10:
+                        line['Asset Subtype'] = line['Asset Subtype'][:9]
 
                 if 'Reinvest Collat' not in missingfields and \
                    line['Reinvest Collat'].upper() == 'Y' or line['Issuer'] == '':
@@ -76,24 +83,27 @@ def upload_data(conn, dealnames, workdate):
                     line['Life Floor'] = float(line['Life Floor']) - float(line['Spread'])
                 except ValueError:
                     line['Life Floor'] = float('Nan')
-                if line['CUSIP']== 'XAQ3930AAB43':
-                    line['CUSIP']='BL078321'
+
+                # we take care of reinvestment asset lines
+                if not line['Asset Name']:
+                    line['Asset Name'] = 'Reinv'
                 r = [convertToNone(line[field]) for field in fields]
-                if r[fields.index('CUSIP')] and len(r[fields.index('CUSIP')])>9:
-                    pdb.set_trace()
-                data.append(r)
+                # sometimes the Asset Name is not unique (we add a random tag in this case)
+                if r[0] in data:
+                    r[0] = r[0] + str(uuid.uuid4())[:3]
+                data[r[0]] = r[1:]
 
         sqlstr = "select distinct(updatedate) from et_collateral where dealname= %s"
         old_update_dates = [date[0] for date in query_db(sqlstr, params=(dealname,), one=False)]
-        sqlstr = 'select max("Latest Update") from clo_universe where dealname= %s'
+        sqlstr = "select max(\"Latest Update\") from clo_universe where dealname= %s"
         updatedate = query_db(sqlstr, params=(dealname,))[0]
-
+        # sanity check if we already have the data
         reinsert = False
         if updatedate in old_update_dates:
             sqlstr = "SELECT count(*) FROM et_collateral where dealname = %s and updatedate= %s"
             currlen = query_db(sqlstr, params = (dealname, updatedate))[0]
-            if currlen != len(data):
+            if currlen != len(data):  # then we delete and just reupload
                 print("{0} has {1} rows in the database and current collateral file has {2}".format(dealname, currlen, len(data)))
                 with conn.cursor() as c:
                     sqlstr = "DELETE FROM et_collateral where dealname = %s and updatedate = %s"
                     c.execute(sqlstr, (dealname, updatedate))
@@ -102,7 +112,6 @@ def upload_data(conn, dealnames, workdate):
                 reinsert = True
 
         if reinsert or not old_update_dates or updatedate not in old_update_dates:
-            tag = 0
             sql_fields = ["dealname", "updatedate", "name", "IssuerName",
                           "CurrentBalance", "Maturity", "AssetSubtype", "AssetType",
                           "GrossCoupon", "Spread", "Frequency", "NextPaydate", "SecondLien", "LoanXID",
@@ -111,38 +120,16 @@ def upload_data(conn, dealnames, workdate):
                           "Liborfloor", "ReinvFlag", "Currency"]
             sqlstr = "INSERT INTO ET_COLLATERAL({0}) VALUES({1})".format(",".join(sql_fields),
                                                                          ",".join(["%s"] * len(sql_fields)))
-            c = conn.cursor()
-            for row in data:
-                # these next three ifs are to take care of reinvestment asset lines
-                if not row[0]:
-                    row[0] = 'Reinv'
+            with conn.cursor() as c:
                 try:
-                    if 'Reinvest' in row[4]:
-                        row[4] = row[4].replace("Reinvest ", "")
-                    if len(row[4])>10:
-                        row[4] = row[4][:9]
-                except (AttributeError, TypeError):
-                    pass
-
-                try:
-                    c.execute(sqlstr, (dealname, updatedate) + tuple(row))
+                    c.executemany(sqlstr, [(dealname, updatedate, k) + tuple(v) for k, v in data.items()])
                 except psycopg2.DataError as detail:
                     print(detail)
                     pdb.set_trace()
-                except psycopg2.IntegrityError as detail:
+                except TypeError as detail:
                     print(detail)
-                    # crazy hack intex unique id is not really unique
-                    conn.rollback()
-                    # make sure the loan name is unique by tagging it
-                    row[0] = row[0] + "_tag_" + str(tag)
-                    tag = tag + 1
-                    c.execute(sqlstr, (dealname, updatedate) + tuple(row))
-                    conn.commit()
-                except TypeError:
                     pdb.set_trace()
-                else:
-                    conn.commit()
-            c.close()
+                conn.commit()
 
 if __name__ == "__main__":
     if len(sys.argv) > 1:
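Note on the reworked sanitize_float: it handles Intex's comma-grouped numbers and accounting-style negatives in one pass, and by also catching ValueError it now passes non-numeric strings through unchanged instead of raising. A standalone copy of the function from this commit, exercised with a few made-up sample values:

    def sanitize_float(intex_float):
        try:
            intex_float = intex_float.replace(",", "")  # strip thousands separators
            if "(" in intex_float:
                return - float(intex_float[1:-1])       # "(123.4)" means -123.4
            else:
                return float(intex_float)
        except (AttributeError, ValueError):
            # non-strings (None, floats) and non-numeric strings pass through
            return intex_float

    assert sanitize_float("1,234.5") == 1234.5
    assert sanitize_float("(987.6)") == -987.6
    assert sanitize_float(None) is None
    assert sanitize_float("N/A") == "N/A"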
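Note on the de-duplication: the old insert loop papered over duplicate asset names by catching psycopg2.IntegrityError, rolling back, and retrying with a "_tag_N" suffix; the new code instead guarantees unique names while reading the file, keyed in a dict. A minimal sketch of that idea in isolation (the sample rows are invented; r[0] is 'Asset Name', as in the script):

    import uuid

    rows = [["Loan A", "1.0"], ["Loan A", "2.0"], ["", "3.0"]]

    data = {}
    for r in rows:
        if not r[0]:
            r[0] = 'Reinv'                       # reinvestment lines come in unnamed
        if r[0] in data:
            r[0] = r[0] + str(uuid.uuid4())[:3]  # e.g. "Loan A" -> "Loan Af3b"
        data[r[0]] = r[1:]

Three characters of a UUID keep the names short, but give only a few thousand distinct suffixes; a pathological file could still collide, and a collision would silently overwrite the earlier row in the dict.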
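Note on the batch insert: with uniqueness guaranteed before the database is touched, the per-row execute/rollback/commit cycle collapses into one executemany per deal. A sketch under the same assumptions (an open psycopg2 connection, `data` mapping asset name to the remaining column values; the helper name here is hypothetical):

    import psycopg2

    def insert_collateral(conn, sqlstr, dealname, updatedate, data):
        # one batch, one transaction: a bad row now aborts the whole deal's
        # insert instead of being retried row by row as before
        with conn.cursor() as c:
            c.executemany(sqlstr, [(dealname, updatedate, name) + tuple(values)
                                   for name, values in data.items()])
        conn.commit()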
