import psycopg2 import os.path import re import csv import datetime import pdb import common import sys fields = ['Asset Name', 'Issuer', 'Contributed Balance', 'Maturity Date', \ 'Asset Subtype', 'Asset Type', 'Gross Coupon', 'Spread', 'Frequency', \ 'Next Paydate', 'Second Lien', 'LoanX ID', 'CUSIP', 'Market Price', \ 'Market Price Source', 'Price Date', 'Fixed or Float', \ 'Defaulted Flag', 'Security Sub-Category', \ 'Structured Finance Security', 'Life Floor', 'Reinvest Collat'] def convertToNone(s): return None if s=='' else s def sanitize_float(intex_float): intex_float = intex_float.replace(",", "") if "(" in intex_float: intex_float = - float(intex_float[1:-1]) else: intex_float = float(intex_float) return intex_float def get_latest_dealupdate(workdate): with open(os.path.join(common.root, "data", "clo_universe_intex_" + workdate + ".txt")) as fh: dr = csv.DictReader(fh, dialect='excel-tab') deal_table = {line['Deal,Tr/CUSIP/ISIN']: datetime.datetime.strptime(line['Latest Update'], '%b %d, %Y').date() \ for line in dr if 'Paid' not in line['Latest Update'] and line['Latest Update']} return deal_table def upload_data(dealnames, workdate, conn, cursor): for dealname in dealnames: #dealname, updatedate = line.rstrip().split() # updatedate = datetime.datetime.strptime(updatedate, '%m/%d/%Y') #dealname = dealname.upper() + ",AD.txt with open( os.path.join(common.root, "data", "Collaterals_" + workdate, dealname.upper() + ",AD.txt"), "r") as fh: dr = csv.DictReader(fh, dialect = 'excel-tab') missingfields = set(fields) - set(dr.fieldnames) if "LoanX ID" in missingfields: print "LoanX ID column is missing. Probably an error in exporting from intex" pdb.set_trace() data = [] for line in dr: for f in missingfields: line[f] = None if 'Gross Margin' not in line: line['Gross Margin'] = None if line['Fixed or Float']: line['Fixed or Float'] = line['Fixed or Float'].upper() if line['LoanX ID']: line['LoanX ID'] = line['LoanX ID'].upper() if len(line['LoanX ID']) > 8: print "dubious id found: {0}".format(line['LoanX ID']) line['LoanX ID'] = line['LoanX ID'][:8] try: # make sure the string is utf8 safe line['Issuer'] = line['Issuer'].decode('windows-1252') if line['Reinvest Collat'].upper() == 'Y' or line['Issuer'] == '': # assume it's a reinvestment asset line['Reinvest Collat'] = True line['Issuer'] = line['ID Number'] except AttributeError as detail: print dealname if 'Spread' in missingfields: line['Spread'] = line['Gross Margin'] for field in ['Spread', 'Gross Coupon', 'Market Price', 'Contributed Balance']: if line[field]: line[field] = sanitize_float(line[field]) #we store the Libor FLoor in the database, so Life Floor is really Libor Floor if line['Life Floor'] == "No limit": line['Life Floor'] = 0 elif line['Life Floor']: try: line['Life Floor'] = float(line['Life Floor']) - float(line['Spread']) except ValueError: line['Life Floor'] = float('Nan') if line['CUSIP']== 'XAQ3930AAB43': line['CUSIP']='BL078321' if line['CUSIP']: line['CUSIP'] = line['CUSIP'].upper() r = [convertToNone(line[field]) for field in fields] if r[fields.index('CUSIP')] and len(r[fields.index('CUSIP')])>9: pdb.set_trace() data.append(r) cursor.execute( "select distinct(updatedate) from et_collateral where dealname='{0}'".format(dealname)) old_update_dates = [date[0] for date in cursor.fetchall()] deal_table = get_latest_dealupdate(workdate) try: updatedate = deal_table[dealname] except KeyError as detail: print detail pdb.set_trace() reinsert = False if updatedate in old_update_dates: sqlstr = "SELECT count(*) FROM et_collateral where dealname = %s and updatedate= %s" cursor.execute(sqlstr, (dealname, updatedate)) currlen = cursor.fetchone()[0] if currlen <> len(data): print "{0} has {1} rows in the database and current collateral file has {2}".format(dealname, currlen, len(data)) sqlstr = "DELETE FROM et_collateral where dealname = %s and updatedate = %s" cursor.execute(sqlstr, (dealname, updatedate)) conn.commit() reinsert = True if reinsert or not old_update_dates or updatedate not in old_update_dates: tag = 0 for row in data: # these next three ifs are to take care of reinvestment asset lines if not row[0]: row[0] = 'Reinv' if row[4] and 'Reinvest' in row[4]: row[4] = row[4].replace("Reinvest ", "") if row[4] and len(row[4])>10: row[4] = row[4][:9] sql_fields = ["dealname", "updatedate", "name", "IssuerName", "CurrentBalance", "Maturity", "AssetSubtype", "AssetType", "GrossCoupon", "Spread", "Frequency", "NextPaydate", "SecondLien", "LoanXID", "Cusip", "IntexPrice", "IntexPriceSource", "IntexPriceDate", "FixedOrFloat", "DefaultedFlag", "CovLite", "isCDO", "Liborfloor", "ReinvFlag"] sqlstr = "INSERT INTO ET_COLLATERAL({0}) VALUES({1})".format(",".join(sql_fields), ",".join(["%s"] * len(sql_fields))) try: cursor.execute(sqlstr, (dealname, updatedate) + tuple(row)) except psycopg2.DataError as detail: print detail pdb.set_trace() except psycopg2.IntegrityError as detail: print detail # crazy hack intex unique id is not really unique conn.rollback() # make sure the loan name is unique by tagging it row[0] = row[0] + "_tag_" + str(tag) tag = tag + 1 cursor.execute(sqlstr, (dealname, updatedate) + tuple(row)) except TypeError: pdb.set_trace() conn.commit() if __name__ == "__main__": if len(sys.argv) > 1: workdate = sys.argv[1] else: workdate = str(datetime.date.today()) if len(sys.argv) > 2: dealnames = sys.argv[2:] else: dealnames = [d.split(",")[0].lower() for d in os.listdir(os.path.join(common.root, "data", "Collaterals_" + workdate))] upload_data(dealnames, workdate, common.conn, common.cursor) common.cursor.close() common.conn.close() print "done"