import csv
import datetime
import logging
import os
import pdb
import sys
import uuid

import psycopg2

from common import root, sanitize_float
from db import conn
from load_indicative import upload_cusip_data, upload_deal_data

# Columns expected in the Intex "<DEAL>_AD.txt" collateral export, in the
# order their values are written to et_collateral.
fields = ['Asset Name', 'Issuer', 'Contributed Balance', 'Asset Maturity Date',
          'Asset Subtype', 'Asset Type', 'Gross Coupon', 'Spread',
          'Frequency', 'Next Paydate', 'Second Lien', 'LoanX ID', 'CUSIP',
          'Market Price', 'Market Price Source', 'Market Price Date',
          'Fixed or Float', 'Defaulted Flag', 'Security Sub-Category',
          'Structured Finance Security', 'Life Floor', 'Reinvest Collat',
          'Native Currency', "Moody's Industry Name", "Country"]


def convertToNone(s):
    """Map empty CSV cells to None so they load as SQL NULL."""
    return None if s == '' else s


def windows1252_encoder(fh):
    """Re-encode a Windows-1252 byte stream as UTF-8 (Python 2 only)."""
    for line in fh:
        yield line.decode('windows-1252').encode('utf-8')


def upload_data(conn, dealnames, workdate):
    """Parse each deal's collateral file and (re)insert it into et_collateral."""
    for dealname in dealnames:
        basedir = os.path.join(root, "data", "Collaterals_" + workdate)
        if sys.version_info[0] == 2:
            fh = open(os.path.join(basedir, dealname.upper() + "_AD.txt"))
            fh = windows1252_encoder(fh)
        else:
            fh = open(os.path.join(basedir, dealname.upper() + "_AD.txt"),
                      encoding="windows-1252")
        dr = csv.DictReader(fh, dialect='excel-tab')
        missingfields = set(fields).union({'Gross Margin'}) - set(dr.fieldnames)
        if "LoanX ID" in missingfields:
            logging.warning("{0}: LoanX ID column is missing. "
                            "Probably an error in exporting from Intex".format(dealname))

        data = {}
        for line in dr:
            # make missing columns behave like empty cells
            for f in missingfields:
                line[f] = None
            if line['LoanX ID']:
                line['LoanX ID'] = line['LoanX ID'].upper()
                if len(line['LoanX ID']) > 8:
                    logging.warning("dubious id found: {0}".format(line['LoanX ID']))
                    line['LoanX ID'] = line['LoanX ID'][:8]
            if line['Fixed or Float']:
                line['Fixed or Float'] = line['Fixed or Float'].upper()
            if line['CUSIP']:
                line['CUSIP'] = line['CUSIP'].upper()
                if len(line['CUSIP']) > 9:
                    logging.warning("dubious CUSIP found: {0}".format(line['CUSIP']))
                    line['CUSIP'] = line['CUSIP'][:9]
            if line['Asset Subtype']:
                line['Asset Subtype'] = line['Asset Subtype'].replace("Reinvest ", "").replace("Reinv ", "")
                if len(line['Asset Subtype']) > 10:
                    line['Asset Subtype'] = line['Asset Subtype'][:9]
            # explicit parentheses match the and/or precedence of the original
            # condition: a blank issuer also marks a reinvestment asset
            if ('Reinvest Collat' not in missingfields
                    and line['Reinvest Collat'].upper() == 'Y') or line['Issuer'] == '':
                line['Reinvest Collat'] = True
                line['Issuer'] = line['ID Number']
            if not line['Spread']:
                line['Spread'] = line['Gross Margin']
            for field in ['Spread', 'Gross Coupon', 'Market Price', 'Contributed Balance']:
                line[field] = sanitize_float(line[field])
            if line['Market Price'] == 0:
                line['Market Price'] = None
            # we store the Libor floor in the database, so "Life Floor" here is
            # really the life floor minus the spread
            if line['Life Floor'] == "No limit":
                line['Life Floor'] = 0
            elif line['Life Floor']:
                try:
                    line['Life Floor'] = float(line['Life Floor']) - float(line['Spread'])
                except (ValueError, TypeError):  # unparsable floor or missing spread
                    line['Life Floor'] = float('nan')
            # reinvestment asset lines come with no name
            if not line['Asset Name']:
                line['Asset Name'] = 'Reinv'
            r = [convertToNone(line[field]) for field in fields]
            # the asset name is not always unique; append a random tag in that case
            if r[0] in data:
                r[0] = r[0] + str(uuid.uuid4())[:3]
            data[r[0]] = r[1:]
        fh.close()

        sqlstr1 = "select distinct(updatedate) from et_collateral where dealname = %s"
        sqlstr2 = ("select max(\"Latest Update\") from clo_universe "
                   "where dealname = %s and \"Latest Update\" <= %s")
        with conn.cursor() as c:
            c.execute(sqlstr1, (dealname,))
            old_update_dates = [date[0] for date in c]
            c.execute(sqlstr2, (dealname, workdate))
            updatedate = c.fetchone()[0]
        conn.commit()

        # sanity check in case we already have data for this update date
        reinsert = False
        if updatedate in old_update_dates:
            sqlstr = "SELECT count(*) FROM et_collateral where dealname = %s and updatedate = %s"
            with conn.cursor() as c:
                c.execute(sqlstr, (dealname, updatedate))
                currlen = c.fetchone()[0]
            conn.commit()
            if currlen != len(data):
                # row counts disagree: delete the stale rows and reupload
                logging.warning("{0} has {1} rows in the database "
                                "and current collateral file has {2}".format(dealname, currlen, len(data)))
                with conn.cursor() as c:
                    sqlstr = "DELETE FROM et_collateral where dealname = %s and updatedate = %s"
                    c.execute(sqlstr, (dealname, updatedate))
                conn.commit()
                reinsert = True

        # insert when the update date is new or the stale rows were just deleted
        # (an empty old_update_dates list is covered by the "not in" test)
        if reinsert or updatedate not in old_update_dates:
            sql_fields = ["dealname", "updatedate", "name", "IssuerName", "CurrentBalance",
                          "Maturity", "AssetSubtype", "AssetType", "GrossCoupon", "Spread",
                          "Frequency", "NextPaydate", "SecondLien", "LoanXID", "Cusip",
                          "IntexPrice", "IntexPriceSource", "IntexPriceDate", "FixedOrFloat",
                          "DefaultedFlag", "CovLite", "isCDO", "Liborfloor", "ReinvFlag",
                          "Currency", "Industry", "Country"]
            sqlstr = "INSERT INTO ET_COLLATERAL({0}) VALUES({1})".format(
                ",".join(sql_fields), ",".join(["%s"] * len(sql_fields)))
            with conn.cursor() as c:
                try:
                    c.executemany(sqlstr,
                                  [(dealname, updatedate, k) + tuple(v) for k, v in data.items()])
                except (psycopg2.DataError, TypeError) as detail:
                    logging.error(detail)
                    pdb.set_trace()  # drop into the debugger to inspect the bad rows
            conn.commit()


def intex_data(conn, workdate):
    """Load the deal, tranche, and collateral data exported from Intex for workdate."""
    dealnames = [d.replace("_AD.txt", "").lower()
                 for d in os.listdir(os.path.join(root, "data", "Collaterals_" + workdate))]
    basedir = os.path.join(root, "data", "Indicative_" + workdate)
    files = [os.path.join(basedir, f) for f in os.listdir(basedir)]
    cusip_files = [f for f in files if "TrInfo" in f]
    deal_files = [f for f in files if "TrInfo" not in f]
    # first load deal data
    for deal_file in deal_files:
        upload_deal_data(conn, deal_file)
    # then load tranche data
    for cusip_file in cusip_files:
        upload_cusip_data(conn, cusip_file)
    # finally load the collateral files
    upload_data(conn, dealnames, workdate)


if __name__ == "__main__":
    # workdate defaults to today, matching the ISO date used in the directory names
    workdate = sys.argv[1] if len(sys.argv) > 1 else str(datetime.date.today())
    intex_data(conn, workdate)
    conn.close()
    print("done")
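# ---------------------------------------------------------------------------
# Usage, assuming the directory layout this script reads
# (data/Collaterals_<workdate>/<DEAL>_AD.txt collateral files, and
# data/Indicative_<workdate>/ where "TrInfo" files carry tranche data):
#
#     python load_collateral.py 2015-06-30   # explicit workdate
#     python load_collateral.py              # defaults to today
#
# The script filename and example date above are illustrative.
#
# sanitize_float is imported from the local ``common`` module and is not shown
# here. A minimal stand-in consistent with how this script uses it (numeric
# strings in, float or None out, which is why the Libor-floor arithmetic above
# also catches TypeError) might look like:
#
#     def sanitize_float(s):
#         # hypothetical sketch, not the actual common.sanitize_float
#         if s is None or str(s).strip() == '':
#             return None
#         try:
#             return float(str(s).replace(',', '').replace('%', ''))
#         except ValueError:
#             return None
# ---------------------------------------------------------------------------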