Diffstat (limited to 'python/intex/load_intex_collateral.py')
| -rw-r--r-- | python/intex/load_intex_collateral.py | 149 |
1 file changed, 149 insertions, 0 deletions
diff --git a/python/intex/load_intex_collateral.py b/python/intex/load_intex_collateral.py
new file mode 100644
index 00000000..1b0a7c45
--- /dev/null
+++ b/python/intex/load_intex_collateral.py
@@ -0,0 +1,149 @@
+import psycopg2
+import os, csv, datetime
+import pdb
+from common import root, sanitize_float
+from db import conn
+import sys
+import uuid
+from load_indicative import upload_cusip_data, upload_deal_data
+import logging
+
+fields = ['Asset Name', 'Issuer', 'Contributed Balance', 'Asset Maturity Date',
+          'Asset Subtype', 'Asset Type', 'Gross Coupon', 'Spread',
+          'Frequency', 'Next Paydate', 'Second Lien', 'LoanX ID', 'CUSIP',
+          'Market Price', 'Market Price Source', 'Market Price Date', 'Fixed or Float',
+          'Defaulted Flag', 'Security Sub-Category', 'Structured Finance Security',
+          'Life Floor', 'Reinvest Collat', 'Native Currency', "Moody's Industry Name",
+          "Country"]
+
+def convertToNone(s):
+    return None if s == '' else s
+
+def windows1252_encoder(fh):
+    for line in fh:
+        yield line.decode('windows-1252').encode('utf-8')
+
+def upload_data(conn, dealnames, workdate):
+    for dealname in dealnames:
+        basedir = os.path.join(root, "data", "Collaterals_" + workdate)
+        if sys.version_info[0] == 2:
+            fh = open(os.path.join(basedir, dealname.upper() + "_AD.txt"))
+            fh = windows1252_encoder(fh)
+        else:
+            fh = open(os.path.join(basedir, dealname.upper() + "_AD.txt"), encoding="windows-1252")
+
+        dr = csv.DictReader(fh, dialect='excel-tab')
+        missingfields = set(fields).union({'Gross Margin'}) - set(dr.fieldnames)
+        if "LoanX ID" in missingfields:
+            msg = "{0}: LoanX ID column is missing. Probably an error in exporting from intex"
+            logging.warning(msg.format(dealname))
+        data = {}
+        for line in dr:
+            for f in missingfields:
+                line[f] = None
+            for key in ['LoanX ID', 'CUSIP', 'Fixed or Float']:
+                if line[key]:
+                    line[key] = line[key].upper()
+            if line['Asset Subtype']:
+                line['Asset Subtype'] = line['Asset Subtype'].replace("Reinvest ", "").replace("Reinv ", "")
+            for key, l in [('LoanX ID', 8), ('CUSIP', 9), ('Asset Subtype', 10)]:
+                if line[key]:
+                    if len(line[key]) > l:
+                        logging.warning("dubious {0} found: {1}".format(key, line[key]))
+                        line[key] = line[key][:l]
+
+            if line['Reinvest Collat'] and line['Reinvest Collat'].upper() == 'Y':
+                # assume it's a reinvestment asset
+                line['Reinvest Collat'] = True
+                line['Issuer'] = line['ID Number']
+                if not line['Spread']:
+                    line['Spread'] = line['Gross Margin']
+
+            for field in ['Spread', 'Gross Coupon', 'Market Price', 'Contributed Balance']:
+                line[field] = sanitize_float(line[field])
+            if line['Market Price'] == 0:
+                line['Market Price'] = None
+            # we store the Libor Floor in the database, so Life Floor is really Libor Floor
+            if line['Life Floor'] == "No limit":
+                line['Life Floor'] = 0
+            elif line['Life Floor']:
+                try:
+                    line['Life Floor'] = float(line['Life Floor']) - float(line['Spread'])
+                except ValueError:
+                    line['Life Floor'] = float('nan')
+
+            # we take care of reinvestment asset lines
+            if not line['Asset Name']:
+                line['Asset Name'] = 'Reinv'
+            r = [convertToNone(line[field]) for field in fields]
+            # sometimes the Asset Name is not unique (we add a random tag in this case)
+            if r[0] in data:
+                r[0] = r[0] + str(uuid.uuid4())[:3]
+            data[r[0]] = r[1:]
+        fh.close()
+        sqlstr1 = "select distinct(updatedate) from et_collateral where dealname = %s"
+        sqlstr2 = "select max(\"Latest Update\") from clo_universe where dealname = %s and \"Latest Update\" <= %s"
+        with conn.cursor() as c:
+            c.execute(sqlstr1, (dealname,))
+            old_update_dates = [date[0] for date in c]
+            c.execute(sqlstr2, (dealname, workdate))
+            updatedate = c.fetchone()[0]
+        conn.commit()
+        # sanity check if we already have the data
+        reinsert = False
+        if updatedate in old_update_dates:
+            sqlstr = "SELECT count(*) FROM et_collateral where dealname = %s and updatedate = %s"
+            with conn.cursor() as c:
+                c.execute(sqlstr, (dealname, updatedate))
+                currlen = c.fetchone()[0]
+            conn.commit()
+            if currlen != len(data):  # then we delete and just reupload
+                logging.warning("{0} has {1} rows in the database "
+                                "and current collateral file has {2}".format(dealname, currlen, len(data)))
+                with conn.cursor() as c:
+                    sqlstr = "DELETE FROM et_collateral where dealname = %s and updatedate = %s"
+                    c.execute(sqlstr, (dealname, updatedate))
+                conn.commit()
+                reinsert = True
+
+        if reinsert or not old_update_dates or updatedate not in old_update_dates:
+            sql_fields = ["dealname", "updatedate", "name", "IssuerName", "CurrentBalance",
+                          "Maturity", "AssetSubtype", "AssetType", "GrossCoupon",
+                          "Spread", "Frequency", "NextPaydate", "SecondLien", "LoanXID",
+                          "Cusip", "IntexPrice", "IntexPriceSource", "IntexPriceDate",
+                          "FixedOrFloat", "DefaultedFlag", "CovLite", "isCDO",
+                          "Liborfloor", "ReinvFlag", "Currency", "Industry", "Country"]
+            sqlstr = "INSERT INTO ET_COLLATERAL({0}) VALUES({1})".format(",".join(sql_fields),
+                                                                         ",".join(["%s"] * len(sql_fields)))
+            with conn.cursor() as c:
+                try:
+                    c.executemany(sqlstr, [(dealname, updatedate, k) + tuple(v) for k, v in data.items()])
+                except (psycopg2.DataError, psycopg2.IntegrityError, TypeError) as detail:
+                    logging.error(detail)
+                    pdb.set_trace()
+            conn.commit()
+
+def intex_data(conn, workdate):
+    dealnames = [d.replace("_AD.txt", "").lower() for d in
+                 os.listdir(os.path.join(root, "data", "Collaterals_" + workdate))]
+    basedir = os.path.join(root, "data", "Indicative_" + workdate)
+    files = [os.path.join(basedir, f) for f in os.listdir(basedir)]
+    cusip_files = [f for f in files if "TrInfo" in f]
+    deal_files = [f for f in files if "TrInfo" not in f]
+    # first load deal data
+    for deal_file in deal_files:
+        upload_deal_data(conn, deal_file)
+    # then load tranche data
+    for cusip_file in cusip_files:
+        upload_cusip_data(conn, cusip_file)
+
+    upload_data(conn, dealnames, workdate)
+
+if __name__ == "__main__":
+    if len(sys.argv) > 1:
+        workdate = sys.argv[1]
+    else:
+        workdate = str(datetime.date.today())
+    intex_data(conn, workdate)
+    conn.close()
+    print("done")
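A note on the collision handling in upload_data: rows are keyed by Asset Name, and when a name repeats, the loader appends the first three characters of a random UUID so the later row does not overwrite the earlier one. Below is a minimal sketch of that rule in isolation; the make_unique helper and the sample names are illustrative, not part of the module.

    import uuid

    def make_unique(name, data):
        # Mirror upload_data's rule: when an Asset Name is already a key,
        # append the first three characters of a random UUID.
        if name in data:
            name = name + str(uuid.uuid4())[:3]
        return name

    data = {}
    data[make_unique("TERM LOAN B", data)] = ["first row"]
    data[make_unique("TERM LOAN B", data)] = ["second row"]
    print(sorted(data))  # e.g. ['TERM LOAN B', 'TERM LOAN B1f4']

Because the suffix is random, the generated keys are only stable within a single run: the row-count sanity check against et_collateral is unaffected, but the name stored for a duplicate asset can differ between loads of the same file.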
