import csv
import datetime
import logging
import os
import uuid
from pathlib import Path

import psycopg2

from common import sanitize_float
from . import dbconn
from .load_indicative import upload_cusip_data, upload_deal_data

logger = logging.getLogger(__name__)

# Column headers expected in the Intex collateral export, in the order they
# map onto the ET_COLLATERAL columns (see sql_fields in upload_data).
fields = [
    "Asset Name",
    "Issuer",
    "Contributed Balance",
    "Asset Maturity Date",
    "Asset Subtype",
    "Asset Type",
    "Gross Coupon",
    "Spread",
    "Frequency",
    "Next Paydate",
    "Second Lien",
    "LoanX ID",
    "CUSIP",
    "Market Price",
    "Market Price Source",
    "Market Price Date",
    "Fixed or Float",
    "Defaulted Flag",
    "Security Sub-Category",
    "Structured Finance Security",
    "Life Floor",
    "Reinvest Collat",
    "Native Currency",
    "Moody's Industry Name",
    "Country",
    "Amortization Schedule",
]


def upload_data(conn, workdate):
    sql_fields = [
        "dealname",
        "updatedate",
        "name",
        "IssuerName",
        "CurrentBalance",
        "Maturity",
        "AssetSubtype",
        "AssetType",
        "GrossCoupon",
        "Spread",
        "Frequency",
        "NextPaydate",
        "SecondLien",
        "LoanXID",
        "Cusip",
        "IntexPrice",
        "IntexPriceSource",
        "IntexPriceDate",
        "FixedOrFloat",
        "DefaultedFlag",
        "CovLite",
        "isCDO",
        "Liborfloor",
        "ReinvFlag",
        "Currency",
        "Industry",
        "Country",
        "amort_schedule_dates",
        "amort_schedule_amounts",
    ]
    sqlstr = "INSERT INTO ET_COLLATERAL({0}) VALUES({1})".format(
        ",".join(sql_fields), ",".join(["%s"] * len(sql_fields))
    )
    basedir = Path(os.environ["DATA_DIR"]) / ("Collaterals_" + workdate)
    for fname in basedir.iterdir():
        # Skip zip archives; only the extracted tab-separated files are loaded.
        if fname.suffix == ".zip":
            continue
        dealname = fname.name.rsplit("_", 1)[0].lower()
        with open(fname, encoding="windows-1252") as fh:
            dr = csv.DictReader(fh, dialect="excel-tab")
            missingfields = set(fields).union({"Gross Margin"}) - set(dr.fieldnames)
            if "LoanX ID" in missingfields:
                msg = "{0}: LoanX ID column is missing. Probably an error in exporting from Intex"
                logger.warning(msg.format(dealname))
            data = {}
            for line in dr:
                # Columns absent from this export are filled with None so the
                # rest of the loop can treat every row uniformly.
                for f in missingfields:
                    line[f] = None
                for key in ["LoanX ID", "CUSIP", "Fixed or Float"]:
                    if line[key]:
                        line[key] = line[key].upper()
                if line["Asset Subtype"]:
                    line["Asset Subtype"] = (
                        line["Asset Subtype"].replace("Reinvest ", "").replace("Reinv ", "")
                    )
                if line["Second Lien"]:
                    line["Second Lien"] = line["Second Lien"].replace("Second Lien", "Y")
                # Truncate identifiers that exceed their column widths in ET_COLLATERAL.
                for key, maxlen in [("LoanX ID", 8), ("CUSIP", 9), ("Asset Subtype", 10)]:
                    if line[key] and len(line[key]) > maxlen:
                        logger.warning(f"dubious {key} found: {line[key]}")
                        line[key] = line[key][:maxlen]
                if (
                    "Reinvest Collat" not in missingfields
                    and line["Reinvest Collat"].upper() == "Y"
                ):
                    # Assume it's a reinvestment asset; these carry the issuer
                    # in the "ID Number" column.
                    line["Reinvest Collat"] = True
                    line["Issuer"] = line["ID Number"]
                if not line["Spread"]:
                    line["Spread"] = line["Gross Margin"]
                for field in [
                    "Spread",
                    "Gross Coupon",
                    "Market Price",
                    "Contributed Balance",
                ]:
                    line[field] = sanitize_float(line[field])
                if line["Market Price"] == 0:
                    line["Market Price"] = None
                # We store the Libor floor in the database, so "Life Floor"
                # here is really the Libor floor (life floor minus spread).
                if line["Life Floor"] == "No limit":
                    line["Life Floor"] = 0
                elif line["Life Floor"]:
                    try:
                        line["Life Floor"] = float(line["Life Floor"]) - float(
                            line["Spread"]
                        )
                    except (ValueError, TypeError):
                        # TypeError covers a Spread that sanitized to None.
                        line["Life Floor"] = float("nan")
                # Take care of reinvestment asset lines, which have no name.
                if not line["Asset Name"]:
                    line["Asset Name"] = "Reinv"
                try:
                    line["Asset Maturity Date"] = datetime.datetime.strptime(
                        line["Asset Maturity Date"], "%b %d, %Y"
                    ).date()
                except (ValueError, TypeError):
                    line["Asset Maturity Date"] = None
                r = [line[field] or None for field in fields]
                if line["Amortization Schedule"]:
                    temp = line["Amortization Schedule"].split("; ")
                    temp = [e.split(" @ ") for e in temp]
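                    # Hypothetical example of the input, inferred from the parsing
                    # above (actual Intex strings may differ):
                    #   "1,000,000.00 @ Jan 15, 2024; 500,000.00 @ Jul 15, 2024"
                    #   -> [["1,000,000.00", "Jan 15, 2024"], ["500,000.00", "Jul 15, 2024"]]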
                    amounts, dates = zip(*temp)
                    if "%" in amounts[0]:
                        # For reinvestment assets the amortization is in percentages.
                        amounts = [float(e.replace("%", "")) / 100 for e in amounts]
                    else:
                        amounts = [float(e.replace(",", "")) for e in amounts]
                    dates = [
                        datetime.datetime.strptime(e, "%b %d, %Y").date() for e in dates
                    ]
                    r[-1] = dates
                    r.append(amounts)
                else:
                    r.append(None)
                # Sometimes the Asset Name is not unique; add a random tag in that case.
                if r[0] in data:
                    r[0] = r[0] + str(uuid.uuid4())[:3]
                data[r[0]] = r[1:]

        sqlstr1 = "select distinct(updatedate) from et_collateral where dealname = %s"
        sqlstr2 = (
            'select max("Latest Update") from clo_universe'
            ' where dealname = %s and "Latest Update" <= %s'
        )
        with conn.cursor() as c:
            c.execute(sqlstr1, (dealname,))
            old_update_dates = [date[0] for date in c]
            c.execute(sqlstr2, (dealname, workdate))
            updatedate = c.fetchone()[0]
        conn.commit()
        # Sanity check in case we already have the data.
        reinsert = False
        if updatedate in old_update_dates:
            sql_str = (
                "SELECT count(*) FROM et_collateral"
                " where dealname = %s and updatedate = %s"
            )
            with conn.cursor() as c:
                c.execute(sql_str, (dealname, updatedate))
                currlen = c.fetchone()[0]
            conn.commit()
            if currlen != len(data):
                # Row counts disagree, so delete the stale rows and reupload.
                logger.warning(
                    f"{dealname} has {currlen} rows in the database "
                    f"and current collateral file has {len(data)}"
                )
                with conn.cursor() as c:
                    sql_str = "DELETE FROM et_collateral where dealname = %s and updatedate = %s"
                    c.execute(sql_str, (dealname, updatedate))
                conn.commit()
                reinsert = True
        if reinsert or updatedate not in old_update_dates:
            with conn.cursor() as c:
                try:
                    c.executemany(
                        sqlstr,
                        [(dealname, updatedate, k) + tuple(v) for k, v in data.items()],
                    )
                except (
                    psycopg2.DataError,
                    psycopg2.IntegrityError,
                    TypeError,
                ) as detail:
                    logger.error(detail)
                    conn.rollback()
                else:
                    conn.commit()


def intex_data(conn, workdate):
    basedir = Path(os.environ["DATA_DIR"]) / ("Indicative_" + workdate)
    cusip_files = [f for f in basedir.iterdir() if "TrInfo" in f.name]
    deal_files = [f for f in basedir.iterdir() if "TrInfo" not in f.name]
    # First load deal data...
    for deal_file in deal_files:
        upload_deal_data(conn, deal_file)
    # ...then tranche data...
    for cusip_file in cusip_files:
        upload_cusip_data(conn, cusip_file)
    # ...and finally the collateral files for the same workdate.
    upload_data(conn, workdate)
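

# Minimal usage sketch, not part of the original module: it assumes the
# sibling dbconn module exposes a connect() helper returning a psycopg2
# connection, and that workdate strings match the directory suffixes used
# above (e.g. "20240115"). Adjust both to the project's actual conventions.
if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser(
        description="Load Intex indicative and collateral files for one workdate"
    )
    parser.add_argument(
        "workdate", help="date suffix of the Indicative_/Collaterals_ folders"
    )
    args = parser.parse_args()
    logging.basicConfig(level=logging.INFO)
    connection = dbconn.connect()  # assumption: dbconn provides connect()
    try:
        intex_data(connection, args.workdate)
    finally:
        connection.close()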