diff options
Diffstat (limited to 'python/intex/load_intex_collateral.py')
| -rw-r--r-- | python/intex/load_intex_collateral.py | 186 |
1 files changed, 130 insertions, 56 deletions
diff --git a/python/intex/load_intex_collateral.py b/python/intex/load_intex_collateral.py index d848887b..9fdfa9d6 100644 --- a/python/intex/load_intex_collateral.py +++ b/python/intex/load_intex_collateral.py @@ -11,30 +11,77 @@ import logging logger = logging.getLogger(__name__) -fields = ['Asset Name', 'Issuer', 'Contributed Balance', 'Asset Maturity Date', - 'Asset Subtype', 'Asset Type', 'Gross Coupon', 'Spread', \ - 'Frequency', 'Next Paydate', 'Second Lien', 'LoanX ID', 'CUSIP', - 'Market Price', 'Market Price Source', 'Market Price Date', 'Fixed or Float', \ - 'Defaulted Flag', 'Security Sub-Category', 'Structured Finance Security', \ - 'Life Floor', 'Reinvest Collat', 'Native Currency', "Moody's Industry Name", - "Country", "Amortization Schedule"] +fields = [ + "Asset Name", + "Issuer", + "Contributed Balance", + "Asset Maturity Date", + "Asset Subtype", + "Asset Type", + "Gross Coupon", + "Spread", + "Frequency", + "Next Paydate", + "Second Lien", + "LoanX ID", + "CUSIP", + "Market Price", + "Market Price Source", + "Market Price Date", + "Fixed or Float", + "Defaulted Flag", + "Security Sub-Category", + "Structured Finance Security", + "Life Floor", + "Reinvest Collat", + "Native Currency", + "Moody's Industry Name", + "Country", + "Amortization Schedule", +] + def upload_data(conn, workdate): - sql_fields = ["dealname", "updatedate", "name", "IssuerName", "CurrentBalance", - "Maturity", "AssetSubtype", "AssetType", "GrossCoupon", - "Spread", "Frequency", "NextPaydate", "SecondLien", "LoanXID", - "Cusip", "IntexPrice", "IntexPriceSource", "IntexPriceDate", - "FixedOrFloat", "DefaultedFlag", "CovLite", "isCDO", - "Liborfloor", "ReinvFlag", "Currency", "Industry", "Country", - "amort_schedule_dates", "amort_schedule_amounts"] - sqlstr = "INSERT INTO ET_COLLATERAL({0}) VALUES({1})".format(",".join(sql_fields), - ",".join(["%s"] * len(sql_fields))) - basedir = Path(os.environ['DATA_DIR']) / ("Collaterals_" + workdate) + sql_fields = [ + "dealname", + "updatedate", + "name", + "IssuerName", + "CurrentBalance", + "Maturity", + "AssetSubtype", + "AssetType", + "GrossCoupon", + "Spread", + "Frequency", + "NextPaydate", + "SecondLien", + "LoanXID", + "Cusip", + "IntexPrice", + "IntexPriceSource", + "IntexPriceDate", + "FixedOrFloat", + "DefaultedFlag", + "CovLite", + "isCDO", + "Liborfloor", + "ReinvFlag", + "Currency", + "Industry", + "Country", + "amort_schedule_dates", + "amort_schedule_amounts", + ] + sqlstr = "INSERT INTO ET_COLLATERAL({0}) VALUES({1})".format( + ",".join(sql_fields), ",".join(["%s"] * len(sql_fields)) + ) + basedir = Path(os.environ["DATA_DIR"]) / ("Collaterals_" + workdate) for fname in basedir.iterdir(): fh = open(fname, encoding="windows-1252") dealname = fname.name.rsplit("_", 1)[0].lower() - dr = csv.DictReader(fh, dialect='excel-tab') - missingfields = set(fields).union({'Gross Margin'}) - set(dr.fieldnames) + dr = csv.DictReader(fh, dialect="excel-tab") + missingfields = set(fields).union({"Gross Margin"}) - set(dr.fieldnames) if "LoanX ID" in missingfields: msg = "{0}: LoanX ID column is missing. Probably an error in exporting from intex" logger.warning(msg.format(dealname)) @@ -42,70 +89,85 @@ def upload_data(conn, workdate): for line in dr: for f in missingfields: line[f] = None - for key in ['LoanX ID', 'CUSIP', 'Fixed or Float']: + for key in ["LoanX ID", "CUSIP", "Fixed or Float"]: if line[key]: line[key] = line[key].upper() - if line['Asset Subtype']: - line['Asset Subtype'] = line['Asset Subtype'].replace("Reinvest ", "").replace("Reinv ","") - if line['Second Lien']: - line['Second Lien'] = line['Second Lien'].replace("Second Lien", "Y") - for key, l in [('LoanX ID', 8), ('CUSIP', 9), ('Asset Subtype', 10)]: + if line["Asset Subtype"]: + line["Asset Subtype"] = ( + line["Asset Subtype"].replace("Reinvest ", "").replace("Reinv ", "") + ) + if line["Second Lien"]: + line["Second Lien"] = line["Second Lien"].replace("Second Lien", "Y") + for key, l in [("LoanX ID", 8), ("CUSIP", 9), ("Asset Subtype", 10)]: if line[key]: if len(line[key]) > l: logger.warning("dubious {0} found: {1}".format(key, line[key])) line[key] = line[key][:l] - if 'Reinvest Collat' not in missingfields and line['Reinvest Collat'].upper() == 'Y': + if ( + "Reinvest Collat" not in missingfields + and line["Reinvest Collat"].upper() == "Y" + ): # assume it's a reinvestment asset - line['Reinvest Collat'] = True - line['Issuer'] = line['ID Number'] - if not line['Spread']: - line['Spread'] = line['Gross Margin'] + line["Reinvest Collat"] = True + line["Issuer"] = line["ID Number"] + if not line["Spread"]: + line["Spread"] = line["Gross Margin"] - for field in ['Spread', 'Gross Coupon', 'Market Price', 'Contributed Balance']: + for field in [ + "Spread", + "Gross Coupon", + "Market Price", + "Contributed Balance", + ]: line[field] = sanitize_float(line[field]) - if line['Market Price'] == 0: - line['Market Price'] = None - #we store the Libor Floor in the database, so Life Floor is really Libor Floor - if line['Life Floor'] == "No limit": - line['Life Floor'] = 0 - elif line['Life Floor']: + if line["Market Price"] == 0: + line["Market Price"] = None + # we store the Libor Floor in the database, so Life Floor is really Libor Floor + if line["Life Floor"] == "No limit": + line["Life Floor"] = 0 + elif line["Life Floor"]: try: - line['Life Floor'] = float(line['Life Floor']) - float(line['Spread']) + line["Life Floor"] = float(line["Life Floor"]) - float( + line["Spread"] + ) except ValueError: - line['Life Floor'] = float('Nan') + line["Life Floor"] = float("Nan") # we take care of reinvestment asset lines - if not line['Asset Name']: - line['Asset Name'] = 'Reinv' + if not line["Asset Name"]: + line["Asset Name"] = "Reinv" try: - line['Asset Maturity Date'] = datetime.datetime.strptime(line['Asset Maturity Date'], - '%b %d, %Y').date() + line["Asset Maturity Date"] = datetime.datetime.strptime( + line["Asset Maturity Date"], "%b %d, %Y" + ).date() except (ValueError, TypeError): - line['Asset Maturity Date'] = None + line["Asset Maturity Date"] = None r = [line[field] or None for field in fields] if line["Amortization Schedule"]: temp = line["Amortization Schedule"].split("; ") temp = [e.split(" @ ") for e in temp] amounts, dates = zip(*temp) - if '%' in amounts[0]: #for reinvestment assets amort is in percentage. + if "%" in amounts[0]: # for reinvestment assets amort is in percentage. amounts = [float(e.replace("%", "")) / 100 for e in amounts] else: amounts = [float(e.replace(",", "")) for e in amounts] - dates = [datetime.datetime.strptime(e, "%b %d, %Y").date() for e in dates] + dates = [ + datetime.datetime.strptime(e, "%b %d, %Y").date() for e in dates + ] r[-1] = dates r.append(amounts) else: r.append(None) - #sometimes the Asset Name is not unique (we add random tag in this case) + # sometimes the Asset Name is not unique (we add random tag in this case) if r[0] in data: r[0] = r[0] + str(uuid.uuid4())[:3] data[r[0]] = r[1:] fh.close() sqlstr1 = "select distinct(updatedate) from et_collateral where dealname= %s" - sqlstr2 = "select max(\"Latest Update\") from clo_universe where dealname= %s and \"Latest Update\"<=%s" + sqlstr2 = 'select max("Latest Update") from clo_universe where dealname= %s and "Latest Update"<=%s' with conn.cursor() as c: c.execute(sqlstr1, (dealname,)) old_update_dates = [date[0] for date in c] @@ -120,9 +182,13 @@ def upload_data(conn, workdate): c.execute(sql_str, (dealname, updatedate)) currlen = c.fetchone()[0] conn.commit() - if currlen != len(data): #then we delete and just reupload - logger.warning("{0} has {1} rows in the database " \ - "and current collateral file has {2}".format(dealname, currlen, len(data))) + if currlen != len(data): # then we delete and just reupload + logger.warning( + "{0} has {1} rows in the database " + "and current collateral file has {2}".format( + dealname, currlen, len(data) + ) + ) with conn.cursor() as c: sql_str = "DELETE FROM et_collateral where dealname = %s and updatedate = %s" c.execute(sql_str, (dealname, updatedate)) @@ -132,21 +198,29 @@ def upload_data(conn, workdate): if reinsert or not old_update_dates or updatedate not in old_update_dates: with conn.cursor() as c: try: - c.executemany(sqlstr, [(dealname, updatedate, k) + tuple(v) for k, v in data.items()]) - except (psycopg2.DataError, psycopg2.IntegrityError, TypeError) as detail: + c.executemany( + sqlstr, + [(dealname, updatedate, k) + tuple(v) for k, v in data.items()], + ) + except ( + psycopg2.DataError, + psycopg2.IntegrityError, + TypeError, + ) as detail: logger.error(detail) conn.rollback() else: conn.commit() + def intex_data(conn, workdate): - basedir = Path(os.environ['DATA_DIR']) / ("Indicative_" + workdate) + basedir = Path(os.environ["DATA_DIR"]) / ("Indicative_" + workdate) cusip_files = [f for f in basedir.iterdir() if "TrInfo" in f.name] deal_files = [f for f in basedir.iterdir() if "TrInfo" not in f.name] - #first load deal data + # first load deal data for deal_file in deal_files: upload_deal_data(conn, deal_file) - #then load tranche data + # then load tranche data for cusip_file in cusip_files: upload_cusip_data(conn, cusip_file) |
