Diffstat (limited to 'python/load_intex_collateral.py')
-rw-r--r--  python/load_intex_collateral.py  70
1 file changed, 28 insertions(+), 42 deletions(-)
diff --git a/python/load_intex_collateral.py b/python/load_intex_collateral.py
index cfa4d71f..b530d246 100644
--- a/python/load_intex_collateral.py
+++ b/python/load_intex_collateral.py
@@ -7,9 +7,10 @@ import pdb
from common import root
from db import conn, query_db
import sys
+import uuid
fields = ['Asset Name', 'Issuer', 'Contributed Balance', 'Maturity Date', \
- 'Asset Subtype', 'Asset Type', 'Gross Coupon', 'Gross Margin', 'Spread', \
+ 'Asset Subtype', 'Asset Type', 'Gross Coupon', 'Spread', \
'Frequency', 'Next Paydate', 'Second Lien', 'LoanX ID', 'CUSIP',
'Market Price', 'Market Price Source', 'Price Date', 'Fixed or Float', \
'Defaulted Flag', 'Security Sub-Category', 'Structured Finance Security', \
@@ -21,13 +22,12 @@ def convertToNone(s):
def sanitize_float(intex_float):
try:
intex_float = intex_float.replace(",", "")
- except AttributeError:
- return intex_float
- else:
if "(" in intex_float:
return - float(intex_float[1:-1])
else:
return float(intex_float)
+ except (AttributeError, ValueError):
+ return intex_float
def upload_data(conn, dealnames, workdate):
for dealname in dealnames:
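The reworked sanitize_float keeps the parenthesis handling inside the try block, so accounting-style negatives and non-numeric cells both fall through cleanly. A minimal sketch of the resulting behaviour, with illustrative inputs (not taken from the patch):

    def sanitize_float(intex_float):
        try:
            intex_float = intex_float.replace(",", "")
            if "(" in intex_float:
                # accounting-style negative, e.g. "(1,250.00)"
                return - float(intex_float[1:-1])
            else:
                return float(intex_float)
        except (AttributeError, ValueError):
            # None (no .replace) and non-numeric strings pass through unchanged
            return intex_float

    sanitize_float("1,250.75")   # -> 1250.75
    sanitize_float("(500.00)")   # -> -500.0
    sanitize_float(None)         # -> None   (AttributeError)
    sanitize_float("N/A")        # -> "N/A"  (ValueError, newly caught)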
@@ -37,11 +37,11 @@ def upload_data(conn, dealnames, workdate):
with open( os.path.join(root, "data", "Collaterals_" + workdate, dealname.upper() + "_AD.txt"),
"r", encoding='windows-1252') as fh:
dr = csv.DictReader(fh, dialect = 'excel-tab')
- missingfields = set(fields) - set(dr.fieldnames)
+ missingfields = set(fields).union({'Gross Margin'}) - set(dr.fieldnames)
if "LoanX ID" in missingfields:
print("LoanX ID column is missing. Probably an error in exporting from intex")
pdb.set_trace()
- data = []
+ data = {}
for line in dr:
for f in missingfields:
line[f] = None
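The union with {'Gross Margin'} compensates for the column dropped from `fields` above: exports that no longer carry it still get the key nulled out, so the later per-line lookups cannot raise KeyError. A condensed sketch of the set arithmetic (assuming `fields` and the DictReader `dr` from the patch):

    expected = set(fields).union({'Gross Margin'})
    missingfields = expected - set(dr.fieldnames)  # columns absent from this export
    for line in dr:
        for f in missingfields:
            line[f] = None                         # every expected key now exists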
@@ -49,12 +49,19 @@ def upload_data(conn, dealnames, workdate):
line['Fixed or Float'] = line['Fixed or Float'].upper()
line['LoanX ID'] = line['LoanX ID'].upper()
line['CUSIP'] = line['CUSIP'].upper()
+ line['Asset Subtype'] = line['Asset Subtype'].replace("Reinvest ", "")
except AttributeError:
pass
else:
+ # sanity checks for LoanX ID and CUSIP
if len(line['LoanX ID']) > 8:
print("dubious id found: {0}".format(line['LoanX ID']))
line['LoanX ID'] = line['LoanX ID'][:8]
+ if len(line['CUSIP']) > 9:
+ print("dubious CUSIP found: {0}".format(line['CUSIP']))
+ line['CUSIP'] = line['CUSIP'][:9]
+ if len(line['Asset Subtype']) > 10:
+ line['Asset Subtype'] = line['Asset Subtype'][:9]
if 'Reinvest Collat' not in missingfields and \
line['Reinvest Collat'].upper() == 'Y' or line['Issuer'] == '':
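The new length checks mirror the existing LoanX ID guard: LoanX IDs are 8 characters and CUSIPs 9, so anything longer is flagged and truncated. A hedged sketch of the same checks factored into one helper (clamp_id is our name, not the patch's):

    def clamp_id(value, width, label):
        # warn about and truncate identifiers longer than their expected width
        if value and len(value) > width:
            print("dubious {0} found: {1}".format(label, value))
            return value[:width]
        return value

    line['LoanX ID'] = clamp_id(line['LoanX ID'], 8, "id")
    line['CUSIP'] = clamp_id(line['CUSIP'], 9, "CUSIP")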
@@ -76,24 +83,26 @@ def upload_data(conn, dealnames, workdate):
line['Life Floor'] = float(line['Life Floor']) - float(line['Spread'])
except ValueError:
line['Life Floor'] = float('Nan')
- if line['CUSIP']== 'XAQ3930AAB43':
- line['CUSIP']='BL078321'
+
+ # we take care of reinvestment asset lines
+ if not line['Asset Name']:
+ line['Asset Name'] = 'Reinv'
r = [convertToNone(line[field]) for field in fields]
- if r[fields.index('CUSIP')] and len(r[fields.index('CUSIP')])>9:
- pdb.set_trace()
- data.append(r)
+ # the Asset Name is sometimes not unique; we append a short random tag in that case
+ if r[0] in data:
+ r[0] = r[0] + str(uuid.uuid4())[:3]
+ data[r[0]] = r[1:]
sqlstr = "select distinct(updatedate) from et_collateral where dealname= %s"
old_update_dates = [date[0] for date in query_db(sqlstr, params=(dealname,), one=False)]
- sqlstr = 'select max("Latest Update") from clo_universe where dealname= %s'
+ sqlstr = "select max(\"Latest Update\") from clo_universe where dealname= %s"
updatedate = query_db(sqlstr, params=(dealname,))[0]
-
+ # sanity check if we already have the data
reinsert = False
if updatedate in old_update_dates:
sqlstr = "SELECT count(*) FROM et_collateral where dealname = %s and updatedate= %s"
currlen = query_db(sqlstr, params = (dealname, updatedate))[0]
- if currlen != len(data):
+ if currlen != len(data): # then we delete and just re-upload
print("{0} has {1} rows in the database and current collateral file has {2}".format(dealname, currlen, len(data)))
with conn.cursor() as c:
sqlstr = "DELETE FROM et_collateral where dealname = %s and updatedate = %s"
@@ -102,7 +111,6 @@ def upload_data(conn, dealnames, workdate):
reinsert = True
if reinsert or not old_update_dates or updatedate not in old_update_dates:
- tag = 0
sql_fields = ["dealname", "updatedate", "name", "IssuerName", "CurrentBalance",
"Maturity", "AssetSubtype", "AssetType", "GrossCoupon",
"Spread", "Frequency", "NextPaydate", "SecondLien", "LoanXID",
@@ -111,38 +119,16 @@ def upload_data(conn, dealnames, workdate):
"Liborfloor", "ReinvFlag", "Currency"]
sqlstr = "INSERT INTO ET_COLLATERAL({0}) VALUES({1})".format(",".join(sql_fields),
",".join(["%s"] * len(sql_fields)))
- c = conn.cursor()
- for row in data:
- # these next three ifs are to take care of reinvestment asset lines
- if not row[0]:
- row[0] = 'Reinv'
+ with conn.cursor() as c:
try:
- if 'Reinvest' in row[4]:
- row[4] = row[4].replace("Reinvest ", "")
- if len(row[4])>10:
- row[4] = row[4][:9]
- except (AttributeError, TypeError):
- pass
-
- try:
- c.execute(sqlstr, (dealname, updatedate) + tuple(row))
+ c.executemany(sqlstr, [(dealname, updatedate, k) + tuple(v) for k, v in data.items()])
except psycopg2.DataError as detail:
print(detail)
pdb.set_trace()
- except psycopg2.IntegrityError as detail:
+ except TypeError as detail:
print(detail)
- # crazy hack intex unique id is not really unique
- conn.rollback()
- # make sure the loan name is unique by tagging it
- row[0] = row[0] + "_tag_" + str(tag)
- tag = tag + 1
- c.execute(sqlstr, (dealname, updatedate) + tuple(row))
- conn.commit()
- except TypeError:
pdb.set_trace()
- else:
- conn.commit()
- c.close()
+ conn.commit()
if __name__ == "__main__":
if len(sys.argv) > 1:
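One behavioural note on the executemany switch above: psycopg2's executemany still issues one INSERT per row under the hood, but the whole batch now runs in a single transaction, so a DataError aborts the entire insert rather than a single row. The per-row IntegrityError retry could be dropped because the uuid tagging makes names unique before insert time. A minimal sketch of the pattern, assuming an open psycopg2 connection `conn` and the `sqlstr` template built above:

    rows = [(dealname, updatedate, name) + tuple(vals)
            for name, vals in data.items()]
    with conn.cursor() as c:
        c.executemany(sqlstr, rows)   # one INSERT per row, one transaction
    conn.commit()                     # nothing persists until the commit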