-rw-r--r--  python/load_intex_collateral.py | 69
1 file changed, 28 insertions(+), 41 deletions(-)
diff --git a/python/load_intex_collateral.py b/python/load_intex_collateral.py
index cfa4d71f..b530d246 100644
--- a/python/load_intex_collateral.py
+++ b/python/load_intex_collateral.py
@@ -7,9 +7,10 @@ import pdb
 from common import root
 from db import conn, query_db
 import sys
+import uuid
 
 fields = ['Asset Name', 'Issuer', 'Contributed Balance', 'Maturity Date', \
-          'Asset Subtype', 'Asset Type', 'Gross Coupon', 'Gross Margin', 'Spread', \
+          'Asset Subtype', 'Asset Type', 'Gross Coupon', 'Spread', \
          'Frequency', 'Next Paydate', 'Second Lien', 'LoanX ID', 'CUSIP',
          'Market Price', 'Market Price Source', 'Price Date', 'Fixed or Float', \
          'Defaulted Flag', 'Security Sub-Category', 'Structured Finance Security', \
@@ -21,13 +22,12 @@ def convertToNone(s):
 def sanitize_float(intex_float):
     try:
         intex_float = intex_float.replace(",", "")
-    except AttributeError:
-        return intex_float
-    else:
         if "(" in intex_float:
             return - float(intex_float[1:-1])
         else:
             return float(intex_float)
+    except (AttributeError, ValueError):
+        return intex_float
 
 def upload_data(conn, dealnames, workdate):
     for dealname in dealnames:
@@ -37,11 +37,11 @@ def upload_data(conn, dealnames, workdate):
         with open( os.path.join(root, "data", "Collaterals_" + workdate,
                                 dealname.upper() + "_AD.txt"), "r", encoding='windows-1252') as fh:
             dr = csv.DictReader(fh, dialect = 'excel-tab')
-            missingfields = set(fields) - set(dr.fieldnames)
+            missingfields = set(fields).union({'Gross Margin'}) - set(dr.fieldnames)
             if "LoanX ID" in missingfields:
                 print("LoanX ID column is missing. Probably an error in exporting from intex")
                 pdb.set_trace()
-            data = []
+            data = {}
             for line in dr:
                 for f in missingfields:
                     line[f] = None
@@ -49,12 +49,19 @@ def upload_data(conn, dealnames, workdate):
                     line['Fixed or Float'] = line['Fixed or Float'].upper()
                     line['LoanX ID'] = line['LoanX ID'].upper()
                     line['CUSIP'] = line['CUSIP'].upper()
+                    line['Asset Subtype'] = line['Asset Subtype'].replace("Reinvest ", "")
                 except AttributeError:
                     pass
                 else:
+                    # sanity checks for loanxid and cusip
                     if len(line['LoanX ID']) > 8:
                         print("dubious id found: {0}".format(line['LoanX ID']))
                         line['LoanX ID'] = line['LoanX ID'][:8]
+                    if len(line['CUSIP']) > 9:
+                        print("dubious CUSIP found: {0}".format(line['CUSIP']))
+                        line['CUSIP'] = line['CUSIP'][:9]
+                    if len(line['Asset Subtype']) > 10:
+                        line['Asset Subtype'] = line['Asset Subtype'][:9]
 
                 if 'Reinvest Collat' not in missingfields and \
                    line['Reinvest Collat'].upper() == 'Y' or line['Issuer'] == '':
@@ -76,24 +83,27 @@ def upload_data(conn, dealnames, workdate):
                     line['Life Floor'] = float(line['Life Floor']) - float(line['Spread'])
                 except ValueError:
                     line['Life Floor'] = float('Nan')
-                if line['CUSIP']== 'XAQ3930AAB43':
-                    line['CUSIP']='BL078321'
+
+                # we take care of reinvestment asset lines
+                if not line['Asset Name']:
+                    line['Asset Name'] = 'Reinv'
                 r = [convertToNone(line[field]) for field in fields]
-                if r[fields.index('CUSIP')] and len(r[fields.index('CUSIP')])>9:
-                    pdb.set_trace()
-                data.append(r)
+                # sometimes the Asset Name is not unique (we add a random tag in this case)
+                if r[0] in data:
+                    r[0] = r[0] + str(uuid.uuid4())[:3]
+                data[r[0]] = r[1:]
 
         sqlstr = "select distinct(updatedate) from et_collateral where dealname= %s"
         old_update_dates = [date[0] for date in query_db(sqlstr, params=(dealname,), one=False)]
-        sqlstr = 'select max("Latest Update") from clo_universe where dealname= %s'
+        sqlstr = "select max(\"Latest Update\") from clo_universe where dealname= %s"
         updatedate = query_db(sqlstr, params=(dealname,))[0]
-
+        # sanity check if we already have the data
         reinsert = False
         if updatedate in old_update_dates:
             sqlstr = "SELECT count(*) FROM et_collateral where dealname = %s and updatedate= %s"
             currlen = query_db(sqlstr, params = (dealname, updatedate))[0]
-            if currlen != len(data):
+            if currlen != len(data):  # then we delete and just reupload
                 print("{0} has {1} rows in the database and current collateral file has {2}".format(dealname, currlen, len(data)))
                 with conn.cursor() as c:
                     sqlstr = "DELETE FROM et_collateral where dealname = %s and updatedate = %s"
                     c.execute(sqlstr, (dealname, updatedate))
@@ -102,7 +112,6 @@ def upload_data(conn, dealnames, workdate):
                 reinsert = True
 
         if reinsert or not old_update_dates or updatedate not in old_update_dates:
-            tag = 0
             sql_fields = ["dealname", "updatedate", "name", "IssuerName",
                           "CurrentBalance", "Maturity", "AssetSubtype", "AssetType",
                           "GrossCoupon", "Spread", "Frequency", "NextPaydate", "SecondLien", "LoanXID",
@@ -111,38 +120,16 @@ def upload_data(conn, dealnames, workdate):
                           "Liborfloor", "ReinvFlag", "Currency"]
             sqlstr = "INSERT INTO ET_COLLATERAL({0}) VALUES({1})".format(",".join(sql_fields),
                                                                          ",".join(["%s"] * len(sql_fields)))
-            c = conn.cursor()
-            for row in data:
-                # these next three ifs are to take care of reinvestment asset lines
-                if not row[0]:
-                    row[0] = 'Reinv'
+            with conn.cursor() as c:
                 try:
-                    if 'Reinvest' in row[4]:
-                        row[4] = row[4].replace("Reinvest ", "")
-                    if len(row[4])>10:
-                        row[4] = row[4][:9]
-                except (AttributeError, TypeError):
-                    pass
-
-                try:
-                    c.execute(sqlstr, (dealname, updatedate) + tuple(row))
+                    c.executemany(sqlstr, [(dealname, updatedate, k) + tuple(v) for k, v in data.items()])
                 except psycopg2.DataError as detail:
                     print(detail)
                     pdb.set_trace()
-                except psycopg2.IntegrityError as detail:
+                except TypeError as detail:
                     print(detail)
-                    # crazy hack intex unique id is not really unique
-                    conn.rollback()
-                    # make sure the loan name is unique by tagging it
-                    row[0] = row[0] + "_tag_" + str(tag)
-                    tag = tag + 1
-                    c.execute(sqlstr, (dealname, updatedate) + tuple(row))
-                    conn.commit()
-                except TypeError:
                     pdb.set_trace()
-                else:
-                    conn.commit()
-            c.close()
+                conn.commit()
 
 if __name__ == "__main__":
     if len(sys.argv) > 1:
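Note on the reworked sanitize_float: it handles Intex's comma-grouped numbers and accounting-style negatives in one pass, and by also catching ValueError it now passes non-numeric strings through unchanged instead of raising. A standalone copy of the function from this commit, exercised with a few made-up sample values:

    def sanitize_float(intex_float):
        try:
            intex_float = intex_float.replace(",", "")  # strip thousands separators
            if "(" in intex_float:
                return - float(intex_float[1:-1])       # "(123.4)" means -123.4
            else:
                return float(intex_float)
        except (AttributeError, ValueError):
            # non-strings (None, floats) and non-numeric strings pass through
            return intex_float

    assert sanitize_float("1,234.5") == 1234.5
    assert sanitize_float("(987.6)") == -987.6
    assert sanitize_float(None) is None
    assert sanitize_float("N/A") == "N/A"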
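Note on the de-duplication: the old insert loop papered over duplicate asset names by catching psycopg2.IntegrityError, rolling back, and retrying with a "_tag_N" suffix; the new code instead guarantees unique names while reading the file, keyed in a dict. A minimal sketch of that idea in isolation (the sample rows are invented; r[0] is 'Asset Name', as in the script):

    import uuid

    rows = [["Loan A", "1.0"], ["Loan A", "2.0"], ["", "3.0"]]

    data = {}
    for r in rows:
        if not r[0]:
            r[0] = 'Reinv'                       # reinvestment lines come in unnamed
        if r[0] in data:
            r[0] = r[0] + str(uuid.uuid4())[:3]  # e.g. "Loan A" -> "Loan Af3b"
        data[r[0]] = r[1:]

Three characters of a UUID keep the names short, but give only a few thousand distinct suffixes; a pathological file could still collide, and a collision would silently overwrite the earlier row in the dict.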
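Note on the batch insert: with uniqueness guaranteed before the database is touched, the per-row execute/rollback/commit cycle collapses into one executemany per deal. A sketch under the same assumptions (an open psycopg2 connection, `data` mapping asset name to the remaining column values; the helper name here is hypothetical):

    import psycopg2

    def insert_collateral(conn, sqlstr, dealname, updatedate, data):
        # one batch, one transaction: a bad row now aborts the whole deal's
        # insert instead of being retried row by row as before
        with conn.cursor() as c:
            c.executemany(sqlstr, [(dealname, updatedate, name) + tuple(values)
                                   for name, values in data.items()])
        conn.commit()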
