diff options
Diffstat (limited to 'python/load_intex_collateral.py')
| -rw-r--r-- | python/load_intex_collateral.py | 179 |
1 files changed, 86 insertions, 93 deletions
diff --git a/python/load_intex_collateral.py b/python/load_intex_collateral.py index de1b6c21..f0b918b2 100644 --- a/python/load_intex_collateral.py +++ b/python/load_intex_collateral.py @@ -4,11 +4,7 @@ import re import csv from datetime import date, datetime import pdb - -if os.name=='nt': - root = "//WDSENTINEL/share/CorpCDOs/" -elif os.name=='posix': - root = "/home/share/CorpCDOs/" +import common fields = ['Asset Name', 'Issuer', 'Contributed Balance', 'Maturity Date', 'Asset Subtype', \ 'Asset Type', 'Gross Coupon', 'Spread', 'Frequency', 'Next Paydate', 'Second Lien', \ @@ -26,101 +22,98 @@ def sanitize_float(intex_float): intex_float = float(intex_float) return intex_float -conn = psycopg2.connect(database="ET", user="et_user", password="Serenitas1", host="192.168.1.108") -cursor = conn.cursor() - -workdate = "2013-01-16" +def get_latest_dealupdate(workdate): + with open(os.path.join(common.root, "data", "clo_universe_intex_" + workdate + ".txt")) as fh: + dr = csv.DictReader(fh, dialect='excel-tab') + deal_table = {line['Deal,Tr/CUSIP/ISIN']: datetime.strptime(line['Latest Update'], '%b %d, %Y').date() \ + for line in dr if 'Paid' not in line['Latest Update']} + return deal_table -with open(os.path.join(root, "data", "clo_universe_intex_" + workdate + ".txt")) as fh: - dr = csv.DictReader(fh, dialect='excel-tab') - deal_table = {line['Deal,Tr/CUSIP/ISIN']: datetime.strptime(line['Latest Update'], '%b %d, %Y').date() \ - for line in dr if 'Paid' not in line['Latest Update']} +def upload_data(workdate, conn, cursor): + for dealname in os.listdir(os.path.join(common.root, "data", "Collaterals_" + workdate)): + #dealname, updatedate = line.rstrip().split() + # updatedate = datetime.datetime.strptime(updatedate, '%m/%d/%Y') + #dealname = dealname.upper() + ",AD.txt + with open( os.path.join(common.root, "data", "Collaterals_" + workdate, dealname), "r") as fh: + dr = csv.DictReader(fh, fieldnames = fields, dialect = 'excel-tab') + dealname = dealname.split(',')[0].lower() + data = [] + for line in dr: + if line['Fixed or Float']: + line['Fixed or Float'] = line['Fixed or Float'].upper() + if line['LoanX ID'] and len(line['LoanX ID']) > 8: + print "dubious id found: {0}".format(line['LoanX ID']) + line['LoanX ID'] = line['LoanX ID'][:8] + try: + # make sure the string is utf8 safe + line['Issuer'] = line['Issuer'].decode('windows-1252') + if line['Issuer']=='' or line['Issuer'].lower() == 'dummy': + break + except AttributeError as detail: + print dealname + for field in ['Spread', 'Gross Coupon', 'Market Price', 'Contributed Balance']: + if line[field]: + line[field] = sanitize_float(line[field]) -for dealname in os.listdir(os.path.join(root, "data", "Collaterals_" + workdate)): - #dealname, updatedate = line.rstrip().split() - # updatedate = datetime.datetime.strptime(updatedate, '%m/%d/%Y') - #dealname = dealname.upper() + ",AD.txt" - with open( os.path.join(root, "data", "Collaterals_" + workdate, dealname), "r") as fh: - dr = csv.DictReader(fh, dialect = 'excel-tab') - dealname , ending = dealname.split(',') - dealname = dealname.lower() - data = [] - for line in dr: - missingfields = set(fields) - set(dr.fieldnames) - for field in missingfields: - line[field] = None - if line['Fixed or Float']: - line['Fixed or Float'] = line['Fixed or Float'].upper() - if 'LoanX ID' in dr.fieldnames and len(line['LoanX ID']) >8: - print "dubious id found: {0}".format(line['LoanX ID']) - line['LoanX ID'] = line['LoanX ID'][:8] - try: - # make sure the string is utf8 safe - line['Issuer']= line['Issuer'].decode('windows-1252') - if line['Issuer']=='' or line['Issuer'].lower() == 'dummy': - break - except AttributeError as detail: - print dealname + r = [convertToNone(line[field]) for field in dr.fieldnames] + data.append(r) - for field in ['Spread', 'Gross Coupon', 'Market Price', 'Contributed Balance']: - if line[field]: - line[field] = sanitize_float(line[field]) - try: - r = [convertToNone(line[field]) for field in fields] - except KeyError as detail: - print detail - pdb.set_trace() - data.append(r) + cursor.execute( "select distinct(updatedate) from et_collateral where dealname='{0}'".format(dealname)) + old_update_dates = [date[0] for date in cursor.fetchall()] - cursor.execute( "select distinct(updatedate) from et_collateral where dealname='{0}'".format(dealname)) - old_update_dates = [date[0] for date in cursor.fetchall()] + deal_table = get_latest_dealupdate(workdate) + try: + updatedate = deal_table[dealname] + except KeyError as detail: + print detail + pdb.set_trace() - try: - updatedate = deal_table[dealname] - except KeyError as detail: - print detail - pdb.set_trace() - - reinsert = False - if updatedate in old_update_dates: - sqlstr = "SELECT count(*) FROM et_collateral where dealname = %s and updatedate= %s" - cursor.execute(sqlstr, (dealname, updatedate)) - currlen = cursor.fetchone()[0] - if currlen <> len(data): - print "{0} has {1} rows in the database and current collateral file has {2}".format(dealname, currlen, len(data)) - sqlstr = "DELETE FROM et_collateral where dealname = %s and updatedate = %s" + reinsert = False + if updatedate in old_update_dates: + sqlstr = "SELECT count(*) FROM et_collateral where dealname = %s and updatedate= %s" cursor.execute(sqlstr, (dealname, updatedate)) - conn.commit() - reinsert = True - if reinsert or not old_update_dates or updatedate not in old_update_dates: - tag = 0 - for row in data: - # print dealname, row - sql_fields = ["dealname", "updatedate", "name", "IssuerName", "CurrentBalance", - "Maturity", "AssetSubtype", "AssetType", "GrossCoupon", - "Spread", "Frequency", "NextPaydate", "SecondLien", "LoanXID", - "Cusip", "IntexPrice", "IntexPriceSource", "IntexPriceDate", - "FixedOrFloat", "DefaultedFlag", "CovLite", "isCDO"] + currlen = cursor.fetchone()[0] + if currlen <> len(data): + print "{0} has {1} rows in the database and current collateral file has {2}".format(dealname, currlen, len(data)) + sqlstr = "DELETE FROM et_collateral where dealname = %s and updatedate = %s" + cursor.execute(sqlstr, (dealname, updatedate)) + conn.commit() + reinsert = True + if reinsert or not old_update_dates or updatedate not in old_update_dates: + tag = 0 + for row in data: + # print dealname, row + sql_fields = ["dealname", "updatedate", "name", "IssuerName", "CurrentBalance", + "Maturity", "AssetSubtype", "AssetType", "GrossCoupon", + "Spread", "Frequency", "NextPaydate", "SecondLien", "LoanXID", + "Cusip", "IntexPrice", "IntexPriceSource", "IntexPriceDate", + "FixedOrFloat", "DefaultedFlag", "CovLite", "isCDO"] - sqlstr = "INSERT INTO ET_COLLATERAL({0}) VALUES({1})".format(",".join(sql_fields), - ",".join(["%s"] * len(sql_fields))) - try: - cursor.execute(sqlstr, (dealname, updatedate) + tuple(row)) - except psycopg2.DataError as detail: - print detail - pdb.set_trace() - except psycopg2.IntegrityError as detail: - print detail - # crazy hack intex unique id is not really unique - conn.rollback() - # make sure the loan name is unique by tagging it - row[0] = row[0] + "_tag_" + str(tag) - tag = tag+1 - cursor.execute(sqlstr, (dealname, updatedate) + tuple(row)) - conn.commit() + sqlstr = "INSERT INTO ET_COLLATERAL({0}) VALUES({1})".format(",".join(sql_fields), + ",".join(["%s"] * len(sql_fields))) + try: + cursor.execute(sqlstr, (dealname, updatedate) + tuple(row)) + except psycopg2.DataError as detail: + print detail + pdb.set_trace() + except psycopg2.IntegrityError as detail: + print detail + # crazy hack intex unique id is not really unique + conn.rollback() + # make sure the loan name is unique by tagging it + row[0] = row[0] + "_tag_" + str(tag) + tag = tag+1 + cursor.execute(sqlstr, (dealname, updatedate) + tuple(row)) + conn.commit() +if __name__ == "__main__": + if len(sys.argv) > 1: + workdate = sys.argv[1] + else: + workdate = str(datetime.date.today()) + upload_data(workdate, common.conn, common.cursor) + common.cursor.close() + common.conn.close() + print "done" -cursor.close() -conn.close() -print "done" |
