-rw-r--r--  python/load_intex_collateral.py  157
1 file changed, 83 insertions, 74 deletions
diff --git a/python/load_intex_collateral.py b/python/load_intex_collateral.py
index 633dfb74..7da4b13d 100644
--- a/python/load_intex_collateral.py
+++ b/python/load_intex_collateral.py
@@ -1,8 +1,5 @@
import psycopg2
-import os
-import re
-import csv
-import datetime
+import os, csv, datetime
import pdb
from common import root, sanitize_float
from db import conn, query_db
@@ -20,70 +17,83 @@ fields = ['Asset Name', 'Issuer', 'Contributed Balance', 'Maturity Date', \
def convertToNone(s):
return None if s=='' else s
-def upload_data(conn, dealnames, workdate):
+def windows1252_encoder(fh):
+ for line in fh:
+ yield line.decode('windows-1252').encode('utf-8')
+
+def upload_data(conn, dealnames, workdate, logger=None):
for dealname in dealnames:
#dealname, updatedate = line.rstrip().split()
# updatedate = datetime.datetime.strptime(updatedate, '%m/%d/%Y')
#dealname = dealname.upper() + ",AD.txt
- with open( os.path.join(root, "data", "Collaterals_" + workdate, dealname.upper() + "_AD.txt"),
- "r", encoding='windows-1252') as fh:
- dr = csv.DictReader(fh, dialect = 'excel-tab')
- missingfields = set(fields).union({'Gross Margin'}) - set(dr.fieldnames)
- if "LoanX ID" in missingfields:
- print("LoanX ID column is missing. Probably an error in exporting from intex")
- pdb.set_trace()
- data = {}
- for line in dr:
- for f in missingfields:
- line[f] = None
- try:
- line['Fixed or Float'] = line['Fixed or Float'].upper()
- line['LoanX ID'] = line['LoanX ID'].upper()
- line['CUSIP'] = line['CUSIP'].upper()
- line['Asset Subtype'] = line['Asset Subtype'].replace("Reinvest ", "")
- line['Asset Subtype'] = line['Asset Subtype'].replace("Reinv ", "")
- except AttributeError:
- pass
- else:
- #sanity checks for loanxid and cusip
- if len(line['LoanX ID']) > 8:
- print("dubious id found: {0}".format(line['LoanX ID']))
- line['LoanX ID'] = line['LoanX ID'][:8]
- if len(line['CUSIP']) > 9:
- print("dubious CUSIP found: {0}".format(line['CUSIP']))
- line['CUSIP'] = line['CUSIP'][:9]
- if len(line['Asset Subtype'])>10:
- line['Asset Subtype'] = line['Asset Subtype'][:9]
+ basedir = os.path.join(root, "data", "Collaterals_" + workdate)
+ if sys.version_info[0]==2:
+ fh = open(os.path.join(basedir, dealname.upper() + "_AD.txt"))
+ fh = windows1252_encoder(fh)
+ else:
+ fh = open(os.path.join(basedir, dealname.upper() + "_AD.txt"), encoding="windows-1252")
- if 'Reinvest Collat' not in missingfields and \
- line['Reinvest Collat'].upper() == 'Y' or line['Issuer'] == '':
- # assume it's a reinvestment asset
- line['Reinvest Collat'] = True
- line['Issuer'] = line['ID Number']
- if not line['Spread']:
- line['Spread'] = line['Gross Margin']
+ dr = csv.DictReader(fh, dialect = 'excel-tab')
+ missingfields = set(fields).union({'Gross Margin'}) - set(dr.fieldnames)
+ if "LoanX ID" in missingfields:
+ msg = "{0}: LoanX ID column is missing. Probably an error in exporting from intex".format(dealname)
+ if logger:
+ logger.info(msg)
+ else:
+ print(msg)
+ data = {}
+ for line in dr:
+ for f in missingfields:
+ line[f] = None
+ try:
+ line['Fixed or Float'] = line['Fixed or Float'].upper()
+ line['LoanX ID'] = line['LoanX ID'].upper()
+ line['CUSIP'] = line['CUSIP'].upper()
+ line['Asset Subtype'] = line['Asset Subtype'].replace("Reinvest ", "")
+ line['Asset Subtype'] = line['Asset Subtype'].replace("Reinv ", "")
+ except AttributeError:
+ pass
+ else:
+ #sanity checks for loanxid and cusip
+ if len(line['LoanX ID']) > 8:
+ print("dubious id found: {0}".format(line['LoanX ID']))
+ line['LoanX ID'] = line['LoanX ID'][:8]
+ if len(line['CUSIP']) > 9:
+ print("dubious CUSIP found: {0}".format(line['CUSIP']))
+ line['CUSIP'] = line['CUSIP'][:9]
+ if len(line['Asset Subtype'])>10:
+ line['Asset Subtype'] = line['Asset Subtype'][:9]
- for field in ['Spread', 'Gross Coupon', 'Market Price', 'Contributed Balance']:
- line[field] = sanitize_float(line[field])
- if line['Market Price'] == 0:
- line['Market Price'] = None
- #we store the Libor Floor in the database, so Life Floor is really Libor Floor
- if line['Life Floor'] == "No limit":
- line['Life Floor'] = 0
- elif line['Life Floor']:
- try:
- line['Life Floor'] = float(line['Life Floor']) - float(line['Spread'])
- except ValueError:
- line['Life Floor'] = float('Nan')
+ if 'Reinvest Collat' not in missingfields and \
+ line['Reinvest Collat'].upper() == 'Y' or line['Issuer'] == '':
+ # assume it's a reinvestment asset
+ line['Reinvest Collat'] = True
+ line['Issuer'] = line['ID Number']
+ if not line['Spread']:
+ line['Spread'] = line['Gross Margin']
+
+ for field in ['Spread', 'Gross Coupon', 'Market Price', 'Contributed Balance']:
+ line[field] = sanitize_float(line[field])
+ if line['Market Price'] == 0:
+ line['Market Price'] = None
+ #we store the Libor Floor in the database, so Life Floor is really Libor Floor
+ if line['Life Floor'] == "No limit":
+ line['Life Floor'] = 0
+ elif line['Life Floor']:
+ try:
+ line['Life Floor'] = float(line['Life Floor']) - float(line['Spread'])
+ except ValueError:
+ line['Life Floor'] = float('Nan')
- # we take care of reinvestment asset lines
- if not line['Asset Name']:
- line['Asset Name'] = 'Reinv'
- r = [convertToNone(line[field]) for field in fields]
- #sometimes the Asset Name is not unique (we add random tag in this case)
- if r[0] in data:
- r[0] = r[0] + str(uuid.uuid4())[:3]
- data[r[0]] = r[1:]
+ # we take care of reinvestment asset lines
+ if not line['Asset Name']:
+ line['Asset Name'] = 'Reinv'
+ r = [convertToNone(line[field]) for field in fields]
+ #sometimes the Asset Name is not unique (we add random tag in this case)
+ if r[0] in data:
+ r[0] = r[0] + str(uuid.uuid4())[:3]
+ data[r[0]] = r[1:]
+ fh.close()
sqlstr = "select distinct(updatedate) from et_collateral where dealname= %s"
old_update_dates = [date[0] for date in query_db(sqlstr, params=(dealname,), one=False)]
@@ -120,18 +130,11 @@ def upload_data(conn, dealnames, workdate):
pdb.set_trace()
conn.commit()
-if __name__ == "__main__":
- if len(sys.argv) > 1:
- workdate = sys.argv[1]
- else:
- workdate = str(datetime.date.today())
- if len(sys.argv) > 2:
- dealnames = sys.argv[2:]
- else:
- dealnames = [re.sub("_AD.txt", "", d).lower() for d in
- os.listdir(os.path.join(root, "data", "Collaterals_" + workdate))]
- files = [os.path.join(root, "data", "Indicative_" + workdate, f) for f in
- os.listdir(os.path.join(root, "data", "Indicative_" + workdate))]
+def intex_data(conn, workdate, logger=None):
+ dealnames = [d.replace("_AD.txt", "").lower() for d in
+ os.listdir(os.path.join(root, "data", "Collaterals_" + workdate))]
+ basedir = os.path.join(root, "data", "Indicative_" + workdate)
+ files = [os.path.join(basedir, f) for f in os.listdir(basedir)]
cusip_files = [f for f in files if "TrInfo" in f]
deal_files = [f for f in files if "TrInfo" not in f]
#first load deal data
@@ -140,7 +143,13 @@ if __name__ == "__main__":
#then load tranche data
for cusip_file in cusip_files:
upload_cusip_data(cusip_file)
- upload_data(conn, dealnames, workdate)
+ upload_data(conn, dealnames, workdate, logger)
+if __name__ == "__main__":
+ if len(sys.argv) > 1:
+ workdate = sys.argv[1]
+ else:
+ workdate = str(datetime.date.today())
+ intex_data(conn, workdate)
conn.close()
print("done")