# python/load_intex_collateral.py
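# Loads per-deal Intex collateral exports for a workdate into the et_collateral Postgres table.
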
import psycopg2
import os.path
import re
import csv
from datetime import date, datetime
import pdb

if os.name=='nt':
    root = "//WDSENTINEL/share/CorpCDOs/"
elif os.name=='posix':
    root = "/home/share/CorpCDOs/"

fields = ['Asset Name', 'Issuer', 'Contributed Balance', 'Maturity Date', 'Asset Subtype', \
          'Asset Type', 'Gross Coupon', 'Spread', 'Frequency', 'Next Paydate', 'Second Lien', \
          'LoanX ID', 'CUSIP', 'Market Price', 'Market Price Source', 'Price Date', 'Fixed or Float', \
          'Defaulted Flag', 'Security Sub-Category', 'Structured Finance Security']

def convertToNone(s):
    return None if s=='' else s

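# Connect to the ET Postgres database that holds the collateral table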
conn = psycopg2.connect(database="ET", user="et_user", password="Serenitas1", host="192.168.1.108")
cursor = conn.cursor()
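# matches values written in parentheses, e.g. "(0.25)" meaning -0.25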
prog = re.compile(r"\((.*)\)")

workdate = "2012-11-30"
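# Map each deal to its latest Intex update date, skipping deals marked as paid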
with open(os.path.join(root, "data", "clo_universe_intex_" + workdate + ".txt")) as fh:
    dr = csv.DictReader(fh, dialect='excel-tab')
    deal_table = {line['Deal,Tr/CUSIP/ISIN']: datetime.strptime(line['Latest Update'], '%b %d, %Y').date() \
                  for line in dr if 'Paid' not in line['Latest Update']}

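# Load each per-deal collateral file exported for this workdate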
for dealname in os.listdir(os.path.join(root, "Collaterals_" + workdate)):
    #dealname, updatedate = line.rstrip().split()
    # updatedate = datetime.datetime.strptime(updatedate, '%m/%d/%Y')
    #dealname = dealname.upper() + ",AD.txt"
    with open(os.path.join(root, "Collaterals_" + workdate, dealname), "r") as fh:
        dr = csv.DictReader(fh, dialect='excel-tab')
        dealname, ending = dealname.split(',')
        dealname = dealname.lower()
        data = []
        for line in dr:
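            # fill in any columns missing from this file so every row has the full field set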
            missingfields = list(set(fields) - set(dr.fieldnames))
            for field in missingfields:
                line[field] = None
            if line['Fixed or Float'] is not None:
                line['Fixed or Float'] = line['Fixed or Float'].upper()
            if 'LoanX ID' in dr.fieldnames and len(line['LoanX ID']) >8:
                print "dubious id found: {0}".format(line['LoanX ID'])
                line['LoanX ID'] = line['LoanX ID'][:8]
            try:
                # decode from windows-1252 so the issuer name is valid unicode
                line['Issuer'] = line['Issuer'].decode('windows-1252')
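                # stop reading this file at blank or "dummy" issuer rows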
                if line['Issuer']=='' or line['Issuer'].lower() == 'dummy':
                    break
            except AttributeError as detail:
                print dealname
            #convert (x) to -x
            for field in ['Spread', 'Gross Coupon']:
                if line[field] and prog.match(line[field]):
                    line[field] = - float(prog.match(line[field]).group(1))
            if line['Market Price'] != '' and isinstance(line['Market Price'], str):
                line['Market Price'] = float(line['Market Price'].replace(',',''))
            if isinstance(line['Contributed Balance'], str):
                line['Contributed Balance'] = float(line['Contributed Balance'].replace(',',''))
            try:
                r = [convertToNone(line[field]) for field in fields]
            except KeyError as detail:
                print detail
                pdb.set_trace()
            data.append(r)

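    # update dates already loaded for this deal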
    cursor.execute( "select distinct(updatedate) from et_collateral where dealname='{0}'".format(dealname))
    old_update_dates = [date[0] for date in cursor.fetchall()]

    try:
        updatedate = deal_table[dealname]
    except KeyError as detail:
        print detail
        pdb.set_trace()
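    # if this update date was already loaded, check that the row count matches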
    if updatedate in old_update_dates:
        sqlstr = "SELECT count(*) from et_collateral where dealname = %s and updatedate= %s"
        cursor.execute(sqlstr, (dealname, updatedate))
        currlen = cursor.fetchone()[0]
        if currlen != len(data):
            print dealname
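    # only insert rows for an update date we have not loaded yet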
    if updatedate not in old_update_dates:
        tag = 0
        for row in data:
            # print dealname, row
            sqlstr = "INSERT INTO ET_COLLATERAL(dealname, updatedate, name, IssuerName," \
                "CurrentBalance, Maturity, AssetSubtype, AssetType, GrossCoupon," \
                "Spread, Frequency, NextPaydate, SecondLien, LoanXID, Cusip, IntexPrice," \
                "IntexPriceSource, IntexPriceDate, FixedOrFloat, DefaultedFlag, CovLite, isCDO)" \
                "VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s," \
                "%s, %s, %s)"
            try:
                cursor.execute(sqlstr, (dealname, updatedate) + tuple(row))
            except psycopg2.DataError as detail:
                print detail
                pdb.set_trace()
            except psycopg2.IntegrityError as detail:
                print detail
                # hack: the Intex asset name is not actually unique within a deal
                conn.rollback()
                # make sure the loan name is unique by tagging it
                row[0] = row[0] + "_tag_" + str(tag)
                tag = tag+1
                cursor.execute(sqlstr, (dealname, updatedate) + tuple(row))
            conn.commit()


cursor.close()
conn.close()