diff options
Diffstat (limited to 'python/load_refentity.py')
| -rw-r--r-- | python/load_refentity.py | 168 |
1 files changed, 120 insertions, 48 deletions
diff --git a/python/load_refentity.py b/python/load_refentity.py index 7e6a9f6b..2d07bf62 100644 --- a/python/load_refentity.py +++ b/python/load_refentity.py @@ -3,9 +3,11 @@ from pathlib import Path import datetime import re import lxml.etree as etree + parser = etree.XMLParser(remove_blank_text=True) from psycopg2.extras import Json + def todict(xml, uselist=set()): if len(xml): if xml.tag in uselist: @@ -16,7 +18,7 @@ def todict(xml, uselist=set()): if c.tag in d: d[c.tag].append(todict(c, uselist)) else: - if c.tag in ['cdssuccession', 'creditevent', 'auctionccyrate']: + if c.tag in ["cdssuccession", "creditevent", "auctionccyrate"]: d[c.tag] = [todict(c, uselist)] else: d[c.tag] = todict(c, uselist) @@ -24,9 +26,10 @@ def todict(xml, uselist=set()): else: return xml.text.strip() + def dispatch_parsing(col, uselist): if len(col): - if col.tag == 'ratings': + if col.tag == "ratings": return [el.text for el in col.iterfind(".//tier")] else: return Json(todict(col, uselist)) @@ -36,23 +39,51 @@ def dispatch_parsing(col, uselist): else: return col.text + def insert_refentity(fname): - tree = etree.parse(fname, - parser=parser) - conn = dbconn('serenitasdb') - names = ['referenceentity', 'shortname', 'ticker', 'redentitycode', - 'entitycusip', 'lei', 'entitytype', 'jurisdiction', 'depthlevel', - 'markitsector', 'isdatradingdefinition', 'recorddate', 'ratings', - 'entityform', 'companynumber', 'alternativenames', - 'isdatransactiontypes', 'validto', 'validfrom', 'events', - 'holdco', 'country',] + tree = etree.parse(fname, parser=parser) + conn = dbconn("serenitasdb") + names = [ + "referenceentity", + "shortname", + "ticker", + "redentitycode", + "entitycusip", + "lei", + "entitytype", + "jurisdiction", + "depthlevel", + "markitsector", + "isdatradingdefinition", + "recorddate", + "ratings", + "entityform", + "companynumber", + "alternativenames", + "isdatransactiontypes", + "validto", + "validfrom", + "events", + "holdco", + "country", + ] # these are tags which enclose a list - uselist = set(['events', 'isdatransactiontypes', 'nextredentitycodes', 'prevredentitycodes', - 'isdatransactiontypes', 'tiers', 'auctions']) - sql_str = ("""INSERT INTO RefEntity VALUES({}) - ON CONFLICT(redentitycode) DO UPDATE SET {}""". - format(",".join(["%s"] * len(names)), - ",".join(f"{name}=EXCLUDED.{name}" for name in names[1:]))) + uselist = set( + [ + "events", + "isdatransactiontypes", + "nextredentitycodes", + "prevredentitycodes", + "isdatransactiontypes", + "tiers", + "auctions", + ] + ) + sql_str = """INSERT INTO RefEntity VALUES({}) + ON CONFLICT(redentitycode) DO UPDATE SET {}""".format( + ",".join(["%s"] * len(names)), + ",".join(f"{name}=EXCLUDED.{name}" for name in names[1:]), + ) skipfirst = True with conn.cursor() as c: for child in tree.getroot(): @@ -63,36 +94,68 @@ def insert_refentity(fname): c.execute(sql_str, [d.get(name) for name in names]) conn.commit() + def parse_prospectus(xml): return Json({c.tag: [e.text for e in c] for c in xml}) + def insert_refobligation(fname): - tree = etree.parse(fname, - parser=parser) - conn = dbconn('serenitasdb') - names_redpair = ['redpaircode', 'role', 'referenceentity', 'redentitycode', - 'tier', 'pairiscurrent', 'pairvalidfrom', 'pairvalidto', - 'ticker', 'ispreferred', 'preferreddate', 'indexconstituents', - 'recorddate', 'publiccomments', 'myticker', - 'subordinationtype', 'holdco', 'preferredremovaldate'] - names_refobligation = ['obligationname', 'prospectusinfo', 'refentities', - 'type', 'isconvert', 'isperp', 'coupontype', 'ccy', - 'maturity', 'issuedate', 'coupon', 'isin', 'cusip', 'event'] + tree = etree.parse(fname, parser=parser) + conn = dbconn("serenitasdb") + names_redpair = [ + "redpaircode", + "role", + "referenceentity", + "redentitycode", + "tier", + "pairiscurrent", + "pairvalidfrom", + "pairvalidto", + "ticker", + "ispreferred", + "preferreddate", + "indexconstituents", + "recorddate", + "publiccomments", + "myticker", + "subordinationtype", + "holdco", + "preferredremovaldate", + ] + names_refobligation = [ + "obligationname", + "prospectusinfo", + "refentities", + "type", + "isconvert", + "isperp", + "coupontype", + "ccy", + "maturity", + "issuedate", + "coupon", + "isin", + "cusip", + "event", + ] - redpair_insert = ("""INSERT INTO RedPairMapping VALUES({}) - ON CONFLICT(redpaircode) DO UPDATE SET {}""". - format(",".join(["%s"] * len(names_redpair)), - ",".join(f"{name}=EXCLUDED.{name}" for name in names_redpair[1:]))) - refobligation_insert = ("""INSERT INTO RefObligation({}) VALUES({}) - ON CONFLICT(obligationname) DO UPDATE SET {}""". - format(",".join(names_refobligation), - ",".join(["%s"] * len(names_refobligation)), - ",".join(f"{name}=EXCLUDED.{name}" for name in names_refobligation[1:]))) + redpair_insert = """INSERT INTO RedPairMapping VALUES({}) + ON CONFLICT(redpaircode) DO UPDATE SET {}""".format( + ",".join(["%s"] * len(names_redpair)), + ",".join(f"{name}=EXCLUDED.{name}" for name in names_redpair[1:]), + ) + refobligation_insert = """INSERT INTO RefObligation({}) VALUES({}) + ON CONFLICT(obligationname) DO UPDATE SET {}""".format( + ",".join(names_refobligation), + ",".join(["%s"] * len(names_refobligation)), + ",".join(f"{name}=EXCLUDED.{name}" for name in names_refobligation[1:]), + ) skipfirst = True + def simple_parse(e): - if e.tag == 'indexconstituents': + if e.tag == "indexconstituents": return [ic.text for ic in e] - elif e.tag == 'holdco': + elif e.tag == "holdco": return e.text == "TRUE" else: return e.text @@ -104,26 +167,35 @@ def insert_refobligation(fname): continue d = {col.tag: col if len(col) else col.text for col in child} with conn.cursor() as c2: - for el in d['refentities']: + for el in d["refentities"]: redpair = {e.tag: simple_parse(e) for e in el} - c2.execute(redpair_insert, [redpair.get(name) for name in names_redpair]) + c2.execute( + redpair_insert, [redpair.get(name) for name in names_redpair] + ) conn.commit() - d['refentities'] = [el.text for el in d['refentities'].iterfind('.//redpaircode')] - if 'prospectusinfo' in d: - d['prospectusinfo'] = parse_prospectus(d['prospectusinfo']) - c.execute(refobligation_insert, [d.get(name) for name in names_refobligation]) + d["refentities"] = [ + el.text for el in d["refentities"].iterfind(".//redpaircode") + ] + if "prospectusinfo" in d: + d["prospectusinfo"] = parse_prospectus(d["prospectusinfo"]) + c.execute( + refobligation_insert, [d.get(name) for name in names_refobligation] + ) conn.commit() + def get_date(f): m = re.search("(\d*)\.", f.name) if m: timestamp = int(m.groups(0)[0]) - return datetime.datetime.fromtimestamp(timestamp/1000) + return datetime.datetime.fromtimestamp(timestamp / 1000) + if __name__ == "__main__": from markit_red import download_report - base_dir = Path('/home/serenitas/CorpCDOs/Tranche_data/RED_reports/Deltas') - for report in ['REDEntityDelta', 'REDObligationDelta']: + + base_dir = Path("/home/serenitas/CorpCDOs/Tranche_data/RED_reports/Deltas") + for report in ["REDEntityDelta", "REDObligationDelta"]: g = download_report(report) f = base_dir / next(g)[0] if "Entity" in report: |
