from utils.db import dbconn from pathlib import Path import datetime import re import lxml.etree as etree parser = etree.XMLParser(remove_blank_text=True) from psycopg2.extras import Json def todict(xml, uselist=set()): if len(xml): if xml.tag in uselist: return [todict(c, uselist) for c in xml] else: d = {} for c in xml: if c.tag in d: d[c.tag].append(todict(c, uselist)) else: if c.tag in ["cdssuccession", "creditevent", "auctionccyrate"]: d[c.tag] = [todict(c, uselist)] else: d[c.tag] = todict(c, uselist) return d else: return xml.text.strip() def dispatch_parsing(col, uselist): if len(col): if col.tag == "ratings": return [el.text for el in col.iterfind(".//tier")] else: return Json(todict(col, uselist)) else: if col.tag == "holdco": return col.text == "TRUE" else: return col.text def insert_refentity(fname): tree = etree.parse(fname, parser=parser) conn = dbconn("serenitasdb") names = [ "referenceentity", "shortname", "ticker", "redentitycode", "entitycusip", "lei", "entitytype", "jurisdiction", "depthlevel", "markitsector", "isdatradingdefinition", "recorddate", "ratings", "entityform", "companynumber", "alternativenames", "isdatransactiontypes", "validto", "validfrom", "events", "holdco", "country", ] # these are tags which enclose a list uselist = set( [ "events", "isdatransactiontypes", "nextredentitycodes", "prevredentitycodes", "isdatransactiontypes", "tiers", "auctions", ] ) sql_str = """INSERT INTO RefEntity VALUES({}) ON CONFLICT(redentitycode) DO UPDATE SET {}""".format( ",".join(["%s"] * len(names)), ",".join(f"{name}=EXCLUDED.{name}" for name in names[1:]), ) skipfirst = True with conn.cursor() as c: for child in tree.getroot(): if skipfirst: skipfirst = False continue d = {col.tag: dispatch_parsing(col, uselist) for col in child} c.execute(sql_str, [d.get(name) for name in names]) conn.commit() def parse_prospectus(xml): return Json({c.tag: [e.text for e in c] for c in xml}) def insert_refobligation(fname): tree = etree.parse(fname, parser=parser) conn = dbconn("serenitasdb") names_redpair = [ "redpaircode", "role", "referenceentity", "redentitycode", "tier", "pairiscurrent", "pairvalidfrom", "pairvalidto", "ticker", "ispreferred", "preferreddate", "indexconstituents", "recorddate", "publiccomments", "myticker", "subordinationtype", "holdco", "preferredremovaldate", ] names_refobligation = [ "obligationname", "prospectusinfo", "refentities", "type", "isconvert", "isperp", "coupontype", "ccy", "maturity", "issuedate", "coupon", "isin", "cusip", "event", ] redpair_insert = """INSERT INTO RedPairMapping VALUES({}) ON CONFLICT(redpaircode) DO UPDATE SET {}""".format( ",".join(["%s"] * len(names_redpair)), ",".join(f"{name}=EXCLUDED.{name}" for name in names_redpair[1:]), ) refobligation_insert = """INSERT INTO RefObligation({}) VALUES({}) ON CONFLICT(obligationname) DO UPDATE SET {}""".format( ",".join(names_refobligation), ",".join(["%s"] * len(names_refobligation)), ",".join(f"{name}=EXCLUDED.{name}" for name in names_refobligation[1:]), ) skipfirst = True def simple_parse(e): if e.tag == "indexconstituents": return [ic.text for ic in e] elif e.tag == "holdco": return e.text == "TRUE" else: return e.text with conn.cursor() as c: for child in tree.getroot(): if skipfirst: skipfirst = False continue d = {col.tag: col if len(col) else col.text for col in child} with conn.cursor() as c2: for el in d["refentities"]: redpair = {e.tag: simple_parse(e) for e in el} c2.execute( redpair_insert, [redpair.get(name) for name in names_redpair] ) conn.commit() d["refentities"] = [ el.text for el in d["refentities"].iterfind(".//redpaircode") ] if "prospectusinfo" in d: d["prospectusinfo"] = parse_prospectus(d["prospectusinfo"]) c.execute( refobligation_insert, [d.get(name) for name in names_refobligation] ) conn.commit() def get_date(f): m = re.search("(\d*)\.", f.name) if m: timestamp = int(m.groups(0)[0]) return datetime.datetime.fromtimestamp(timestamp / 1000) if __name__ == "__main__": from markit_red import download_report base_dir = Path("/home/serenitas/CorpCDOs/Tranche_data/RED_reports/Deltas") for report in ["REDEntityDelta", "REDObligationDelta"]: g = download_report(report) f = base_dir / next(g)[0] if "Entity" in report: insert_refentity(f.as_posix()) else: insert_refobligation(f.as_posix())