from db import dbconn from pathlib import Path import datetime import re import lxml.etree as etree parser = etree.XMLParser(remove_blank_text=True) from psycopg2.extras import Json def todict(xml, uselist=set()): if len(xml): if xml.tag in uselist: return [todict(c, uselist) for c in xml] else: d = {} for c in xml: if c.tag in d: d[c.tag].append(todict(c, uselist)) else: if c.tag in ['cdssuccession', 'creditevent', 'auctionccyrate']: d[c.tag] = [todict(c, uselist)] else: d[c.tag] = todict(c, uselist) return d else: return xml.text.strip() def dispatch_parsing(col, uselist): if len(col): if col.tag == 'ratings': return [el.text for el in col.iterfind(".//tier")] else: return Json(todict(col, uselist)) else: return col.text def insert_refentity(fname): tree = etree.parse(fname, parser=parser) conn = dbconn('serenitasdb') names = ['referenceentity', 'shortname', 'ticker', 'redentitycode', 'entitycusip', 'lei', 'entitytype', 'jurisdiction', 'depthlevel', 'markitsector', 'isdatradingdefinition', 'recorddate', 'ratings', 'entityform', 'companynumber', 'alternativenames', 'isdatransactiontypes', 'validto', 'validfrom', 'events'] # these are tags which enclose a list uselist = set(['events', 'isdatransactiontypes', 'nextredentitycodes', 'prevredentitycodes', 'isdatransactiontypes', 'tiers', 'auctions']) sql_str = ("""INSERT INTO RefEntity VALUES({}) ON CONFLICT(redentitycode) DO UPDATE SET {}""". format(",".join(["%s"] * len(names)), ",".join(f"{name}=EXCLUDED.{name}" for name in names[1:]))) skipfirst = True with conn.cursor() as c: for child in tree.getroot(): if skipfirst: skipfirst = False continue d = {col.tag: dispatch_parsing(col, uselist) for col in child} c.execute(sql_str, [d.get(name) for name in names]) conn.commit() def parse_prospectus(xml): return Json({c.tag: [e.text for e in c] for c in xml}) def insert_refobligation(fname): tree = etree.parse(fname, parser=parser) conn = dbconn('serenitasdb') names_redpair = ['redpaircode', 'role', 'referenceentity', 'redentitycode', 'tier', 'pairiscurrent', 'pairvalidfrom', 'pairvalidto', 'ticker', 'ispreferred', 'preferreddate', 'indexconstituents', 'recorddate', 'publiccomments', 'myticker'] names_refobligation = ['obligationname', 'prospectusinfo', 'refentities', 'type', 'isconvert', 'isperp', 'coupontype', 'ccy', 'maturity', 'issuedate', 'coupon', 'isin', 'cusip', 'event'] redpair_insert = ("""INSERT INTO RedPairMapping VALUES({}) ON CONFLICT(redpaircode) DO UPDATE SET {}""". format(",".join(["%s"] * len(names_redpair)), ",".join(f"{name}=EXCLUDED.{name}" for name in names_redpair[1:]))) refobligation_insert = ("""INSERT INTO RefObligation({}) VALUES({}) ON CONFLICT(obligationname) DO UPDATE SET {}""". format(",".join(names_refobligation), ",".join(["%s"] * len(names_refobligation)), ",".join(f"{name}=EXCLUDED.{name}" for name in names_refobligation[1:]))) skipfirst = True with conn.cursor() as c: for child in tree.getroot(): if skipfirst: skipfirst = False continue d = {col.tag: col if len(col) else col.text for col in child} with conn.cursor() as c2: for el in d['refentities']: redpair = {e.tag: [ic.text for ic in e] if e.tag == 'indexconstituents' else e.text for e in el} c2.execute(redpair_insert, [redpair.get(name) for name in names_redpair]) conn.commit() d['refentities'] = [el.text for el in d['refentities'].iterfind('.//redpaircode')] if 'prospectusinfo' in d: d['prospectusinfo'] = parse_prospectus(d['prospectusinfo']) c.execute(refobligation_insert, [d.get(name) for name in names_refobligation]) conn.commit() def get_date(f): m = re.search("(\d*)\.", f.name) if m: timestamp = int(m.groups(0)[0]) return datetime.datetime.fromtimestamp(timestamp/1000) if __name__ == "__main__": from markit_red import download_report base_dir = Path('/home/serenitas/CorpCDOs/Tranche_data/RED_reports/Deltas') for report in ['REDEntityDelta', 'REDObligationDelta']: f = base_dir / download_report(report)[0] if "Entity" in report: insert_refentity(f.as_posix()) else: insert_refobligation(f.as_posix())