diff options
Diffstat (limited to 'python')
| -rw-r--r-- | python/load_refentity.py | 96 |
1 files changed, 96 insertions, 0 deletions
diff --git a/python/load_refentity.py b/python/load_refentity.py new file mode 100644 index 00000000..5299c7d4 --- /dev/null +++ b/python/load_refentity.py @@ -0,0 +1,96 @@ +from db import dbconn +import lxml.etree as etree +parser = etree.XMLParser(remove_blank_text=True) +from psycopg2.extras import Json + +def todict(xml, uselist=set()): + if len(xml): + if xml.tag in usearray: + return [todict(c, usearray) for c in xml] + else: + d = {} + for c in xml: + if c.tag in d: + d[c.tag].append(todict(c, usearray)) + else: + if c.tag in ['cdssuccession', 'creditevent', 'auctionccyrate']: + d[c.tag] = [todict(c, usearray)] + else: + d[c.tag] = todict(c, usearray) + return d + else: + return xml.text.strip() + +def dispatch_parsing(col, uselist): + if len(col): + if col.tag == 'ratings': + return [el.text for el in col.iterfind(".//tier")] + else: + return Json(todict(col, usearray)) + else: + return col.text + +def upload_refentity(fname): + tree = etree.parse(fname, + parser=parser) + conn = dbconn('serenitasdb') + names = ['referenceentity', 'shortname', 'ticker', 'redentitycode', + 'entitycusip', 'lei', 'entitytype', 'jurisdiction', 'depthlevel', + 'markitsector', 'isdatradingdefinition', 'recorddate', 'ratings', + 'entityform', 'companynumber', 'alternativenames', + 'isdatransactiontypes', 'validto', 'validfrom', 'events'] + # these are tags which enclose a list + uselist = set(['events', 'isdatransactiontypes', 'nextredentitycodes', 'prevredentitycodes', + 'isdatransactiontypes', 'tiers', 'auctions']) + sql_str = "INSERT INTO RefEntity VALUES({})".format(",".join(["%s"] * len(names))) + skipfirst = True + with conn.cursor() as c: + for child in tree.getroot(): + if skipfirst: + skipfirst = False + continue + d = {col.tag: dispatch_parsing(col) for col in child} + c.execute(sql_str, [d.get(name) for name in names]) + conn.commit() + +def parse_prospectus(xml): + return Json({c.tag: [e.text for e in c] for c in xml}) + +def upload_refobligation(fname): + tree = etree.parse(fname, + parser=parser) + conn = dbconn('serenitasdb') + names_redpair = ['redpaircode', 'role', 'referenceentity', 'redentitycode', + 'tier', 'pairiscurrent', 'pairvalidfrom', 'pairvalidto', + 'ticker', 'ispreferred', 'preferreddate', 'indexconstituents', + 'recorddate', 'publiccomments', 'myticker'] + names_refobligation = ['obligationname', 'prospectusinfo', 'refentities', + 'type', 'isconvert', 'isperp', 'coupontype', 'ccy', + 'maturity', 'issuedate', 'coupon', 'isin', 'cusip', 'event'] + + redpair_insert = ("INSERT INTO RedPairMapping VALUES({})". + format(",".join(["%s"] * len(names_redpair)))) + refobligation_insert = ("INSERT INTO RefObligation({}) VALUES({})". + format(",".join(names_refobligation), + ",".join(["%s"] * len(names_refobligation)))) + skipfirst = True + with conn.cursor() as c: + for child in tree.getroot(): + if skipfirst: + skipfirst = False + continue + d = {col.tag: col if len(col) else col.text for col in child} + with conn.cursor() as c2: + for el in d['refentities']: + redpair = {e.tag: [ic.text for ic in e] if e.tag == 'indexconstituents' + else e.text for e in el} + c2.execute(redpair_insert, [redpair.get(name) for name in names_redpair]) + conn.commit() + d['refentities'] = [el.text for el in d['refentities'].iterfind('.//redentitycode')] + if 'prospectusinfo' in d: + d['prospectusinfo'] = parse_prospectus(d['prospectusinfo']) + c.execute(refobligation_insert, [d.get(name) for name in names_refobligation]) + conn.commit() + +if __name__ == "__main__": + upload_refobligation('/home/serenitas/guillaume/V9 Red Obligation1503420865581.xml') |
