aboutsummaryrefslogtreecommitdiffstats
path: root/python/load_refentity.py
diff options
context:
space:
mode:
Diffstat (limited to 'python/load_refentity.py')
-rw-r--r--python/load_refentity.py168
1 files changed, 120 insertions, 48 deletions
diff --git a/python/load_refentity.py b/python/load_refentity.py
index 7e6a9f6b..2d07bf62 100644
--- a/python/load_refentity.py
+++ b/python/load_refentity.py
@@ -3,9 +3,11 @@ from pathlib import Path
import datetime
import re
import lxml.etree as etree
+
parser = etree.XMLParser(remove_blank_text=True)
from psycopg2.extras import Json
+
def todict(xml, uselist=set()):
if len(xml):
if xml.tag in uselist:
@@ -16,7 +18,7 @@ def todict(xml, uselist=set()):
if c.tag in d:
d[c.tag].append(todict(c, uselist))
else:
- if c.tag in ['cdssuccession', 'creditevent', 'auctionccyrate']:
+ if c.tag in ["cdssuccession", "creditevent", "auctionccyrate"]:
d[c.tag] = [todict(c, uselist)]
else:
d[c.tag] = todict(c, uselist)
@@ -24,9 +26,10 @@ def todict(xml, uselist=set()):
else:
return xml.text.strip()
+
def dispatch_parsing(col, uselist):
if len(col):
- if col.tag == 'ratings':
+ if col.tag == "ratings":
return [el.text for el in col.iterfind(".//tier")]
else:
return Json(todict(col, uselist))
@@ -36,23 +39,51 @@ def dispatch_parsing(col, uselist):
else:
return col.text
+
def insert_refentity(fname):
- tree = etree.parse(fname,
- parser=parser)
- conn = dbconn('serenitasdb')
- names = ['referenceentity', 'shortname', 'ticker', 'redentitycode',
- 'entitycusip', 'lei', 'entitytype', 'jurisdiction', 'depthlevel',
- 'markitsector', 'isdatradingdefinition', 'recorddate', 'ratings',
- 'entityform', 'companynumber', 'alternativenames',
- 'isdatransactiontypes', 'validto', 'validfrom', 'events',
- 'holdco', 'country',]
+ tree = etree.parse(fname, parser=parser)
+ conn = dbconn("serenitasdb")
+ names = [
+ "referenceentity",
+ "shortname",
+ "ticker",
+ "redentitycode",
+ "entitycusip",
+ "lei",
+ "entitytype",
+ "jurisdiction",
+ "depthlevel",
+ "markitsector",
+ "isdatradingdefinition",
+ "recorddate",
+ "ratings",
+ "entityform",
+ "companynumber",
+ "alternativenames",
+ "isdatransactiontypes",
+ "validto",
+ "validfrom",
+ "events",
+ "holdco",
+ "country",
+ ]
# these are tags which enclose a list
- uselist = set(['events', 'isdatransactiontypes', 'nextredentitycodes', 'prevredentitycodes',
- 'isdatransactiontypes', 'tiers', 'auctions'])
- sql_str = ("""INSERT INTO RefEntity VALUES({})
- ON CONFLICT(redentitycode) DO UPDATE SET {}""".
- format(",".join(["%s"] * len(names)),
- ",".join(f"{name}=EXCLUDED.{name}" for name in names[1:])))
+ uselist = set(
+ [
+ "events",
+ "isdatransactiontypes",
+ "nextredentitycodes",
+ "prevredentitycodes",
+ "isdatransactiontypes",
+ "tiers",
+ "auctions",
+ ]
+ )
+ sql_str = """INSERT INTO RefEntity VALUES({})
+ ON CONFLICT(redentitycode) DO UPDATE SET {}""".format(
+ ",".join(["%s"] * len(names)),
+ ",".join(f"{name}=EXCLUDED.{name}" for name in names[1:]),
+ )
skipfirst = True
with conn.cursor() as c:
for child in tree.getroot():
@@ -63,36 +94,68 @@ def insert_refentity(fname):
c.execute(sql_str, [d.get(name) for name in names])
conn.commit()
+
def parse_prospectus(xml):
return Json({c.tag: [e.text for e in c] for c in xml})
+
def insert_refobligation(fname):
- tree = etree.parse(fname,
- parser=parser)
- conn = dbconn('serenitasdb')
- names_redpair = ['redpaircode', 'role', 'referenceentity', 'redentitycode',
- 'tier', 'pairiscurrent', 'pairvalidfrom', 'pairvalidto',
- 'ticker', 'ispreferred', 'preferreddate', 'indexconstituents',
- 'recorddate', 'publiccomments', 'myticker',
- 'subordinationtype', 'holdco', 'preferredremovaldate']
- names_refobligation = ['obligationname', 'prospectusinfo', 'refentities',
- 'type', 'isconvert', 'isperp', 'coupontype', 'ccy',
- 'maturity', 'issuedate', 'coupon', 'isin', 'cusip', 'event']
+ tree = etree.parse(fname, parser=parser)
+ conn = dbconn("serenitasdb")
+ names_redpair = [
+ "redpaircode",
+ "role",
+ "referenceentity",
+ "redentitycode",
+ "tier",
+ "pairiscurrent",
+ "pairvalidfrom",
+ "pairvalidto",
+ "ticker",
+ "ispreferred",
+ "preferreddate",
+ "indexconstituents",
+ "recorddate",
+ "publiccomments",
+ "myticker",
+ "subordinationtype",
+ "holdco",
+ "preferredremovaldate",
+ ]
+ names_refobligation = [
+ "obligationname",
+ "prospectusinfo",
+ "refentities",
+ "type",
+ "isconvert",
+ "isperp",
+ "coupontype",
+ "ccy",
+ "maturity",
+ "issuedate",
+ "coupon",
+ "isin",
+ "cusip",
+ "event",
+ ]
- redpair_insert = ("""INSERT INTO RedPairMapping VALUES({})
- ON CONFLICT(redpaircode) DO UPDATE SET {}""".
- format(",".join(["%s"] * len(names_redpair)),
- ",".join(f"{name}=EXCLUDED.{name}" for name in names_redpair[1:])))
- refobligation_insert = ("""INSERT INTO RefObligation({}) VALUES({})
- ON CONFLICT(obligationname) DO UPDATE SET {}""".
- format(",".join(names_refobligation),
- ",".join(["%s"] * len(names_refobligation)),
- ",".join(f"{name}=EXCLUDED.{name}" for name in names_refobligation[1:])))
+ redpair_insert = """INSERT INTO RedPairMapping VALUES({})
+ ON CONFLICT(redpaircode) DO UPDATE SET {}""".format(
+ ",".join(["%s"] * len(names_redpair)),
+ ",".join(f"{name}=EXCLUDED.{name}" for name in names_redpair[1:]),
+ )
+ refobligation_insert = """INSERT INTO RefObligation({}) VALUES({})
+ ON CONFLICT(obligationname) DO UPDATE SET {}""".format(
+ ",".join(names_refobligation),
+ ",".join(["%s"] * len(names_refobligation)),
+ ",".join(f"{name}=EXCLUDED.{name}" for name in names_refobligation[1:]),
+ )
skipfirst = True
+
def simple_parse(e):
- if e.tag == 'indexconstituents':
+ if e.tag == "indexconstituents":
return [ic.text for ic in e]
- elif e.tag == 'holdco':
+ elif e.tag == "holdco":
return e.text == "TRUE"
else:
return e.text
@@ -104,26 +167,35 @@ def insert_refobligation(fname):
continue
d = {col.tag: col if len(col) else col.text for col in child}
with conn.cursor() as c2:
- for el in d['refentities']:
+ for el in d["refentities"]:
redpair = {e.tag: simple_parse(e) for e in el}
- c2.execute(redpair_insert, [redpair.get(name) for name in names_redpair])
+ c2.execute(
+ redpair_insert, [redpair.get(name) for name in names_redpair]
+ )
conn.commit()
- d['refentities'] = [el.text for el in d['refentities'].iterfind('.//redpaircode')]
- if 'prospectusinfo' in d:
- d['prospectusinfo'] = parse_prospectus(d['prospectusinfo'])
- c.execute(refobligation_insert, [d.get(name) for name in names_refobligation])
+ d["refentities"] = [
+ el.text for el in d["refentities"].iterfind(".//redpaircode")
+ ]
+ if "prospectusinfo" in d:
+ d["prospectusinfo"] = parse_prospectus(d["prospectusinfo"])
+ c.execute(
+ refobligation_insert, [d.get(name) for name in names_refobligation]
+ )
conn.commit()
+
def get_date(f):
m = re.search("(\d*)\.", f.name)
if m:
timestamp = int(m.groups(0)[0])
- return datetime.datetime.fromtimestamp(timestamp/1000)
+ return datetime.datetime.fromtimestamp(timestamp / 1000)
+
if __name__ == "__main__":
from markit_red import download_report
- base_dir = Path('/home/serenitas/CorpCDOs/Tranche_data/RED_reports/Deltas')
- for report in ['REDEntityDelta', 'REDObligationDelta']:
+
+ base_dir = Path("/home/serenitas/CorpCDOs/Tranche_data/RED_reports/Deltas")
+ for report in ["REDEntityDelta", "REDObligationDelta"]:
g = download_report(report)
f = base_dir / next(g)[0]
if "Entity" in report: