aboutsummaryrefslogtreecommitdiffstats
path: root/python/load_refentity.py
blob: 837b5dfb1877fd156be4a69cd30af201e933375d (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
from db import dbconn
import lxml.etree as etree
parser = etree.XMLParser(remove_blank_text=True)
from psycopg2.extras import Json

def todict(xml, uselist=set()):
    if len(xml):
        if xml.tag in uselist:
            return [todict(c, uselist) for c in xml]
        else:
            d = {}
            for c in xml:
                if c.tag in d:
                    d[c.tag].append(todict(c, uselist))
                else:
                    if c.tag in ['cdssuccession', 'creditevent', 'auctionccyrate']:
                        d[c.tag] = [todict(c, uselist)]
                    else:
                        d[c.tag] = todict(c, uselist)
            return d
    else:
        return xml.text.strip()

def dispatch_parsing(col, uselist):
    if len(col):
        if col.tag == 'ratings':
            return [el.text for el in col.iterfind(".//tier")]
        else:
            return Json(todict(col, uselist))
    else:
        return col.text

def insert_refentity(fname):
    tree = etree.parse(fname,
                       parser=parser)
    conn = dbconn('serenitasdb')
    names = ['referenceentity', 'shortname', 'ticker', 'redentitycode',
             'entitycusip', 'lei', 'entitytype', 'jurisdiction', 'depthlevel',
             'markitsector', 'isdatradingdefinition', 'recorddate', 'ratings',
             'entityform', 'companynumber', 'alternativenames',
             'isdatransactiontypes', 'validto', 'validfrom', 'events']
    # these are tags which enclose a list
    uselist = set(['events', 'isdatransactiontypes', 'nextredentitycodes', 'prevredentitycodes',
                   'isdatransactiontypes', 'tiers', 'auctions'])
    sql_str = ("""INSERT INTO RefEntity VALUES({})
                 ON CONFLICT(redentitycode) DO UPDATE SET {}""".
               format(",".join(["%s"] * len(names)),
                      ",".join(f"{name}=EXCLUDED.{name}" for name in names[1:])))
    skipfirst = True
    with conn.cursor() as c:
        for child in tree.getroot():
            if skipfirst:
                skipfirst = False
                continue
            d = {col.tag: dispatch_parsing(col, uselist) for col in child}
            c.execute(sql_str, [d.get(name) for name in names])
    conn.commit()

def parse_prospectus(xml):
    return Json({c.tag: [e.text for e in c] for c in xml})

def insert_refobligation(fname):
    tree = etree.parse(fname,
                       parser=parser)
    conn = dbconn('serenitasdb')
    names_redpair = ['redpaircode', 'role', 'referenceentity', 'redentitycode',
                     'tier', 'pairiscurrent', 'pairvalidfrom', 'pairvalidto',
                     'ticker', 'ispreferred', 'preferreddate', 'indexconstituents',
                     'recorddate', 'publiccomments', 'myticker']
    names_refobligation = ['obligationname', 'prospectusinfo', 'refentities',
                           'type', 'isconvert', 'isperp', 'coupontype', 'ccy',
                           'maturity', 'issuedate', 'coupon', 'isin', 'cusip', 'event']

    redpair_insert = ("INSERT INTO RedPairMapping VALUES({})".
                      format(",".join(["%s"] * len(names_redpair))))
    refobligation_insert = ("INSERT INTO RefObligation({}) VALUES({})".
                            format(",".join(names_refobligation),
                                   ",".join(["%s"] * len(names_refobligation))))
    skipfirst = True
    with conn.cursor() as c:
        for child in tree.getroot():
            if skipfirst:
                skipfirst = False
                continue
            d = {col.tag: col if len(col) else col.text for col in child}
            with conn.cursor() as c2:
                for el in d['refentities']:
                    redpair = {e.tag: [ic.text for ic in e] if e.tag == 'indexconstituents'
                               else e.text for e in el}
                    c2.execute(redpair_insert, [redpair.get(name) for name in names_redpair])
            conn.commit()
            d['refentities'] = [el.text for el in d['refentities'].iterfind('.//redentitycode')]
            if 'prospectusinfo' in d:
                d['prospectusinfo'] = parse_prospectus(d['prospectusinfo'])
            c.execute(refobligation_insert, [d.get(name) for name in names_refobligation])
    conn.commit()

if __name__ == "__main__":
    #insert_refobligation('/home/serenitas/guillaume/V9 Red Obligation1503420865581.xml')
    from pathlib import Path
    base_dir = Path('/home/serenitas/CorpCDOs/Tranche_data/RED_reports')
    insert_refentity((base_dir / 'V10 Red Entity Delta1503687306161.xml').as_posix())