# python/collateral/citi.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129

import pandas as pd
import subprocess
from bs4 import BeautifulSoup
from pandas.tseries.offsets import BDay
from . import ExchangeMessage, DAILY_DIR, bus_day


def load_file(d):
    """Read the CITI portfolio spreadsheet for date *d* into a DataFrame.

    The first file in ``DAILY_DIR / "CITI_reports"`` whose name starts with
    ``262966_Portfolio_<YYYYMMDD>`` is loaded with ``pandas.read_excel``,
    skipping the first 6 rows and the last 2 rows of the sheet.

    Args:
        d: date-like object exposing ``strftime``.

    Returns:
        pandas.DataFrame with the sheet contents.

    Raises:
        FileNotFoundError: if no file matches the pattern for *d*.
    """
    report_dir = DAILY_DIR / "CITI_reports"
    pattern = f"262966_Portfolio_{d.strftime('%Y%m%d')}*"
    matches = report_dir.glob(pattern)
    try:
        fname = next(matches)
    except StopIteration:
        raise FileNotFoundError(f"CITI file not found for date {d}")
    return pd.read_excel(fname, skiprows=6, skipfooter=2)


def download_files(count=20):
    """Save attachments of recent CITI margin-call emails to disk.

    Fetches up to *count* messages from the ``NYops / Margin Calls Citi``
    mail folder whose subject starts with ``262966`` and writes every
    attachment into ``DAILY_DIR / "CITI_reports"`` under its own name.
    Attachments whose target file already exists are left untouched.

    Args:
        count: number of messages requested from the mailbox.
    """
    target_dir = DAILY_DIR / "CITI_reports"
    messages = ExchangeMessage().get_msgs(
        path=["NYops", "Margin Calls Citi"], count=count, subject__startswith="262966"
    )
    for message in messages:
        for attachment in message.attachments:
            destination = target_dir / attachment.name
            if destination.exists():
                # Already downloaded on a previous run; never overwrite.
                continue
            destination.write_bytes(attachment.content)


def load_pdf(file_path):
    """Extract the positioned text elements of a PDF via ``pdftohtml``.

    Runs ``pdftohtml -xml -stdout -i`` on *file_path*, parses the XML it
    prints on stdout with BeautifulSoup and returns the ``<text>`` elements
    ordered by their integer ``top`` attribute, then by ``left``.

    Args:
        file_path: path object exposing ``as_posix()``.

    Returns:
        list of ``<text>`` tags sorted by ``(top, left)``.

    Raises:
        subprocess.CalledProcessError: if ``pdftohtml`` exits with a
            non-zero status.
    """
    proc = subprocess.run(
        ["pdftohtml", "-xml", "-stdout", "-i", file_path.as_posix()],
        capture_output=True,
        # Fail loudly on a conversion error instead of parsing empty output
        # and surfacing an unrelated error further down the pipeline.
        check=True,
    )
    soup = BeautifulSoup(proc.stdout, features="lxml")
    return sorted(
        soup.find_all("text"),
        key=lambda tag: (int(tag["top"]), int(tag["left"])),
    )


def get_col(l, top, bottom, left, right):
    """Return the text of every element positioned inside a box.

    An element is kept when ``left <= int(element["left"]) < right`` and
    ``top <= int(element["top"]) < bottom``; the input order is preserved.

    Args:
        l: iterable of elements supporting ``element["left"]``,
            ``element["top"]`` and ``element.text``.
        top: inclusive lower bound on ``top``.
        bottom: exclusive upper bound on ``top``.
        left: inclusive lower bound on ``left``.
        right: exclusive upper bound on ``left``.

    Returns:
        list of the ``text`` attributes of the matching elements.
    """
    texts = []
    for cell in l:
        # Horizontal test first, vertical test only if it passes.
        in_columns = left <= int(cell["left"]) < right
        if in_columns and top <= int(cell["top"]) < bottom:
            texts.append(cell.text)
    return texts


def parse_num(s):
    """Parse a formatted number string into a float.

    Thousands separators (``,``) and surrounding whitespace are ignored.
    A value fully wrapped in parentheses, e.g. ``"(1,234.50)"``, is read
    as negative (accounting notation).

    Args:
        s: the text to parse.

    Returns:
        float value of *s*.

    Raises:
        ValueError: if the cleaned text is not a valid number (including
            empty input or an unbalanced parenthesis).
    """
    cleaned = s.replace(",", "").strip()
    # Require both parentheses: slicing off the last character of an
    # unbalanced "(123" would silently drop a digit.
    if cleaned.startswith("(") and cleaned.endswith(")"):
        return -float(cleaned[1:-1])
    return float(cleaned)


def get_df(l, col1, col2, col3):
    """Assemble a label / amount / currency table from positioned text.

    Each ``col*`` argument is a ``(top, bottom, left, right)`` box that is
    forwarded to ``get_col`` to pull one column of text out of *l*.

    Args:
        l: positioned text elements, as accepted by ``get_col``.
        col1: box holding the row labels (becomes the index).
        col2: box holding the amounts (parsed with ``parse_num``).
        col3: box holding the currencies.

    Returns:
        pandas.DataFrame with columns ``amount`` and ``currency``, indexed
        by the row labels with leading whitespace removed.
    """
    amounts = get_col(l, *col2)
    currencies = get_col(l, *col3)
    labels = get_col(l, *col1)
    table = pd.DataFrame(
        {"amount": amounts, "currency": currencies},
        index=labels,
    )
    table["amount"] = table["amount"].apply(parse_num)
    table.index = table.index.str.lstrip()
    return table


def get_total_collateral(d):
    """Return the total collateral shown on the CITI margin notice for *d*.

    Locates the first ``262966_MarginNotice_<YYYYMMDD>_*.pdf`` file in
    ``DAILY_DIR / "CITI_reports"``, extracts its positioned text with
    ``load_pdf`` and reads two tables from it: one at a fixed position and
    one located relative to the "Non Regulatory Initial Margin" heading.
    The result is the sum of their "VM Total Collateral" and
    "Non Reg IM Total Collateral" amounts.

    Args:
        d: date-like object exposing ``strftime`` (``date``, ``datetime``
            or ``pandas.Timestamp``).

    Returns:
        The sum of the two total-collateral amounts.

    Raises:
        FileNotFoundError: if no margin notice exists for *d*.
        ValueError: if the "Non Regulatory Initial Margin" heading is
            missing from the PDF.
        KeyError: if either total row is missing from its table.
    """
    pattern = f"262966_MarginNotice_{d.strftime('%Y%m%d')}_*.pdf"
    try:
        fname = next((DAILY_DIR / "CITI_reports").glob(pattern))
    except StopIteration:
        # Format via the format spec rather than d.date(): plain `date`
        # objects have no .date() and would mask this error with an
        # AttributeError.
        raise FileNotFoundError(
            f"CITI file not found for date {d:%Y-%m-%d}"
        ) from None
    cells = load_pdf(fname)

    # Boxes are (top, bottom, left, right), the parameter order of get_col.
    # NOTE(review): the fixed coordinates presumably match the current
    # layout of the notice -- confirm if the report format changes.
    col1 = (370, 500, 70, 100)
    col2 = (370, 500, 100, 500)
    col3 = (370, 500, 500, 600)
    variation_margin = get_df(cells, col1, col2, col3)

    # The second table is positioned relative to its heading.
    anchor = next(
        (c for c in cells if c.text == "Non Regulatory Initial Margin"), None
    )
    if anchor is None:
        # Raise a descriptive error instead of leaking a bare StopIteration.
        raise ValueError(
            f"'Non Regulatory Initial Margin' section not found in {fname}"
        )
    top = int(anchor["top"]) + 10
    bottom = top + 150
    col1 = (top, bottom, 70, 100)
    col2 = (top, bottom, 100, 505)
    col3 = (top, bottom, 505, 600)
    initial_margin = get_df(cells, col1, col2, col3)

    return (
        variation_margin.loc["VM Total Collateral", "amount"]
        + initial_margin.loc["Non Reg IM Total Collateral", "amount"]
    )


def collateral(d, dawn_trades, *args):
    """Break down the CITI collateral by strategy for date *d*.

    Loads the CITI portfolio file for *d*, maps each position to a folder
    through *dawn_trades*, sums the numeric columns per folder and flips
    the sign. A final ``M_CSH_CASH`` row is added so that the amounts add
    up to the total collateral read from the margin notice dated one
    business day before *d*.

    Args:
        d: report date; must support ``strftime`` and subtraction of a
            business-day offset.
        dawn_trades: DataFrame with at least the columns ``cpty_id``
            (matched against "Operations File") and ``folder``.
        *args: ignored; accepted for interface compatibility.

    Returns:
        pandas.DataFrame indexed by ``Strategy`` with columns ``Amount``,
        ``Currency`` and ``date``.

    Raises:
        FileNotFoundError: if the portfolio file or margin notice is
            missing.
        ValueError: if a position has no match in *dawn_trades*.
    """
    df = load_file(d)
    collat = get_total_collateral(d - BDay())
    df = df[["Operations File", "Market Value", "BasicAmt"]].dropna(
        subset=["Operations File"]
    )  # missing Operations File means assignment usually
    df = df.merge(
        dawn_trades, how="left", left_on="Operations File", right_on="cpty_id"
    )
    missing_ids = df.loc[df["cpty_id"].isnull(), "Operations File"]
    if not missing_ids.empty:
        raise ValueError(f"{missing_ids.tolist()} not in the database")
    # numeric_only=True spells out the pre-2.0 pandas default this code
    # relied on; without it the string columns break the sum on pandas 2.x.
    # NOTE(review): any numeric column brought in by dawn_trades is also
    # included in the row sum below -- confirm dawn_trades carries none.
    df = df.groupby("folder").sum(numeric_only=True)
    df = df.sum(axis=1).to_frame(name="Amount")
    df["Currency"] = "USD"
    df = df.reset_index()
    df.columns = ["Strategy", "Amount", "Currency"]
    df["Amount"] *= -1
    # The cash row is the balancing item: total collateral minus the sum
    # of the per-strategy amounts.
    cash_row = pd.DataFrame(
        [
            {
                "Strategy": "M_CSH_CASH",
                "Amount": collat - df["Amount"].sum(),
                "Currency": "USD",
            }
        ]
    )
    # DataFrame.append was removed in pandas 2.0; concat is the replacement.
    df = pd.concat([df, cash_row], ignore_index=True)
    df["date"] = d - bus_day
    return df.set_index("Strategy")