# python/collateral/jpm.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76

import datetime
import pandas as pd
from io import BytesIO
from pikepdf import Pdf
from . import DAILY_DIR
from .common import load_pdf, get_col, parse_num


def load_file(d, fund):
    """Locate the JPM statement for date *d* and read it with ``pd.read_excel``.

    Searches ``DAILY_DIR / fund / "JPM_reports"`` for a file matching
    ``CSCFTCSTMT-*-<yymmdd>*.pdf`` and uses the first match the glob yields.

    Returns:
        The result of ``pd.read_excel`` on that path, skipping the first
        6 rows and the last 2.

    Raises:
        FileNotFoundError: if no file matches the pattern.

    NOTE(review): a second ``load_file`` defined further down in this module
    rebinds the name, so this version looks unreachable at runtime; it also
    hands a ``.pdf`` path to ``pd.read_excel`` -- confirm whether it can go.
    """
    report_dir = DAILY_DIR / fund / "JPM_reports"
    pattern = f"CSCFTCSTMT-*-{d:%y%m%d}*.pdf"
    candidates = report_dir.glob(pattern)
    try:
        statement_path = next(candidates)
    except StopIteration:
        raise FileNotFoundError(f"JPM file not found for date {d}")
    return pd.read_excel(statement_path, skiprows=6, skipfooter=2)


# Per-fund value handed to ``em.get_msgs(path=...)`` by ``download_files``;
# a fund with no entry here makes ``download_files`` return without doing
# anything.
# NOTE(review): the list elements look like mailbox folder names -- confirm.
paths = {
    # "Serenitas": ["NYops", "Margin Calls JPM"],
    "BowdSt": ["BowdoinOps", "Margin JPM"],
}


def load_file(d, fund):
    """Return the path of the JPM statement PDF for date *d* and *fund*.

    Looks in ``DAILY_DIR / fund / "JPM_reports"`` for files named
    ``CSCFTCSTMT-*-<yymmdd>-909271_2.pdf``.  When several files match, the
    one that sorts first is returned, so the result does not depend on the
    order in which the directory happens to be listed.

    Args:
        d: date (anything supporting the ``%y%m%d`` format spec).
        fund: name of the fund sub-directory under ``DAILY_DIR``.

    Returns:
        The matching path.

    Raises:
        FileNotFoundError: if no file matches the pattern.
    """
    report_dir = DAILY_DIR / fund / "JPM_reports"
    candidates = report_dir.glob(f"CSCFTCSTMT-*-{d:%y%m%d}-909271_2.pdf")
    # A sentinel default keeps the "not found" error free of a chained
    # StopIteration, which raising from inside an ``except`` would attach.
    fname = min(candidates, default=None)
    if fname is None:
        raise FileNotFoundError(f"JPM file not found for date {d}")
    return fname


def get_collateral(d: datetime.date, fund):
    """Return the collateral figure from the JPM statement for *d* as a float.

    Loads the statement located by ``load_file``, takes the page at index 3
    of ``load_pdf(..., pages=True)`` and converts the first entry that
    ``get_col`` returns for the box (200, 300, 1000, 1100), with commas
    removed, to ``float``.
    """
    statement = load_file(d, fund)
    pages = load_pdf(statement, pages=True)
    # NOTE(review): the page index and box coordinates are fixed numbers;
    # presumably they track the statement layout -- confirm if it changes.
    cells = get_col(pages[3], 200, 300, 1000, 1100)
    raw_amount = cells[0]
    return float(raw_amount.replace(",", ""))


def load_positions(d: datetime.date, fund):
    """Build a DataFrame of positions from the JPM statement for *d*.

    The page at index 4 of the statement is cut into vertical strips with
    ``get_col``; the first entry of each strip becomes the column name and
    the remaining entries its values.  A strip with exactly four entries has
    its first two entries joined with a space before that split (presumably
    a header wrapped over two lines -- confirm).

    The notional / MTM / IM columns are passed through ``parse_num``, the
    trade and maturity dates are parsed as ``%d-%b-%y``, and ``Deal ID`` is
    replaced by the capture group of ``[^-]-(.*)``.

    Raises:
        FileNotFoundError: propagated from ``load_file``.
        StopIteration: if no element on the page has text starting with
            "Total Product Group".
    """
    statement = load_file(d, fund)
    page = load_pdf(statement, pages=True)[4]

    total_row = next(
        elem for elem in page if elem.text.startswith("Total Product Group")
    )
    # NOTE(review): this value is computed but not used below -- the strips
    # are always read between the fixed bounds 200 and 289.  Confirm whether
    # the anchor position was meant to replace the hard-coded 289.
    bottom = int(total_row["top"])

    edges = (10, 160, 300, 350, 450, 500, 550, 600, 650, 750, 850, 950, 1000, 1200)
    strips = [
        get_col(page, 200, 289, left, right)
        for left, right in zip(edges, edges[1:])
    ]

    table = {}
    for cells in strips:
        if len(cells) == 4:
            # Merge the first two entries into a single header string.
            cells = [f"{cells[0]} {cells[1]}", *cells[2:]]
        table[cells[0]] = cells[1:]
    df = pd.DataFrame(table)

    numeric_columns = ("Pay Notional", "Rec Notional", "MTM Amount", "IM Amount")
    for name in numeric_columns:
        df[name] = df[name].apply(parse_num)

    date_columns = ("Trade Date", "Maturity Date")
    for name in date_columns:
        df[name] = pd.to_datetime(df[name], format="%d-%b-%y")

    df["Deal ID"] = df["Deal ID"].str.extract(r"[^-]-(.*)")
    return df


def download_files(em, count=20, *, fund="BowdSt", **kwargs):
    """Save PDF attachments of recent JPM emails into the fund's report folder.

    Fetches up to *count* messages whose subject starts with "909271" via
    ``em.get_msgs`` and, for every attachment not already present under
    ``DAILY_DIR / fund / "JPM_reports"``, opens it with pikepdf using the
    statement password and saves it there under the attachment's name.

    Args:
        em: object exposing ``get_msgs(path=..., count=..., subject__startswith=...)``.
        count: maximum number of messages to request.
        fund: fund name; must be a key of ``paths``, otherwise nothing is done.
        **kwargs: accepted and ignored.

    Returns:
        None.
    """
    if fund not in paths:
        return
    emails = em.get_msgs(path=paths[fund], count=count, subject__startswith="909271")
    data_dir = DAILY_DIR / fund / "JPM_reports"
    for msg in emails:
        for attach in msg.attachments:
            target = data_dir / attach.name
            if target.exists():
                # Already downloaded on a previous run.
                continue
            # NOTE(review): the password is hard-coded in source; consider
            # moving it to configuration or a secret store.
            # The context manager closes the Pdf once it has been written.
            with Pdf.open(BytesIO(attach.content), password="tm64EO") as pdf:
                pdf.save(target)