1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
|
import datetime
import pandas as pd
from io import BytesIO
from pikepdf import Pdf
from . import DAILY_DIR
from .common import load_pdf, get_col, parse_num
def load_file(d, fund):
    """Read the JPM statement for date *d* and *fund* with ``pd.read_excel``.

    Searches ``DAILY_DIR / fund / "JPM_reports"`` for files matching
    ``CSCFTCSTMT-*-<yymmdd>*.pdf`` (``<yymmdd>`` is *d* formatted with
    ``%y%m%d``) and takes the first match the glob yields.

    NOTE(review): a second ``load_file`` with the same signature is defined
    further down in this module and rebinds the name, so this version looks
    unreachable -- confirm and remove one of the two.
    NOTE(review): the glob only matches ``.pdf`` files, yet the match is
    handed to ``pd.read_excel``; this looks like a leftover from a
    spreadsheet-based loader -- confirm.

    Returns:
        Whatever ``pd.read_excel`` returns for the matched file, skipping the
        first 6 rows and the last 2 rows.

    Raises:
        FileNotFoundError: if no file matches the pattern.
    """
    report_dir = DAILY_DIR / fund / "JPM_reports"
    candidates = report_dir.glob(f"CSCFTCSTMT-*-{d:%y%m%d}*.pdf")
    try:
        fname = next(candidates)
    except StopIteration:
        # The StopIteration carries no useful information; suppress it so the
        # traceback shows only the FileNotFoundError.
        raise FileNotFoundError(f"JPM file not found for date {d}") from None
    return pd.read_excel(fname, skiprows=6, skipfooter=2)
# Fund name -> value passed as ``path=`` to ``em.get_msgs`` in
# ``download_files``. Funds missing from this mapping are skipped there.
# The "Serenitas" entry (["NYops", "Margin Calls JPM"]) is currently disabled.
paths = {"BowdSt": ["BowdoinOps", "Margin JPM"]}
def load_file(d, fund):
    """Return the path of the JPM statement PDF for date *d* and *fund*.

    Searches ``DAILY_DIR / fund / "JPM_reports"`` for files matching
    ``CSCFTCSTMT-*-<yymmdd>-909271_2.pdf`` (``<yymmdd>`` is *d* formatted
    with ``%y%m%d``) and returns the first match the glob yields.

    Raises:
        FileNotFoundError: if no file matches the pattern.
    """
    report_dir = DAILY_DIR / fund / "JPM_reports"
    candidates = report_dir.glob(f"CSCFTCSTMT-*-{d:%y%m%d}-909271_2.pdf")
    try:
        fname = next(candidates)
    except StopIteration:
        # The StopIteration carries no useful information; suppress it so the
        # traceback shows only the FileNotFoundError.
        raise FileNotFoundError(f"JPM file not found for date {d}") from None
    return fname
def get_collateral(d: datetime.date, fund):
    """Return the collateral figure from the JPM statement as a float.

    Locates the statement with ``load_file``, parses it with
    ``load_pdf(..., pages=True)`` and reads the page at index 3. The first
    entry returned by ``get_col`` for that page is converted to ``float``
    after its commas are removed.

    NOTE(review): the four numbers passed to ``get_col`` are presumably
    top/bottom/left/right bounds on the page -- confirm against ``get_col``.
    """
    pages = load_pdf(load_file(d, fund), pages=True)
    cells = get_col(pages[3], 200, 300, 1000, 1100)
    raw_amount = cells[0]
    return float(raw_amount.replace(",", ""))
def load_positions(d: datetime.date, fund):
    """Build a DataFrame of positions from the JPM statement PDF.

    Reads the page at index 4 of the statement found by ``load_file``, calls
    ``get_col`` once per pair of consecutive entries in ``edges``, and uses
    the first entry of each result as the column name and the rest as values.

    Post-processing:
      * "Pay Notional", "Rec Notional", "MTM Amount" and "IM Amount" are
        passed through ``parse_num``;
      * "Trade Date" and "Maturity Date" are parsed with ``%d-%b-%y``;
      * "Deal ID" keeps only what the regex ``[^-]-(.*)`` captures.

    Raises:
        StopIteration: if no element whose text starts with
            "Total Product Group" is found on the page.
    """
    statement = load_file(d, fund)
    page = load_pdf(statement, pages=True)[4]

    total_row = next(el for el in page if el.text.startswith("Total Product Group"))
    # NOTE(review): `bottom` is computed but never used; the get_col call
    # below passes the literal 289 instead. Possibly `bottom` was meant to be
    # used there -- confirm before changing.
    bottom = int(total_row["top"])

    edges = (10, 160, 300, 350, 450, 500, 550, 600, 650, 750, 850, 950, 1000, 1200)

    def merge_header(cells):
        # Join the first two entries into one string, keep the rest as is.
        return [f"{cells[0]} {cells[1]}", *cells[2:]]

    columns = []
    for left, right in zip(edges, edges[1:]):
        cells = get_col(page, 200, 289, left, right)
        # NOTE(review): a result with 4 entries is presumably a two-line
        # header followed by two values; this check would misfire for other
        # row counts -- verify.
        if len(cells) == 4:
            cells = merge_header(cells)
        columns.append(cells)

    data = {}
    for cells in columns:
        data[cells[0]] = cells[1:]
    df = pd.DataFrame(data)

    for name in ("Pay Notional", "Rec Notional", "MTM Amount", "IM Amount"):
        df[name] = df[name].apply(parse_num)
    for name in ("Trade Date", "Maturity Date"):
        df[name] = pd.to_datetime(df[name], format="%d-%b-%y")
    df["Deal ID"] = df["Deal ID"].str.extract(r"[^-]-(.*)")
    return df
def download_files(em, count=20, *, fund="BowdSt", **kwargs):
    """Save JPM statement attachments from email into the fund's report folder.

    Fetches messages with ``em.get_msgs`` using the ``paths`` entry for *fund*
    and ``subject__startswith="909271"``. Every attachment that is not already
    present in ``DAILY_DIR / fund / "JPM_reports"`` is opened as a
    password-protected PDF and re-saved there under the attachment's name.

    Args:
        em: object exposing ``get_msgs(path=..., count=..., **filters)``; the
            returned messages must have an ``attachments`` iterable whose
            items expose ``name`` and ``content``.
        count: forwarded to ``em.get_msgs``.
        fund: key into ``paths``; the function returns without doing anything
            when the fund is not listed there.
        **kwargs: accepted and ignored.

    Returns:
        None.
    """
    if fund not in paths:
        return
    emails = em.get_msgs(path=paths[fund], count=count, subject__startswith="909271")
    data_dir = DAILY_DIR / fund / "JPM_reports"
    for msg in emails:
        for attach in msg.attachments:
            target = data_dir / attach.name
            if target.exists():
                # Already saved on a previous run.
                continue
            # NOTE(review): the PDF password is hard-coded in source; consider
            # moving it to configuration or a secret store.
            # The context manager closes the Pdf once it has been saved.
            with Pdf.open(BytesIO(attach.content), password="tm64EO") as pdf:
                pdf.save(target)
|