1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
|
import pandas as pd
import subprocess
from bs4 import BeautifulSoup
from pandas.tseries.offsets import BDay
from . import ExchangeMessage, DAILY_DIR, bus_day
def load_file(d):
    """Load the CITI portfolio report for date ``d`` into a DataFrame.

    Looks in ``DAILY_DIR / "CITI_reports"`` for a file whose name starts with
    ``262966_Portfolio_<YYYYMMDD>`` and reads the first match with
    ``pandas.read_excel``, skipping the first 6 rows and the last 2 rows.

    Args:
        d: date-like object providing ``strftime``.

    Returns:
        pandas.DataFrame with the spreadsheet contents.

    Raises:
        FileNotFoundError: if no matching file exists.
    """
    reports_dir = DAILY_DIR / "CITI_reports"
    pattern = f"262966_Portfolio_{d.strftime('%Y%m%d')}*"
    candidates = reports_dir.glob(pattern)
    try:
        report_path = next(candidates)
    except StopIteration:
        raise FileNotFoundError(f"CITI file not found for date {d}")
    return pd.read_excel(report_path, skiprows=6, skipfooter=2)
def download_files(count=20):
    """Save attachments of CITI margin-call e-mails into the reports directory.

    Queries the ``NYops / Margin Calls Citi`` mail folder for messages whose
    subject starts with ``262966`` and writes every attachment that is not
    already present to ``DAILY_DIR / "CITI_reports"``.

    Args:
        count: forwarded to ``ExchangeMessage.get_msgs`` (presumably the
            maximum number of messages to fetch -- confirm against that API).
    """
    from pathlib import PurePath

    em = ExchangeMessage()
    emails = em.get_msgs(
        path=["NYops", "Margin Calls Citi"], count=count, subject__startswith="262966"
    )
    data_dir = DAILY_DIR / "CITI_reports"
    for msg in emails:
        for attach in msg.attachments:
            # The attachment name comes from the e-mail itself; keep only its
            # final path component so a crafted name (absolute path or one
            # containing "../") cannot write outside data_dir.
            fname = PurePath(attach.name).name
            target = data_dir / fname
            # Never overwrite a report that was already downloaded.
            if not target.exists():
                target.write_bytes(attach.content)
def load_pdf(file_path):
    """Extract the text elements of a PDF in reading order.

    Runs ``pdftohtml -xml -stdout -i`` on ``file_path`` and parses the XML it
    prints with BeautifulSoup (lxml parser).

    Args:
        file_path: path object providing ``as_posix()``.

    Returns:
        list of ``<text>`` tags sorted by their integer ``top`` attribute and
        then by their integer ``left`` attribute.

    Raises:
        subprocess.CalledProcessError: if ``pdftohtml`` exits with a non-zero
            status (previously this silently produced an empty list).
    """
    proc = subprocess.run(
        ["pdftohtml", "-xml", "-stdout", "-i", file_path.as_posix()],
        capture_output=True,
        check=True,
    )
    soup = BeautifulSoup(proc.stdout, features="lxml")
    text_tags = soup.find_all("text")
    return sorted(text_tags, key=lambda tag: (int(tag["top"]), int(tag["left"])))
def get_col(l, top, bottom, left, right):
    """Return the text of every element located inside a rectangular region.

    An element is kept when ``left <= int(element["left"]) < right`` and
    ``top <= int(element["top"]) < bottom``; the input order is preserved.

    Args:
        l: iterable of elements exposing ``["left"]``, ``["top"]`` and ``.text``.
        top, bottom: vertical bounds (lower inclusive, upper exclusive).
        left, right: horizontal bounds (lower inclusive, upper exclusive).

    Returns:
        list of the ``.text`` values of the matching elements.
    """
    texts = []
    for cell in l:
        # Horizontal test first, vertical second; half-open on both axes.
        if not left <= int(cell["left"]) < right:
            continue
        if not top <= int(cell["top"]) < bottom:
            continue
        texts.append(cell.text)
    return texts
def parse_num(s):
    """Convert a formatted number string to a float.

    Thousands separators (``,``) are removed, surrounding whitespace is
    ignored, and a value wrapped in parentheses such as ``"(1,234.50)"`` is
    returned as a negative number.

    Args:
        s: the string to parse.

    Returns:
        float value of ``s``.

    Raises:
        ValueError: if ``s`` is blank or is not a number (including
            unbalanced parentheses).
    """
    # Strip first: text extracted from a PDF may carry padding, which would
    # otherwise hide the leading "(" or break float().
    cleaned = s.replace(",", "").strip()
    if cleaned.startswith("(") and cleaned.endswith(")"):
        return -float(cleaned[1:-1])
    return float(cleaned)
def get_df(l, col1, col2, col3):
    """Build a labelled amount/currency table from three page regions.

    Each ``colN`` is unpacked into ``get_col`` as ``(top, bottom, left, right)``.

    Args:
        l: text elements to search (passed straight to ``get_col``).
        col1: region holding the row labels, used as the index.
        col2: region holding the amounts, parsed with ``parse_num``.
        col3: region holding the currencies.

    Returns:
        pandas.DataFrame with columns ``amount`` (float) and ``currency``,
        indexed by the labels with leading whitespace removed.
    """
    amounts = get_col(l, *col2)
    currencies = get_col(l, *col3)
    labels = get_col(l, *col1)
    table = pd.DataFrame({"amount": amounts, "currency": currencies}, index=labels)
    table["amount"] = table["amount"].apply(parse_num)
    table.index = table.index.str.lstrip()
    return table
def get_total_collateral(d):
    """Return the total collateral shown on the CITI margin notice for ``d``.

    Locates ``262966_MarginNotice_<YYYYMMDD>_*.pdf`` under
    ``DAILY_DIR / "CITI_reports"``, extracts its text with ``load_pdf`` and
    reads two tables: the variation margin table from a fixed page region and
    the initial margin table from the region just below the
    "Non Regulatory Initial Margin" heading.

    Args:
        d: date-like object providing ``strftime``.

    Returns:
        Sum of the "VM Total Collateral" and "Non Reg IM Total Collateral"
        amounts.

    Raises:
        FileNotFoundError: if no margin notice exists for ``d``.
        ValueError: if the "Non Regulatory Initial Margin" heading is missing
            from the PDF.
    """
    try:
        fname = next(
            (DAILY_DIR / "CITI_reports").glob(
                f"262966_MarginNotice_{d.strftime('%Y%m%d')}_*.pdf"
            )
        )
    except StopIteration:
        # strftime gives the same YYYY-MM-DD text as str(d.date()) but also
        # works when d is a plain date, which has no .date() method.
        raise FileNotFoundError(
            f"CITI file not found for date {d.strftime('%Y-%m-%d')}"
        ) from None
    cells = load_pdf(fname)

    # Variation margin table: fixed region, as (top, bottom, left, right).
    col1 = (370, 500, 70, 100)
    col2 = (370, 500, 100, 500)
    col3 = (370, 500, 500, 600)
    variation_margin = get_df(cells, col1, col2, col3)

    # Initial margin table: positioned relative to its heading.
    heading = "Non Regulatory Initial Margin"
    anchor = next((c for c in cells if c.text == heading), None)
    if anchor is None:
        # A bare StopIteration here would be uninformative and can be
        # swallowed silently if a caller happens to run inside a generator.
        raise ValueError(f"'{heading}' section not found in {fname}")
    top = int(anchor["top"]) + 10
    bottom = top + 150
    col1 = (top, bottom, 70, 100)
    col2 = (top, bottom, 100, 505)
    col3 = (top, bottom, 505, 600)
    initial_margin = get_df(cells, col1, col2, col3)

    return (
        variation_margin.loc["VM Total Collateral", "amount"]
        + initial_margin.loc["Non Reg IM Total Collateral", "amount"]
    )
def collateral(d, dawn_trades, *args):
    """Break down the CITI collateral for date ``d`` by strategy.

    Reads the portfolio report for ``d`` (``load_file``) and the total
    collateral from the margin notice of the previous business day
    (``get_total_collateral(d - BDay())``). Each report row is mapped to a
    strategy by matching its "Operations File" against ``cpty_id`` in
    ``dawn_trades``; "Market Value" and "BasicAmt" are summed per ``folder``
    and the sign is flipped. A final ``M_CSH_CASH`` row holds the total
    collateral minus the sum of those per-strategy amounts.

    Args:
        d: report date (must support ``strftime`` and subtraction of
            ``BDay()`` / ``bus_day``).
        dawn_trades: DataFrame with at least ``cpty_id`` and ``folder`` columns.
        *args: accepted and ignored.

    Returns:
        pandas.DataFrame indexed by ``Strategy`` with columns ``Amount``,
        ``Currency`` (always "USD") and ``date`` (``d - bus_day``).

    Raises:
        FileNotFoundError: if a required CITI file is missing.
        ValueError: if some "Operations File" ids are absent from
            ``dawn_trades``.
    """
    positions = load_file(d)
    collat = get_total_collateral(d - BDay())
    positions = positions[["Operations File", "Market Value", "BasicAmt"]].dropna(
        subset=["Operations File"]
    )  # missing Operations File means assignment usually
    positions = positions.merge(
        dawn_trades, how="left", left_on="Operations File", right_on="cpty_id"
    )
    missing_ids = positions.loc[positions.cpty_id.isnull(), "Operations File"]
    if not missing_ids.empty:
        raise ValueError(f"{missing_ids.tolist()} not in the database")

    # Sum only the two value columns. pandas >= 2.0 no longer drops
    # non-numeric columns in groupby().sum(), so identifier columns brought in
    # by the merge would otherwise end up in the row totals.
    # NOTE(review): assumes dawn_trades carries no numeric column that was
    # meant to be part of Amount -- confirm.
    by_strategy = (
        positions.groupby("folder")[["Market Value", "BasicAmt"]]
        .sum()
        .sum(axis=1)
        .to_frame(name="Amount")
    )
    by_strategy["Currency"] = "USD"
    by_strategy = by_strategy.reset_index()
    by_strategy.columns = ["Strategy", "Amount", "Currency"]
    by_strategy.Amount *= -1

    # DataFrame.append was removed in pandas 2.0; pd.concat is the replacement.
    cash_row = pd.DataFrame(
        [
            {
                "Strategy": "M_CSH_CASH",
                "Amount": collat - by_strategy.Amount.sum(),
                "Currency": "USD",
            }
        ]
    )
    result = pd.concat([by_strategy, cash_row], ignore_index=True)
    result["date"] = d - bus_day
    return result.set_index("Strategy")
|