aboutsummaryrefslogtreecommitdiffstats
path: root/python/collateral/citi.py
diff options
context:
space:
mode:
Diffstat (limited to 'python/collateral/citi.py')
-rw-r--r--python/collateral/citi.py129
1 files changed, 129 insertions, 0 deletions
diff --git a/python/collateral/citi.py b/python/collateral/citi.py
new file mode 100644
index 00000000..f7d0818d
--- /dev/null
+++ b/python/collateral/citi.py
@@ -0,0 +1,129 @@
+import pandas as pd
+import subprocess
+from bs4 import BeautifulSoup
+from pandas.tseries.offsets import BDay
+from . import ExchangeMessage, DAILY_DIR, bus_day
+
+
+def load_file(d):
+ try:
+ fname = next(
+ (DAILY_DIR / "CITI_reports").glob(
+ f"262966_Portfolio_{d.strftime('%Y%m%d')}*"
+ )
+ )
+ except StopIteration:
+ raise FileNotFoundError(f"CITI file not found for date {d}")
+ return pd.read_excel(fname, skiprows=6, skipfooter=2)
+
+
+def download_files(count=20):
+ em = ExchangeMessage()
+ emails = em.get_msgs(
+ path=["NYops", "Margin Calls Citi"], count=count, subject__startswith="262966"
+ )
+ DATA_DIR = DAILY_DIR / "CITI_reports"
+ for msg in emails:
+ for attach in msg.attachments:
+ fname = attach.name
+ p = DATA_DIR / fname
+ if not p.exists():
+ p.write_bytes(attach.content)
+
+
+def load_pdf(file_path):
+ proc = subprocess.run(
+ ["pdftohtml", "-xml", "-stdout", "-i", file_path.as_posix()],
+ capture_output=True,
+ )
+ soup = BeautifulSoup(proc.stdout, features="lxml")
+ l = soup.findAll("text")
+ l = sorted(l, key=lambda x: (int(x["top"]), int(x["left"])))
+ return l
+
+
+def get_col(l, top, bottom, left, right):
+ return [
+ c.text
+ for c in l
+ if int(c["left"]) >= left
+ and int(c["left"]) < right
+ and int(c["top"]) >= top
+ and int(c["top"]) < bottom
+ ]
+
+
+def parse_num(s):
+ s = s.replace(",", "")
+ if s[0] == "(":
+ return -float(s[1:-1])
+ else:
+ return float(s)
+
+
+def get_df(l, col1, col2, col3):
+ df = pd.DataFrame(
+ {"amount": get_col(l, *col2), "currency": get_col(l, *col3)},
+ index=get_col(l, *col1),
+ )
+ df.amount = df.amount.apply(parse_num)
+ df.index = df.index.str.lstrip()
+ return df
+
+
+def get_total_collateral(d):
+ try:
+ fname = next(
+ (DAILY_DIR / "CITI_reports").glob(
+ f"262966_MarginNotice_{d.strftime('%Y%m%d')}_*.pdf"
+ )
+ )
+ except StopIteration:
+ raise FileNotFoundError(f"CITI file not found for date {d.date()}")
+ l = load_pdf(fname)
+ col1 = (370, 500, 70, 100)
+ col2 = (370, 500, 100, 500)
+ col3 = (370, 500, 500, 600)
+
+ variation_margin = get_df(l, col1, col2, col3)
+ anchor = next(c for c in l if c.text == "Non Regulatory Initial Margin")
+ top = int(anchor["top"]) + 10
+ bottom = top + 150
+ col1 = (top, bottom, 70, 100)
+ col2 = (top, bottom, 100, 505)
+ col3 = (top, bottom, 505, 600)
+ initial_margin = get_df(l, col1, col2, col3)
+ return (
+ variation_margin.loc["VM Total Collateral", "amount"]
+ + initial_margin.loc["Non Reg IM Total Collateral", "amount"]
+ )
+
+
+def collateral(d, dawn_trades, *args):
+ df = load_file(d)
+ collat = get_total_collateral(d - BDay())
+ df = df[["Operations File", "Market Value", "BasicAmt"]].dropna(
+ subset=["Operations File"]
+ ) # missing Operations File means assignment usually
+ df = df.merge(
+ dawn_trades, how="left", left_on="Operations File", right_on="cpty_id"
+ )
+ missing_ids = df.loc[df.cpty_id.isnull(), "Operations File"]
+ if not missing_ids.empty:
+ raise ValueError(f"{missing_ids.tolist()} not in the database")
+ df = df.groupby("folder").sum()
+ df = df.sum(axis=1).to_frame(name="Amount")
+ df["Currency"] = "USD"
+ df = df.reset_index()
+ df.columns = ["Strategy", "Amount", "Currency"]
+ df.Amount *= -1
+ df = df.append(
+ {
+ "Strategy": "M_CSH_CASH",
+ "Amount": collat - df.Amount.sum(),
+ "Currency": "USD",
+ },
+ ignore_index=True,
+ )
+ df["date"] = d - bus_day
+ return df.set_index("Strategy")