1 files changed, 129 insertions, 0 deletions
diff --git a/python/collateral/citi.py b/python/collateral/citi.py
new file mode 100644
index 00000000..f7d0818d
--- /dev/null
+++ b/python/collateral/citi.py
@@ -0,0 +1,129 @@
+import pandas as pd
+import subprocess
+from bs4 import BeautifulSoup
+from pandas.tseries.offsets import BDay
+from . import ExchangeMessage, DAILY_DIR, bus_day
+
+
+def load_file(d):
+    try:
+        fname = next(
+            (DAILY_DIR / "CITI_reports").glob(
+                f"262966_Portfolio_{d.strftime('%Y%m%d')}*"
+            )
+        )
+    except StopIteration:
+        raise FileNotFoundError(f"CITI file not found for date {d}")
+    return pd.read_excel(fname, skiprows=6, skipfooter=2)
+
+
+def download_files(count=20):
+    em = ExchangeMessage()
+    emails = em.get_msgs(
+        path=["NYops", "Margin Calls Citi"], count=count, subject__startswith="262966"
+    )
+    DATA_DIR = DAILY_DIR / "CITI_reports"
+    for msg in emails:
+        for attach in msg.attachments:
+            fname = attach.name
+            p = DATA_DIR / fname
+            if not p.exists():
+                p.write_bytes(attach.content)
+
+
+def load_pdf(file_path):
+    proc = subprocess.run(
+        ["pdftohtml", "-xml", "-stdout", "-i", file_path.as_posix()],
+        capture_output=True,
+    )
+    soup = BeautifulSoup(proc.stdout, features="lxml")
+    l = soup.findAll("text")
+    l = sorted(l, key=lambda x: (int(x["top"]), int(x["left"])))
+    return l
+
+
+def get_col(l, top, bottom, left, right):
+    return [
+        c.text
+        for c in l
+        if int(c["left"]) >= left
+        and int(c["left"]) < right
+        and int(c["top"]) >= top
+        and int(c["top"]) < bottom
+    ]
+
+
+def parse_num(s):
+    s = s.replace(",", "")
+    if s[0] == "(":
+        return -float(s[1:-1])
+    else:
+        return float(s)
+
+
+def get_df(l, col1, col2, col3):
+    df = pd.DataFrame(
+        {"amount": get_col(l, *col2), "currency": get_col(l, *col3)},
+        index=get_col(l, *col1),
+    )
+    df.amount = df.amount.apply(parse_num)
+    df.index = df.index.str.lstrip()
+    return df
+
+
+def get_total_collateral(d):
+    try:
+        fname = next(
+            (DAILY_DIR / "CITI_reports").glob(
+                f"262966_MarginNotice_{d.strftime('%Y%m%d')}_*.pdf"
+            )
+        )
+    except StopIteration:
+        raise FileNotFoundError(f"CITI file not found for date {d.date()}")
+    l = load_pdf(fname)
+    col1 = (370, 500, 70, 100)
+    col2 = (370, 500, 100, 500)
+    col3 = (370, 500, 500, 600)
+
+    variation_margin = get_df(l, col1, col2, col3)
+    anchor = next(c for c in l if c.text == "Non Regulatory Initial Margin")
+    top = int(anchor["top"]) + 10
+    bottom = top + 150
+    col1 = (top, bottom, 70, 100)
+    col2 = (top, bottom, 100, 505)
+    col3 = (top, bottom, 505, 600)
+    initial_margin = get_df(l, col1, col2, col3)
+    return (
+        variation_margin.loc["VM Total Collateral", "amount"]
+        + initial_margin.loc["Non Reg IM Total Collateral", "amount"]
+    )
+
+
+def collateral(d, dawn_trades, *args):
+    df = load_file(d)
+    collat = get_total_collateral(d - BDay())
+    df = df[["Operations File", "Market Value", "BasicAmt"]].dropna(
+        subset=["Operations File"]
+    )  # missing Operations File means assignment usually
+    df = df.merge(
+        dawn_trades, how="left", left_on="Operations File", right_on="cpty_id"
+    )
+    missing_ids = df.loc[df.cpty_id.isnull(), "Operations File"]
+    if not missing_ids.empty:
+        raise ValueError(f"{missing_ids.tolist()} not in the database")
+    df = df.groupby("folder").sum()
+    df = df.sum(axis=1).to_frame(name="Amount")
+    df["Currency"] = "USD"
+    df = df.reset_index()
+    df.columns = ["Strategy", "Amount", "Currency"]
+    df.Amount *= -1
+    df = df.append(
+        {
+            "Strategy": "M_CSH_CASH",
+            "Amount": collat - df.Amount.sum(),
+            "Currency": "USD",
+        },
+        ignore_index=True,
+    )
+    df["date"] = d - bus_day
+    return df.set_index("Strategy")