about summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- python/parse_citi_pdf.py 53
1 files changed, 53 insertions, 0 deletions
diff --git a/python/parse_citi_pdf.py b/python/parse_citi_pdf.py
new file mode 100644
index 00000000..ccbcd56b
--- /dev/null
+++ b/python/parse_citi_pdf.py
@@ -0,0 +1,53 @@
+import pandas as pd
+import subprocess
+from bs4 import BeautifulSoup
+from pathlib import Path
+
+def load_pdf(file_path):
+    """Convert a PDF into a sorted list of positioned text elements.
+
+    Runs ``pdftohtml -xml -stdout -i`` on *file_path* and parses the XML it
+    writes to stdout with BeautifulSoup.
+
+    Args:
+        file_path: Location of the PDF, as a ``pathlib.Path`` or a string.
+
+    Returns:
+        A list of the document's ``<text>`` elements, ordered by their
+        integer ``top`` attribute and then by their ``left`` attribute.
+
+    Raises:
+        subprocess.CalledProcessError: If ``pdftohtml`` exits with a
+            non-zero status.
+    """
+    # check=True surfaces a failed conversion here instead of letting an
+    # empty document fail later with an unrelated lookup error.
+    proc = subprocess.run(
+        ["pdftohtml", "-xml", "-stdout", "-i", Path(file_path).as_posix()],
+        capture_output=True,
+        check=True,
+    )
+    soup = BeautifulSoup(proc.stdout, features="lxml")
+    # NOTE(review): "top"/"left" are presumably per-page coordinates, so a
+    # multi-page PDF would interleave pages in this ordering -- confirm.
+    return sorted(
+        soup.find_all("text"),
+        key=lambda node: (int(node["top"]), int(node["left"])),
+    )
+
+def get_col(l, top, bottom, left, right):
+    """Return the text of every element located inside a rectangle.
+
+    Args:
+        l: Iterable of elements exposing ``["left"]``, ``["top"]`` and
+            ``.text``.
+        top: Inclusive lower bound on the element's ``top`` value.
+        bottom: Exclusive upper bound on the element's ``top`` value.
+        left: Inclusive lower bound on the element's ``left`` value.
+        right: Exclusive upper bound on the element's ``left`` value.
+
+    Returns:
+        The ``.text`` of each matching element, in the order found in *l*.
+    """
+    selected = []
+    for cell in l:
+        x = int(cell["left"])
+        if not (left <= x < right):
+            continue
+        y = int(cell["top"])
+        if top <= y < bottom:
+            selected.append(cell.text)
+    return selected
+
+def parse_num(s):
+    """Parse a formatted number, treating parentheses as a minus sign.
+
+    Thousands separators (``,``) and surrounding whitespace are ignored.
+    A value wrapped in parentheses, e.g. ``"(1,234.50)"``, is returned as
+    a negative number.
+
+    Args:
+        s: The textual number.
+
+    Returns:
+        The parsed value as a ``float``.
+
+    Raises:
+        ValueError: If *s* is empty or is not a valid number, including an
+            unbalanced ``"(123"``.
+    """
+    text = s.replace(",", "").strip()
+    # Require both parentheses: blindly slicing off the last character
+    # would silently drop a digit from a malformed "(123".
+    if text.startswith("(") and text.endswith(")"):
+        return -float(text[1:-1])
+    return float(text)
+
+def get_df(l, col1, col2, col3):
+    """Assemble a labelled amount/currency table from three page regions.
+
+    Each ``col*`` argument is a ``(top, bottom, left, right)`` tuple that is
+    unpacked into ``get_col``.
+
+    Args:
+        l: Elements to search, as accepted by ``get_col``.
+        col1: Region holding the row labels, used as the index.
+        col2: Region holding the amounts.
+        col3: Region holding the currencies.
+
+    Returns:
+        A DataFrame with an ``amount`` column converted by ``parse_num``
+        and a ``currency`` column, indexed by the left-stripped labels.
+    """
+    labels = get_col(l, *col1)
+    amounts = get_col(l, *col2)
+    currencies = get_col(l, *col3)
+    df = pd.DataFrame(
+        {"amount": amounts, "currency": currencies},
+        index=labels,
+    )
+    df["amount"] = df["amount"].apply(parse_num)
+    df.index = df.index.str.lstrip()
+    return df
+
+def get_citi_collateral(d):
+    """Return the total collateral read from a CITI margin notice PDF.
+
+    Takes the first file under ``DAILY_DIR / "CITI_reports"`` matching
+    ``262966_MarginNotice_*_.pdf``, extracts two tables from fixed page
+    regions and adds the "VM Total Collateral" amount of the first to the
+    "Non Reg IM Total Collateral" amount of the second.
+
+    Args:
+        d: Date of the report; only used in the error message.
+
+    Returns:
+        The sum of the two collateral amounts.
+
+    Raises:
+        FileNotFoundError: If no file matches the pattern.
+    """
+    # NOTE(review): ``d`` is not part of the glob pattern, so the file
+    # picked does not depend on the requested date -- confirm intent.
+    # NOTE(review): DAILY_DIR is not defined or imported in this file;
+    # presumably it is provided elsewhere -- verify.
+    report_dir = DAILY_DIR / "CITI_reports"
+    fname = next(report_dir.glob("262966_MarginNotice_*_.pdf"), None)
+    if fname is None:
+        raise FileNotFoundError(f"CITI file not found for date {d}")
+    cells = load_pdf(fname)
+
+    def regions(top, bottom):
+        # Label, amount and currency columns share the same vertical band.
+        return (
+            (top, bottom, 70, 100),
+            (top, bottom, 100, 500),
+            (top, bottom, 500, 600),
+        )
+
+    variation_margin = get_df(cells, *regions(370, 500))
+    initial_margin = get_df(cells, *regions(650, 800))
+    vm_total = variation_margin.loc["VM Total Collateral", "amount"]
+    im_total = initial_margin.loc["Non Reg IM Total Collateral", "amount"]
+    return vm_total + im_total