diff options
Diffstat (limited to 'python/collateral/citi.py')
| -rw-r--r-- | python/collateral/citi.py | 129 |
1 files changed, 129 insertions, 0 deletions
diff --git a/python/collateral/citi.py b/python/collateral/citi.py new file mode 100644 index 00000000..f7d0818d --- /dev/null +++ b/python/collateral/citi.py @@ -0,0 +1,129 @@ +import pandas as pd +import subprocess +from bs4 import BeautifulSoup +from pandas.tseries.offsets import BDay +from . import ExchangeMessage, DAILY_DIR, bus_day + + +def load_file(d): + try: + fname = next( + (DAILY_DIR / "CITI_reports").glob( + f"262966_Portfolio_{d.strftime('%Y%m%d')}*" + ) + ) + except StopIteration: + raise FileNotFoundError(f"CITI file not found for date {d}") + return pd.read_excel(fname, skiprows=6, skipfooter=2) + + +def download_files(count=20): + em = ExchangeMessage() + emails = em.get_msgs( + path=["NYops", "Margin Calls Citi"], count=count, subject__startswith="262966" + ) + DATA_DIR = DAILY_DIR / "CITI_reports" + for msg in emails: + for attach in msg.attachments: + fname = attach.name + p = DATA_DIR / fname + if not p.exists(): + p.write_bytes(attach.content) + + +def load_pdf(file_path): + proc = subprocess.run( + ["pdftohtml", "-xml", "-stdout", "-i", file_path.as_posix()], + capture_output=True, + ) + soup = BeautifulSoup(proc.stdout, features="lxml") + l = soup.findAll("text") + l = sorted(l, key=lambda x: (int(x["top"]), int(x["left"]))) + return l + + +def get_col(l, top, bottom, left, right): + return [ + c.text + for c in l + if int(c["left"]) >= left + and int(c["left"]) < right + and int(c["top"]) >= top + and int(c["top"]) < bottom + ] + + +def parse_num(s): + s = s.replace(",", "") + if s[0] == "(": + return -float(s[1:-1]) + else: + return float(s) + + +def get_df(l, col1, col2, col3): + df = pd.DataFrame( + {"amount": get_col(l, *col2), "currency": get_col(l, *col3)}, + index=get_col(l, *col1), + ) + df.amount = df.amount.apply(parse_num) + df.index = df.index.str.lstrip() + return df + + +def get_total_collateral(d): + try: + fname = next( + (DAILY_DIR / "CITI_reports").glob( + f"262966_MarginNotice_{d.strftime('%Y%m%d')}_*.pdf" + ) + ) + except StopIteration: + raise FileNotFoundError(f"CITI file not found for date {d.date()}") + l = load_pdf(fname) + col1 = (370, 500, 70, 100) + col2 = (370, 500, 100, 500) + col3 = (370, 500, 500, 600) + + variation_margin = get_df(l, col1, col2, col3) + anchor = next(c for c in l if c.text == "Non Regulatory Initial Margin") + top = int(anchor["top"]) + 10 + bottom = top + 150 + col1 = (top, bottom, 70, 100) + col2 = (top, bottom, 100, 505) + col3 = (top, bottom, 505, 600) + initial_margin = get_df(l, col1, col2, col3) + return ( + variation_margin.loc["VM Total Collateral", "amount"] + + initial_margin.loc["Non Reg IM Total Collateral", "amount"] + ) + + +def collateral(d, dawn_trades, *args): + df = load_file(d) + collat = get_total_collateral(d - BDay()) + df = df[["Operations File", "Market Value", "BasicAmt"]].dropna( + subset=["Operations File"] + ) # missing Operations File means assignment usually + df = df.merge( + dawn_trades, how="left", left_on="Operations File", right_on="cpty_id" + ) + missing_ids = df.loc[df.cpty_id.isnull(), "Operations File"] + if not missing_ids.empty: + raise ValueError(f"{missing_ids.tolist()} not in the database") + df = df.groupby("folder").sum() + df = df.sum(axis=1).to_frame(name="Amount") + df["Currency"] = "USD" + df = df.reset_index() + df.columns = ["Strategy", "Amount", "Currency"] + df.Amount *= -1 + df = df.append( + { + "Strategy": "M_CSH_CASH", + "Amount": collat - df.Amount.sum(), + "Currency": "USD", + }, + ignore_index=True, + ) + df["date"] = d - bus_day + return df.set_index("Strategy") |
