import pandas as pd import subprocess from bs4 import BeautifulSoup from pandas.tseries.offsets import BDay from . import DAILY_DIR, bus_day def load_file(d): try: fname = next( (DAILY_DIR / "CITI_reports").glob( f"262966_Portfolio_{d.strftime('%Y%m%d')}*" ) ) except StopIteration: raise FileNotFoundError(f"CITI file not found for date {d}") return pd.read_excel(fname, skiprows=6, skipfooter=2) def download_files(count=20): from exchange import ExchangeMessage em = ExchangeMessage() emails = em.get_msgs( path=["NYops", "Margin Calls Citi"], count=count, subject__startswith="262966" ) DATA_DIR = DAILY_DIR / "CITI_reports" for msg in emails: for attach in msg.attachments: fname = attach.name p = DATA_DIR / fname if not p.exists(): p.write_bytes(attach.content) def load_pdf(file_path): proc = subprocess.run( ["pdftohtml", "-xml", "-stdout", "-i", file_path.as_posix()], capture_output=True, ) soup = BeautifulSoup(proc.stdout, features="lxml") l = soup.findAll("text") l = sorted(l, key=lambda x: (int(x["top"]), int(x["left"]))) return l def get_col(l, top, bottom, left, right): return [ c.text for c in l if int(c["left"]) >= left and int(c["left"]) < right and int(c["top"]) >= top and int(c["top"]) < bottom ] def parse_num(s): s = s.replace(",", "") if s[0] == "(": return -float(s[1:-1]) else: return float(s) def get_df(l, col1, col2, col3): df = pd.DataFrame( {"amount": get_col(l, *col2), "currency": get_col(l, *col3)}, index=get_col(l, *col1), ) df.amount = df.amount.apply(parse_num) df.index = df.index.str.lstrip() return df def get_total_collateral(d): try: fname = next( (DAILY_DIR / "CITI_reports").glob( f"262966_MarginNotice_{d.strftime('%Y%m%d')}_*.pdf" ) ) except StopIteration: raise FileNotFoundError(f"CITI file not found for date {d.date()}") l = load_pdf(fname) col1 = (370, 500, 70, 100) col2 = (370, 500, 100, 500) col3 = (370, 500, 500, 600) variation_margin = get_df(l, col1, col2, col3) anchor = next(c for c in l if c.text == "Non Regulatory Initial Margin") top = int(anchor["top"]) + 10 bottom = top + 150 col1 = (top, bottom, 70, 100) col2 = (top, bottom, 100, 505) col3 = (top, bottom, 505, 600) initial_margin = get_df(l, col1, col2, col3) return ( variation_margin.loc["VM Total Collateral", "amount"] + initial_margin.loc["Non Reg IM Total Collateral", "amount"] ) def collateral(d, dawn_trades, *args): df = load_file(d) collat = get_total_collateral(d - BDay()) df = df[["Operations File", "Market Value", "BasicAmt"]].dropna( subset=["Operations File"] ) # missing Operations File means assignment usually df = df.merge( dawn_trades, how="left", left_on="Operations File", right_on="cpty_id" ) missing_ids = df.loc[df.cpty_id.isnull(), "Operations File"] if not missing_ids.empty: raise ValueError(f"{missing_ids.tolist()} not in the database") df = df.groupby("folder").sum() df = df.sum(axis=1).to_frame(name="Amount") df["Currency"] = "USD" df = df.reset_index() df.columns = ["Strategy", "Amount", "Currency"] df.Amount *= -1 df = df.append( { "Strategy": "M_CSH_CASH", "Amount": collat - df.Amount.sum(), "Currency": "USD", }, ignore_index=True, ) df["date"] = d - bus_day return df.set_index("Strategy")