diff options
| -rw-r--r-- | python/collateral/citi.py | 25 | ||||
| -rw-r--r-- | python/collateral/common.py | 24 | ||||
| -rw-r--r-- | python/collateral/cs.py | 45 |
3 files changed, 34 insertions, 60 deletions
diff --git a/python/collateral/citi.py b/python/collateral/citi.py index 5ea8ce60..acda4b1f 100644 --- a/python/collateral/citi.py +++ b/python/collateral/citi.py @@ -1,8 +1,7 @@ import pandas as pd -import subprocess -from bs4 import BeautifulSoup from pandas.tseries.offsets import BDay from . import DAILY_DIR, bus_day +from .common import load_pdf, get_col def load_file(d): @@ -33,28 +32,6 @@ def download_files(count=20): p.write_bytes(attach.content) -def load_pdf(file_path): - proc = subprocess.run( - ["pdftohtml", "-xml", "-stdout", "-i", file_path.as_posix()], - capture_output=True, - ) - soup = BeautifulSoup(proc.stdout, features="lxml") - l = soup.findAll("text") - l = sorted(l, key=lambda x: (int(x["top"]), int(x["left"]))) - return l - - -def get_col(l, top, bottom, left, right): - return [ - c.text - for c in l - if int(c["left"]) >= left - and int(c["left"]) < right - and int(c["top"]) >= top - and int(c["top"]) < bottom - ] - - def parse_num(s): s = s.replace(",", "") if s[0] == "(": diff --git a/python/collateral/common.py b/python/collateral/common.py index 882a3a74..64498fca 100644 --- a/python/collateral/common.py +++ b/python/collateral/common.py @@ -1,5 +1,7 @@ import datetime import logging +import subprocess +from bs4 import BeautifulSoup import pandas as pd from exchangelib import HTMLBody from sqlalchemy.engine import Engine @@ -102,3 +104,25 @@ def send_email(d: datetime.date, df: pd.DataFrame) -> None: ["serenitas.otc@sscinc.com"], ["nyops@lmcg.com"], ) + + +def load_pdf(file_path): + proc = subprocess.run( + ["pdftohtml", "-xml", "-stdout", "-i", file_path.as_posix()], + capture_output=True, + ) + soup = BeautifulSoup(proc.stdout, features="lxml") + l = soup.findAll("text") + l = sorted(l, key=lambda x: (int(x["top"]), int(x["left"]))) + return l + + +def get_col(l, top, bottom, left, right): + return [ + c.text + for c in l + if int(c["left"]) >= left + and int(c["left"]) < right + and int(c["top"]) >= top + and int(c["top"]) < bottom + ] diff --git a/python/collateral/cs.py b/python/collateral/cs.py index bede3114..8e673f94 100644 --- a/python/collateral/cs.py +++ b/python/collateral/cs.py @@ -7,6 +7,7 @@ from bs4 import BeautifulSoup from operator import itemgetter from pandas.tseries.offsets import BDay from xlrd import open_workbook +from .common import load_pdf def download_files(count=20): @@ -45,46 +46,17 @@ def download_files(count=20): p.write_bytes(attach.content) -def load_pdf(file_path: pathlib.Path): - proc = subprocess.run( - ["pdftohtml", "-xml", "-l", "1", "-stdout", "-i", file_path.as_posix()], - capture_output=True, - ) - soup = BeautifulSoup(proc.stdout, features="lxml") - l = soup.findAll("text") - for e in l: - if e.text.startswith( - "Market Value of Collateral required pursuant to this notice" - ): - sib = e.next_siblings - next(sib) - return float(next(sib).text.replace(",", "")) - - def get_collateral(d): DATA_DIR = DAILY_DIR / "CS_reports" # get most recent file before current date - def get_date(p): - return datetime.date.fromisoformat(p.stem.split(" ", 1)[0]) + pdf_file = DATA_DIR / f"CollateralCptyStatement161SerenitasCGMFRVM_{d:%m%d%Y}.pdf" - files = ((f, get_date(f)) for f in DATA_DIR.glob("*.xls")) - files = sorted(filter(lambda t: t[1] <= d, files), key=itemgetter(1), reverse=True) - excel_file, date = files[0] - pdf_file = DATA_DIR / f"{date} Margin_Notice161 Serenitas CGMF RVM.pdf" - last_margin_call = load_pdf(pdf_file) - if date == d: # margin call is current do not include - last_margin_call = 0 - wb = open_workbook(files[0][0]) - s = wb.sheet_by_index(0) - i = 0 - im = 0.0 - for i, v in enumerate(s.col_values(0)): - if s.cell_value(i, 4) == "Total IM (USD):": - im = s.cell_value(i, 5) - if v.startswith("Total Value of Collateral"): - return s.cell_value(i, 1) + last_margin_call + im - if v.startswith("No Positions to Report"): - return 0.0 + g = iter(load_pdf(pdf_file)) + for e in g: + if e.text == "Cash USD (US Dollar)": + next(g) + value = next(g).text + return float(value.strip().replace(",", "")) def collateral(d, dawn_trades, *args): @@ -119,5 +91,6 @@ def collateral(d, dawn_trades, *args): }, ignore_index=True, ) + breakpoint() df["date"] = d return df.set_index("Strategy") |
