from serenitas.analytics.utils import get_fx from bs4 import BeautifulSoup from io import BytesIO from pandas.tseries.offsets import BDay import logging import pandas as pd import pathlib import requests import xlrd import zipfile from serenitas.utils.env import DAILY_DIR from urllib.parse import urlsplit, parse_qs, urlunsplit, urljoin from xlrd import open_workbook, xldate_as_tuple import pandas as pd from serenitas.utils.db import dawn_engine import datetime import ExchangeMessage logger = logging.getLogger(__name__) def download_messages(em): # Need to fix path for msg in em.get_msgs( path=["SercgmastOps", "Reports"], subject__startswith="Positions-" ): if msg.sender == "mercury-reports@baml.com": soup = BeautifulSoup(msg.body, features="lxml") a = soup.find("a") url = urlsplit(a["href"]) query = parse_qs(url.query) base_url = urlunsplit(url[:2] + ("",) * 3) try: download_from_secure_id( query["id"][0], query["brand"][0], DATA_DIR, base_url ) except ValueError as e: logger.error(e) continue def download_from_secure_id( secure_id: str, brand: str, path: pathlib.Path, base_url="https://secmail.bankofamerica.com", ): password = { "ghorel@lmcg.com": "v4vdMvH9Qe9t", "nyops@lmcg.com": "a6lAkBfqDSHsrkGspYSS", } payload = {} with requests.Session() as session: r = session.get( urljoin(base_url, "formpostdir/securereader"), params={"id": secure_id, "brand": brand}, ) soup = BeautifulSoup(r.content, features="lxml") form = soup.find(id="dialog") if "messagenotfound" in form["action"]: raise ValueError("message not found") for inp in form.find_all("input"): payload[inp["name"]] = inp["value"] payload["dialog:password"] = password[payload["dialog:username"]] r = session.post(base_url + form["action"], data=payload) soup = BeautifulSoup(r.content, features="lxml") form = soup.find(id="readTB") payload = { "readTB": "readTB", "readTB:downloadZipButton": "readTB:downloadZipButton", } for inp in form.find_all("input"): if "ViewState" in inp["name"]: payload["javax.faces.ViewState"] = inp["value"] r = session.post(urljoin(base_url, "securereader/read.jsf"), data=payload) if r.headers["content-type"] == "application/octet-stream": with zipfile.ZipFile(BytesIO(r.content)) as z: for f in z.namelist(): if not f.endswith("html") and not (path / f).exists(): z.extract(f, path=path) def load_cash_report(workdate: datetime.date, path: pathlib.Path): cols = ["Local Market Value", "fund", "date"] df = pd.read_csv(path) df = ( df[df["Asset Class"] == "CASH AND EQUIVALENTS"] .groupby(["Local Currency", "Account Name", "Portfolio ID"]) .sum() ) df["fund"] = "SERCGMAST" df["date"] = workdate df = ( df[cols] .reset_index() .rename( { "Portfolio ID": "account_number", "Local Currency": "currency_code", "Account Name": "account_name", "Local Market Value": "balance", }, axis=1, ) ) df.to_sql("cash_balances", dawn_engine, if_exists="append", index=False) if __name__ == "__main__": import argparse from serenitas.utils.exchange import ExchangeMessage parser = argparse.ArgumentParser() parser.add_argument( "workdate", nargs="?", type=datetime.date.fromisoformat, default=datetime.date.today(), help="working date", ) args = parser.parse_args() em = ExchangeMessage() p = ( DAILY_DIR / str(args.workdate) / "Reports" / f"Positions_{args.workdate:%d %b %Y}.csv" ) download_messages(em) load_cash_report(args.workdate, p)