diff options
| -rw-r--r-- | python/monthend_interest_recon.py | 333 |
1 files changed, 127 insertions, 206 deletions
diff --git a/python/monthend_interest_recon.py b/python/monthend_interest_recon.py index 1e29a520..2332a97b 100644 --- a/python/monthend_interest_recon.py +++ b/python/monthend_interest_recon.py @@ -1,264 +1,185 @@ +from serenitas.utils.env import DAILY_DIR +from serenitas.utils.exchange import ExchangeMessage import datetime import logging import argparse -from collections import defaultdict -from io import StringIO -import os +from collateral.common import load_pdf +from pathlib import Path import pandas as pd -from dateutil.relativedelta import relativedelta -from dataclasses import dataclass -import openpyxl -from zipfile import BadZipFile - -from serenitas.utils.env import DAILY_DIR -from serenitas.utils.exchange import ExchangeMessage, FileAttachment +from collections import defaultdict -from collateral.common import load_pdf from interest_statement import export_data -from report_ops.misc import em_date_filter +from dateutil.relativedelta import relativedelta logger = logging.getLogger(__name__) -def get_pat(month, year, counterparty): - match counterparty: - case "BoA": - month_abbr = datetime.datetime.strptime(str(month), "%m").strftime("%b") - pattern = f"??{month_abbr}{year}*.pdf" - case "BNP": - month_name = datetime.datetime.strptime(str(month), "%m").strftime("%B") - pattern = ( - f"Interest Statement SCAR GCM_RVM - USD - ?? {month_name} {year}.pdf" - ) - case "CITI": - pattern = f"*_USD_ICS_SYSTEM.*.{year}{str(month).zfill(2)}??.xlsx" - case "JPM": - pattern = f"CSINSTMT-*-{str(year)[2:]}{str(month).zfill(2)}??-*_*_*.pdf" - case "GS": - pattern = f"MVPInterestStatement_{year}{str(month).zfill(2)}??_*_*.pdf" - case "MS": - pattern = f"*IntStmt_{year}{str(month).zfill(2)}??.xls" - case _: - raise ValueError(f"Missing counterparty: {counterparty}") - return pattern - - -def download_messages(em, counterparties, start, end): - for counterparty in counterparties: - for msg in em.get_msgs( - 20, path=["Interest", counterparty], **em_date_filter(em, start, end) - ): - BASE_DIR = ( - DAILY_DIR - / "Serenitas" - / f"{counterparty}_reports" - / "Interest Statements" - ) - for attach in msg.attachments: - p = BASE_DIR / attach.name - p.parent.mkdir(parents=True, exist_ok=True) - if not p.exists(): - p.write_bytes(attach.content) - - -@dataclass -class InterestCounterparty: - month: datetime.date.month - year: datetime.date.year - _registry = {} - - def __init_subclass__(cls, name): - cls.name = name - cls._registry[name] = cls +def get_fpath(counterparty, save=False): + if save: + return ( + DAILY_DIR / "Serenitas" / f"{counterparty}_reports" / "Interest Statements" + ) + else: + return DAILY_DIR / "Serenitas" / "MonthlyInterest" / f"{counterparty}_reports" - def __class_getitem__(cls, name: str): - return cls._registry[name] - def yield_files(self): - BASE_DIR = ( - DAILY_DIR / "Serenitas" / f"{self.name}_reports" / "Interest Statements" - ) - yield sorted(BASE_DIR.glob(self.pat), key=lambda x: -os.path.getmtime(x))[0] +def download_messages(em, counterparty, start, end, save=False): + for msg in em.get_msgs( + 20, path=["Interest", counterparty], start_date=start, end_date=end + ): + base_dir = get_fpath(counterparty, save) + for attach in msg.attachments: + fname = attach.name + if (counterparty == "CS") and not ("Interest" in fname): + continue + p = base_dir / fname + if not p.parent.exists(): + p.parent.mkdir(parents=True) + if not p.exists(): + p.write_bytes(attach.content) - @property - def pat(self): - statement_month = datetime.date( - year=self.year, month=self.month, day=1 - ) + relativedelta(months=1) - return get_pat(statement_month.month, statement_month.year, self.name) +def get_CS(g): + for e in g[-1]: + if "This interest, margin" in e.text: + if value == "No Accruals to Report": + return 0 + return float(value) + value = e.text -class BoA(InterestCounterparty, name="BoA"): - @staticmethod - def get_interest_amount(file_path): - pdf = load_pdf(file_path) - for e, n in zip(pdf, pdf[1:]): - if "Net interest Amount" in e.text: - return -float( - n.text.replace("(", "-").replace(")", "").replace(",", "") - ) +def get_BNP(l): + for e, n in zip(l, l[1:]): + if "Due to" in e.text: + value = n.text.replace(",", "") + return -float(value) -class BNP(InterestCounterparty, name="BNP"): - @staticmethod - def get_interest_amount(file_path): - pdf = load_pdf(file_path) - for e, n in zip(pdf, pdf[1:]): - if "Due to" in e.text: - value = n.text.replace(",", "") - return -float(value) - @property - def pat(self): - # BNP files reflect month of interest rather than month of statement - return get_pat(self.month, self.year, self.name) +def get_CITI(path): + df = pd.read_excel(path) + for row in df.itertuples(): + if "Net Interest Due To CP" in row or "Net Interest Due to Citi" in row: + return -row._6 -class CITI(InterestCounterparty, name="CITI"): - @staticmethod - def get_interest_amount(file_path): - sheet = openpyxl.load_workbook(file_path).active - for row in sheet.rows: - if row[1].value in ( - "Net Interest Due To CP", - "Net Interest Due to Citi", - ): - return -row[5].value +def get_GS(l): + for e, n in zip(l, l[1:]): + if "due to" in e.text: + return float(n.text.replace("USD", "").replace(",", "")) - def yield_files(self): - BASE_DIR = ( - DAILY_DIR / "Serenitas" / f"{self.name}_reports" / "Interest Statements" - ) - yield from BASE_DIR.glob(self.pat) # Citi has two files VM and IM files +def get_MS(path): + df = pd.read_excel(path) + return -round(df["LOCAL_ACCRUAL"].sum(), 2) -class JPM(InterestCounterparty, name="JPM"): - @staticmethod - def get_interest_amount(file_path): - pdf = load_pdf(file_path) - for e in pdf: - if "Page" in e.text: - return float(value.replace(",", "")) - value = e.text - @property - def pat(self): - # JPM files reflect month of interest rather than month of statement - return get_pat(self.month, self.year, self.name) +def get_BoA(l): + for e, n in zip(l, l[1:]): + if "Net interest Amount" in e.text: + return -float(n.text.replace("(", "-").replace(")", "").replace(",", "")) -class GS(InterestCounterparty, name="GS"): - @staticmethod - def get_interest_amount(file_path): - pdf = load_pdf(file_path) - for e, n in zip(pdf, pdf[1:]): - if "due to" in e.text: - return float(n.text.replace("USD", "").replace(",", "")) +def get_JPM(g): + for e in g: + if "Page" in e.text: + return float(value.replace(",", "")) + value = e.text - @property - def pat(self): - # GS files reflect month of interest rather than month of statement - return get_pat(self.month, self.year, self.name) +def start_end(date): + date = date - relativedelta(months=1) + start = datetime.date(date.year, date.month, 1) + end = start + relativedelta(months=1) + end -= datetime.timedelta(days=1) + return start, end -class MS(InterestCounterparty, name="MS"): - @staticmethod - def get_interest_amount(file_path): - df = pd.read_excel(file_path) - return -round(df["LOCAL_ACCRUAL"].sum(), 2) - @property - def pat(self): - # MS files reflect previous business day, could potenitally be the same month so we'll have to edit accordingly - return get_pat(self.month, self.year, self.name) +def get_interest(counterparties, save=False): + interest_amounts = defaultdict(float) + for cp in counterparties: + try: + func = globals()[f"get_{cp}"] + except KeyError: + print(f"Missing cp {cp}") + base_path = Path("/home/serenitas/Daily/Serenitas/MonthlyInterest/") + if cp in ("CITI", "MS"): + for p in (base_path / f"{cp}_reports").glob("*.xls*"): + if p.name.startswith("~"): + continue + amount = func(p) + interest_amounts[cp] = interest_amounts[cp] + amount + else: + for p in (base_path / f"{cp}_reports").glob("*.pdf"): + amount = func(load_pdf(p, pages=True if cp == "CS" else False)) + interest_amounts[cp] = interest_amounts[cp] + amount + return pd.DataFrame(interest_amounts, index=[0]).T.rename( + index={"BoA": "BAML_ISDA"}, columns={0: "monthly_statement"} + ) -def parse_args(): - """Parses command line arguments""" - parser = argparse.ArgumentParser(description="Generate IAM file for globeop") +if __name__ == "__main__": + em = ExchangeMessage() + counterparties = ["BNP", "CITI", "GS", "MS", "BoA", "JPM"] + # First run --save, then run --accept if it looks good + parser = argparse.ArgumentParser(description="Save, accept within tolerances") parser.add_argument( - "monthend", - nargs="?", + "date", type=datetime.date.fromisoformat, - default=datetime.date.today().replace(day=1) - datetime.timedelta(days=1), + nargs="?", + default=datetime.date.today(), + ) + parser.add_argument( + "--save", + action="store_true", + default=False, + help="store in proper folders", ) parser.add_argument( "--accept", - "-a", action="store_true", default=False, help="accept the differences are within tolerance and edit the csv accordingly", ) - return parser.parse_args() - + args = parser.parse_args() + start = datetime.date(args.date.year, args.date.month, 1) -def get_statement_totals(counterparties, month, year): - interest_amounts = defaultdict(float) - for counterparty in counterparties: - interest_counterparty = InterestCounterparty[counterparty](month, year) - for f in interest_counterparty.yield_files(): - interest_amounts[counterparty] += interest_counterparty.get_interest_amount( - f - ) - return pd.DataFrame.from_dict( - interest_amounts, orient="index", columns=["statement_interest"] - ).rename(index={"BoA": "BAML_ISDA"}) + for cp in counterparties: + download_messages(em, cp, start, args.date, args.save) - -def main(): - args = parse_args() - em = ExchangeMessage() - download_messages( - em, - InterestCounterparty._registry.keys(), - args.monthend.replace(day=1), - args.monthend + relativedelta(months=1), - ) + interest_amounts = get_interest(counterparties, save=args.save) + start, end = start_end(args.date) global df - df = get_statement_totals( - InterestCounterparty._registry.keys(), args.monthend.month, args.monthend.year - ) - serenitas_df = export_data( - args.monthend - relativedelta(months=1) + datetime.timedelta(days=1), - args.monthend, - ) df = pd.merge( - serenitas_df.groupby("broker").sum(), - df, + export_data(start, end).groupby("broker").sum(), + interest_amounts, how="outer", left_index=True, right_index=True, ) df = df.fillna(0) - df["difference"] = df["amount"] - df["statement_interest"] + df["difference"] = df["amount"] - df["monthly_statement"] if args.accept: - for cp, difference in df["difference"].items(): - # Match with counterparty if within tolerance - try: - serenitas_df.at[(cp, "CSH_CASH"), "amount"] -= difference - except KeyError: - serenitas_df = serenitas_df.reindex( - serenitas_df.index.append( - pd.MultiIndex.from_tuples([(cp, "CSH_CASH")]) - ) - ) - serenitas_df.at[(cp, "CSH_CASH"), "amount"] = -difference + ssnc_df = export_data(start, end) + for k, v in df["difference"].items(): + ssnc_df.loc[k].loc["CSH_CASH"] -= v + from io import StringIO + from serenitas.utils.exchange import ExchangeMessage, FileAttachment + buf = StringIO() - serenitas_df.to_csv(buf) + ssnc_df.to_csv(buf) + attach = [ + FileAttachment( + name=f"{start:%b}.csv", + content=buf.getvalue().encode(), + ) + ] + em = ExchangeMessage() em.send_email( - subject=f"Allocation of Interest for {args.monthend:%b-%y}", - body="Please see attached for allocation of interest by strategy and counterparty", + subject=f"Allocation of Interest for {start:%b}", + body="Please see attached", to_recipients=("serenitas.otc@sscinc.com", "SERENITAS.ops@sscinc.com"), cc_recipients=("nyops@lmcg.com",), - attach=[ - FileAttachment( - name=f"{args.monthend:%b-%y}.csv", content=buf.getvalue().encode() - ) - ], + attach=attach, ) - - -if __name__ == "__main__": - main() |
