1 files changed, 127 insertions, 206 deletions
diff --git a/python/monthend_interest_recon.py b/python/monthend_interest_recon.py
index 1e29a520..2332a97b 100644
--- a/python/monthend_interest_recon.py
+++ b/python/monthend_interest_recon.py
@@ -1,264 +1,185 @@
+from serenitas.utils.env import DAILY_DIR
+from serenitas.utils.exchange import ExchangeMessage
 import datetime
 import logging
 import argparse
-from collections import defaultdict
-from io import StringIO
-import os
 
+from collateral.common import load_pdf
+from pathlib import Path
 import pandas as pd
-from dateutil.relativedelta import relativedelta
-from dataclasses import dataclass
-import openpyxl
-from zipfile import BadZipFile
-
-from serenitas.utils.env import DAILY_DIR
-from serenitas.utils.exchange import ExchangeMessage, FileAttachment
+from collections import defaultdict
 
-from collateral.common import load_pdf
 from interest_statement import export_data
-from report_ops.misc import em_date_filter
+from dateutil.relativedelta import relativedelta
 
 
 logger = logging.getLogger(__name__)
 
 
-def get_pat(month, year, counterparty):
-    match counterparty:
-        case "BoA":
-            month_abbr = datetime.datetime.strptime(str(month), "%m").strftime("%b")
-            pattern = f"??{month_abbr}{year}*.pdf"
-        case "BNP":
-            month_name = datetime.datetime.strptime(str(month), "%m").strftime("%B")
-            pattern = (
-                f"Interest Statement SCAR GCM_RVM - USD - ?? {month_name} {year}.pdf"
-            )
-        case "CITI":
-            pattern = f"*_USD_ICS_SYSTEM.*.{year}{str(month).zfill(2)}??.xlsx"
-        case "JPM":
-            pattern = f"CSINSTMT-*-{str(year)[2:]}{str(month).zfill(2)}??-*_*_*.pdf"
-        case "GS":
-            pattern = f"MVPInterestStatement_{year}{str(month).zfill(2)}??_*_*.pdf"
-        case "MS":
-            pattern = f"*IntStmt_{year}{str(month).zfill(2)}??.xls"
-        case _:
-            raise ValueError(f"Missing counterparty: {counterparty}")
-    return pattern
-
-
-def download_messages(em, counterparties, start, end):
-    for counterparty in counterparties:
-        for msg in em.get_msgs(
-            20, path=["Interest", counterparty], **em_date_filter(em, start, end)
-        ):
-            BASE_DIR = (
-                DAILY_DIR
-                / "Serenitas"
-                / f"{counterparty}_reports"
-                / "Interest Statements"
-            )
-            for attach in msg.attachments:
-                p = BASE_DIR / attach.name
-                p.parent.mkdir(parents=True, exist_ok=True)
-                if not p.exists():
-                    p.write_bytes(attach.content)
-
-
-@dataclass
-class InterestCounterparty:
-    month: datetime.date.month
-    year: datetime.date.year
-    _registry = {}
-
-    def __init_subclass__(cls, name):
-        cls.name = name
-        cls._registry[name] = cls
+def get_fpath(counterparty, save=False):
+    if save:
+        return (
+            DAILY_DIR / "Serenitas" / f"{counterparty}_reports" / "Interest Statements"
+        )
+    else:
+        return DAILY_DIR / "Serenitas" / "MonthlyInterest" / f"{counterparty}_reports"
 
-    def __class_getitem__(cls, name: str):
-        return cls._registry[name]
 
-    def yield_files(self):
-        BASE_DIR = (
-            DAILY_DIR / "Serenitas" / f"{self.name}_reports" / "Interest Statements"
-        )
-        yield sorted(BASE_DIR.glob(self.pat), key=lambda x: -os.path.getmtime(x))[0]
+def download_messages(em, counterparty, start, end, save=False):
+    for msg in em.get_msgs(
+        20, path=["Interest", counterparty], start_date=start, end_date=end
+    ):
+        base_dir = get_fpath(counterparty, save)
+        for attach in msg.attachments:
+            fname = attach.name
+            if (counterparty == "CS") and not ("Interest" in fname):
+                continue
+            p = base_dir / fname
+            if not p.parent.exists():
+                p.parent.mkdir(parents=True)
+            if not p.exists():
+                p.write_bytes(attach.content)
 
-    @property
-    def pat(self):
-        statement_month = datetime.date(
-            year=self.year, month=self.month, day=1
-        ) + relativedelta(months=1)
-        return get_pat(statement_month.month, statement_month.year, self.name)
 
+def get_CS(g):
+    for e in g[-1]:
+        if "This interest, margin" in e.text:
+            if value == "No Accruals to Report":
+                return 0
+            return float(value)
+        value = e.text
 
-class BoA(InterestCounterparty, name="BoA"):
-    @staticmethod
-    def get_interest_amount(file_path):
-        pdf = load_pdf(file_path)
-        for e, n in zip(pdf, pdf[1:]):
-            if "Net interest Amount" in e.text:
-                return -float(
-                    n.text.replace("(", "-").replace(")", "").replace(",", "")
-                )
 
+def get_BNP(l):
+    for e, n in zip(l, l[1:]):
+        if "Due to" in e.text:
+            value = n.text.replace(",", "")
+            return -float(value)
 
-class BNP(InterestCounterparty, name="BNP"):
-    @staticmethod
-    def get_interest_amount(file_path):
-        pdf = load_pdf(file_path)
-        for e, n in zip(pdf, pdf[1:]):
-            if "Due to" in e.text:
-                value = n.text.replace(",", "")
-                return -float(value)
 
-    @property
-    def pat(self):
-        # BNP files reflect month of interest rather than month of statement
-        return get_pat(self.month, self.year, self.name)
+def get_CITI(path):
+    df = pd.read_excel(path)
+    for row in df.itertuples():
+        if "Net Interest Due To CP" in row or "Net Interest Due to Citi" in row:
+            return -row._6
 
 
-class CITI(InterestCounterparty, name="CITI"):
-    @staticmethod
-    def get_interest_amount(file_path):
-        sheet = openpyxl.load_workbook(file_path).active
-        for row in sheet.rows:
-            if row[1].value in (
-                "Net Interest Due To CP",
-                "Net Interest Due to Citi",
-            ):
-                return -row[5].value
+def get_GS(l):
+    for e, n in zip(l, l[1:]):
+        if "due to" in e.text:
+            return float(n.text.replace("USD", "").replace(",", ""))
 
-    def yield_files(self):
-        BASE_DIR = (
-            DAILY_DIR / "Serenitas" / f"{self.name}_reports" / "Interest Statements"
-        )
-        yield from BASE_DIR.glob(self.pat)  # Citi has two files VM and IM files
 
+def get_MS(path):
+    df = pd.read_excel(path)
+    return -round(df["LOCAL_ACCRUAL"].sum(), 2)
 
-class JPM(InterestCounterparty, name="JPM"):
-    @staticmethod
-    def get_interest_amount(file_path):
-        pdf = load_pdf(file_path)
-        for e in pdf:
-            if "Page" in e.text:
-                return float(value.replace(",", ""))
-            value = e.text
 
-    @property
-    def pat(self):
-        # JPM files reflect month of interest rather than month of statement
-        return get_pat(self.month, self.year, self.name)
+def get_BoA(l):
+    for e, n in zip(l, l[1:]):
+        if "Net interest Amount" in e.text:
+            return -float(n.text.replace("(", "-").replace(")", "").replace(",", ""))
 
 
-class GS(InterestCounterparty, name="GS"):
-    @staticmethod
-    def get_interest_amount(file_path):
-        pdf = load_pdf(file_path)
-        for e, n in zip(pdf, pdf[1:]):
-            if "due to" in e.text:
-                return float(n.text.replace("USD", "").replace(",", ""))
+def get_JPM(g):
+    for e in g:
+        if "Page" in e.text:
+            return float(value.replace(",", ""))
+        value = e.text
 
-    @property
-    def pat(self):
-        # GS files reflect month of interest rather than month of statement
-        return get_pat(self.month, self.year, self.name)
 
+def start_end(date):
+    date = date - relativedelta(months=1)
+    start = datetime.date(date.year, date.month, 1)
+    end = start + relativedelta(months=1)
+    end -= datetime.timedelta(days=1)
+    return start, end
 
-class MS(InterestCounterparty, name="MS"):
-    @staticmethod
-    def get_interest_amount(file_path):
-        df = pd.read_excel(file_path)
-        return -round(df["LOCAL_ACCRUAL"].sum(), 2)
 
-    @property
-    def pat(self):
-        # MS files reflect previous business day, could potenitally be the same month so we'll have to edit accordingly
-        return get_pat(self.month, self.year, self.name)
+def get_interest(counterparties, save=False):
+    interest_amounts = defaultdict(float)
+    for cp in counterparties:
+        try:
+            func = globals()[f"get_{cp}"]
+        except KeyError:
+            print(f"Missing cp {cp}")
+        base_path = Path("/home/serenitas/Daily/Serenitas/MonthlyInterest/")
+        if cp in ("CITI", "MS"):
+            for p in (base_path / f"{cp}_reports").glob("*.xls*"):
+                if p.name.startswith("~"):
+                    continue
+                amount = func(p)
+                interest_amounts[cp] = interest_amounts[cp] + amount
+        else:
+            for p in (base_path / f"{cp}_reports").glob("*.pdf"):
+                amount = func(load_pdf(p, pages=True if cp == "CS" else False))
+                interest_amounts[cp] = interest_amounts[cp] + amount
+    return pd.DataFrame(interest_amounts, index=[0]).T.rename(
+        index={"BoA": "BAML_ISDA"}, columns={0: "monthly_statement"}
+    )
 
 
-def parse_args():
-    """Parses command line arguments"""
-    parser = argparse.ArgumentParser(description="Generate IAM file for globeop")
+if __name__ == "__main__":
+    em = ExchangeMessage()
+    counterparties = ["BNP", "CITI", "GS", "MS", "BoA", "JPM"]
+    # First run --save, then run --accept if it looks good
+    parser = argparse.ArgumentParser(description="Save, accept within tolerances")
     parser.add_argument(
-        "monthend",
-        nargs="?",
+        "date",
         type=datetime.date.fromisoformat,
-        default=datetime.date.today().replace(day=1) - datetime.timedelta(days=1),
+        nargs="?",
+        default=datetime.date.today(),
+    )
+    parser.add_argument(
+        "--save",
+        action="store_true",
+        default=False,
+        help="store in proper folders",
     )
     parser.add_argument(
         "--accept",
-        "-a",
         action="store_true",
         default=False,
         help="accept the differences are within tolerance and edit the csv accordingly",
     )
-    return parser.parse_args()
-
+    args = parser.parse_args()
+    start = datetime.date(args.date.year, args.date.month, 1)
 
-def get_statement_totals(counterparties, month, year):
-    interest_amounts = defaultdict(float)
-    for counterparty in counterparties:
-        interest_counterparty = InterestCounterparty[counterparty](month, year)
-        for f in interest_counterparty.yield_files():
-            interest_amounts[counterparty] += interest_counterparty.get_interest_amount(
-                f
-            )
-    return pd.DataFrame.from_dict(
-        interest_amounts, orient="index", columns=["statement_interest"]
-    ).rename(index={"BoA": "BAML_ISDA"})
+    for cp in counterparties:
+        download_messages(em, cp, start, args.date, args.save)
 
-
-def main():
-    args = parse_args()
-    em = ExchangeMessage()
-    download_messages(
-        em,
-        InterestCounterparty._registry.keys(),
-        args.monthend.replace(day=1),
-        args.monthend + relativedelta(months=1),
-    )
+    interest_amounts = get_interest(counterparties, save=args.save)
+    start, end = start_end(args.date)
     global df
-    df = get_statement_totals(
-        InterestCounterparty._registry.keys(), args.monthend.month, args.monthend.year
-    )
-    serenitas_df = export_data(
-        args.monthend - relativedelta(months=1) + datetime.timedelta(days=1),
-        args.monthend,
-    )
     df = pd.merge(
-        serenitas_df.groupby("broker").sum(),
-        df,
+        export_data(start, end).groupby("broker").sum(),
+        interest_amounts,
         how="outer",
         left_index=True,
         right_index=True,
     )
     df = df.fillna(0)
-    df["difference"] = df["amount"] - df["statement_interest"]
+    df["difference"] = df["amount"] - df["monthly_statement"]
     if args.accept:
-        for cp, difference in df["difference"].items():
-            # Match with counterparty if within tolerance
-            try:
-                serenitas_df.at[(cp, "CSH_CASH"), "amount"] -= difference
-            except KeyError:
-                serenitas_df = serenitas_df.reindex(
-                    serenitas_df.index.append(
-                        pd.MultiIndex.from_tuples([(cp, "CSH_CASH")])
-                    )
-                )
-                serenitas_df.at[(cp, "CSH_CASH"), "amount"] = -difference
+        ssnc_df = export_data(start, end)
+        for k, v in df["difference"].items():
+            ssnc_df.loc[k].loc["CSH_CASH"] -= v
+        from io import StringIO
+        from serenitas.utils.exchange import ExchangeMessage, FileAttachment
+
         buf = StringIO()
-        serenitas_df.to_csv(buf)
+        ssnc_df.to_csv(buf)
+        attach = [
+            FileAttachment(
+                name=f"{start:%b}.csv",
+                content=buf.getvalue().encode(),
+            )
+        ]
+        em = ExchangeMessage()
         em.send_email(
-            subject=f"Allocation of Interest for {args.monthend:%b-%y}",
-            body="Please see attached for allocation of interest by strategy and counterparty",
+            subject=f"Allocation of Interest for {start:%b}",
+            body="Please see attached",
             to_recipients=("serenitas.otc@sscinc.com", "SERENITAS.ops@sscinc.com"),
             cc_recipients=("nyops@lmcg.com",),
-            attach=[
-                FileAttachment(
-                    name=f"{args.monthend:%b-%y}.csv", content=buf.getvalue().encode()
-                )
-            ],
+            attach=attach,
         )
-
-
-if __name__ == "__main__":
-    main()