1 files changed, 193 insertions, 123 deletions
diff --git a/python/monthend_interest_recon.py b/python/monthend_interest_recon.py
index 43fc8d82..f1cb4305 100644
--- a/python/monthend_interest_recon.py
+++ b/python/monthend_interest_recon.py
@@ -1,13 +1,16 @@
 import datetime
 import logging
 import argparse
-from pathlib import Path
 from collections import defaultdict
+from io import StringIO
+import re
+
 import pandas as pd
 from dateutil.relativedelta import relativedelta
+from dataclasses import dataclass
 
 from serenitas.utils.env import DAILY_DIR
-from serenitas.utils.exchange import ExchangeMessage
+from serenitas.utils.exchange import ExchangeMessage, FileAttachment
 
 from collateral.common import load_pdf
 from interest_statement import export_data
@@ -17,170 +20,237 @@ from report_ops.misc import em_date_filter
 logger = logging.getLogger(__name__)
 
 
-def get_fpath(counterparty, save=False):
-    if save:
-        return (
-            DAILY_DIR / "Serenitas" / f"{counterparty}_reports" / "Interest Statements"
-        )
-    else:
-        return DAILY_DIR / "Serenitas" / "MonthlyInterest" / f"{counterparty}_reports"
+def get_pat(month, year, counterparty):
+    match counterparty:
+        case "BoA":
+            month_abbr = datetime.datetime.strptime(str(month), "%m").strftime("%b")
+            pattern = f"??{month_abbr}{year}*.pdf"
+        case "BNP":
+            month_name = datetime.datetime.strptime(str(month), "%m").strftime("%B")
+            pattern = (
+                f"Interest Statement SCAR GCM_RVM - USD - ?? {month_name} {year}.pdf"
+            )
+        case "CITI":
+            pattern = f"*_USD_ICS_SYSTEM.*.{year}{str(month).zfill(2)}??.xlsx"
+        case "JPM":
+            pattern = f"CSINSTMT-*-{str(year)[2:]}{str(month).zfill(2)}??-*_*_*.pdf"
+        case "GS":
+            pattern = f"MVPInterestStatement_{year}{str(month).zfill(2)}??_*_*.pdf"
+        case "MS":
+            pattern = f"*IntStmt_{year}{str(month).zfill(2)}??.xls"
+        case _:
+            raise ValueError(f"Missing counterparty: {counterparty}")
+    return pattern
 
 
-def download_messages(em, counterparty, start, end, save=False):
-    for msg in em.get_msgs(
-        20, path=["Interest", counterparty], **em_date_filter(em, start, end)
-    ):
-        base_dir = get_fpath(counterparty, save)
-        for attach in msg.attachments:
-            fname = attach.name
-            if (counterparty == "CS") and not ("Interest" in fname):
-                continue
-            p = base_dir / fname
-            if not p.parent.exists():
-                p.parent.mkdir(parents=True)
-            if not p.exists():
-                p.write_bytes(attach.content)
+def download_messages(em, counterparties, start, end):
+    for counterparty in counterparties:
+        for msg in em.get_msgs(
+            20, path=["Interest", counterparty], **em_date_filter(em, start, end)
+        ):
+            BASE_DIR = (
+                DAILY_DIR
+                / "Serenitas"
+                / f"{counterparty}_reports"
+                / "Interest Statements"
+            )
+            for attach in msg.attachments:
+                p = BASE_DIR / attach.name
+                p.parent.mkdir(parents=True, exist_ok=True)
+                if not p.exists():
+                    p.write_bytes(attach.content)
 
 
-def get_CS(g):
-    for e in g[-1]:
-        if "This interest, margin" in e.text:
-            if value == "No Accruals to Report":
-                return 0
-            return float(value)
-        value = e.text
+@dataclass
+class InterestCounterparty:
+    month: datetime.date.month
+    year: datetime.date.year
+    _registry = {}
 
+    def __init_subclass__(cls, name):
+        cls.name = name
+        cls._registry[name] = cls
 
-def get_BNP(l):
-    for e, n in zip(l, l[1:]):
-        if "Due to" in e.text:
-            value = n.text.replace(",", "")
-            return -float(value)
+    def __class_getitem__(cls, name: str):
+        return cls._registry[name]
 
+    def yield_files(self):
+        BASE_DIR = (
+            DAILY_DIR / "Serenitas" / f"{self.name}_reports" / "Interest Statements"
+        )
+        yield from BASE_DIR.glob(self.pat)
 
-def get_CITI(path):
-    df = pd.read_excel(path)
-    for row in df.itertuples():
-        if "Net Interest Due To CP" in row or "Net Interest Due to Citi" in row:
-            return -row._6
+    @property
+    def pat(self):
+        statement_month = datetime.date(
+            year=self.year, month=self.month, day=1
+        ) + relativedelta(months=1)
+        return get_pat(statement_month.month, statement_month.year, self.name)
 
 
-def get_GS(l):
-    for e, n in zip(l, l[1:]):
-        if "due to" in e.text:
-            return float(n.text.replace("USD", "").replace(",", ""))
+class BoA(InterestCounterparty, name="BoA"):
+    @staticmethod
+    def get_interest_amount(file_path):
+        pdf = load_pdf(file_path)
+        for e, n in zip(pdf, pdf[1:]):
+            if "Net interest Amount" in e.text:
+                return -float(
+                    n.text.replace("(", "-").replace(")", "").replace(",", "")
+                )
 
 
-def get_MS(path):
-    df = pd.read_excel(path)
-    return -round(df["LOCAL_ACCRUAL"].sum(), 2)
+class BNP(InterestCounterparty, name="BNP"):
+    @staticmethod
+    def get_interest_amount(file_path):
+        pdf = load_pdf(file_path)
+        for e, n in zip(pdf, pdf[1:]):
+            if "Due to" in e.text:
+                value = n.text.replace(",", "")
+                return -float(value)
 
+    @property
+    def pat(self):
+        # BNP files reflect month of interest rather than month of statement
+        return get_pat(self.month, self.year, self.name)
 
-def get_BAML(l):
-    for e, n in zip(l, l[1:]):
-        if "Net interest Amount" in e.text:
-            return -float(n.text.replace("(", "-").replace(")", "").replace(",", ""))
 
+class CITI(InterestCounterparty, name="CITI"):
+    @staticmethod
+    def get_interest_amount(file_path):
+        df = pd.read_excel(file_path, skiprows=14)
+        for row in df.itertuples():
+            if "Net Interest Due To CP" in row or "Net Interest Due to Citi" in row:
+                return -row._6
 
-def get_JPM(g):
-    for e in g:
-        if "Page" in e.text:
-            return float(value.replace(",", ""))
-        value = e.text
 
+class JPM(InterestCounterparty, name="JPM"):
+    @staticmethod
+    def get_interest_amount(file_path):
+        pdf = load_pdf(file_path)
+        for e in pdf:
+            if "Page" in e.text:
+                return float(value.replace(",", ""))
+            value = e.text
 
-def start_end(date):
-    date = date - relativedelta(months=1)
-    start = datetime.date(date.year, date.month, 1)
-    end = start + relativedelta(months=1)
-    end -= datetime.timedelta(days=1)
-    return start, end
+    @property
+    def pat(self):
+        # JPM files reflect month of interest rather than month of statement
+        return get_pat(self.month, self.year, self.name)
 
 
-def get_interest(counterparties, save=False):
-    interest_amounts = defaultdict(float)
-    for cp in counterparties:
-        try:
-            func = globals()[f"get_{cp}"]
-        except KeyError:
-            print(f"Missing cp {cp}")
-        base_path = Path("/home/serenitas/Daily/Serenitas/MonthlyInterest/")
-        if cp in ("CITI", "MS"):
-            for p in (base_path / f"{cp}_reports").glob("*.xls*"):
-                if p.name.startswith("~"):
-                    continue
-                amount = func(p)
-                interest_amounts[cp] = interest_amounts[cp] + amount
-        else:
-            for p in (base_path / f"{cp}_reports").glob("*.pdf"):
-                amount = func(load_pdf(p, pages=True if cp == "CS" else False))
-                interest_amounts[cp] = interest_amounts[cp] + amount
-    return pd.DataFrame(interest_amounts, index=[0]).T.rename(
-        index={"BAML": "BAML_ISDA"}, columns={0: "monthly_statement"}
-    )
+class GS(InterestCounterparty, name="GS"):
+    @staticmethod
+    def get_interest_amount(file_path):
+        pdf = load_pdf(file_path)
+        for e, n in zip(pdf, pdf[1:]):
+            if "due to" in e.text:
+                return float(n.text.replace("USD", "").replace(",", ""))
 
+    @property
+    def pat(self):
+        # GS files reflect month of interest rather than month of statement
+        return get_pat(self.month, self.year, self.name)
 
-if __name__ == "__main__":
-    em = ExchangeMessage()
-    counterparties = ["BNP", "CITI", "GS", "MS", "BAML", "JPM"]
-    # First run --save, then run --accept if it looks good
-    parser = argparse.ArgumentParser(description="Save, accept within tolerances")
+
+class MS(InterestCounterparty, name="MS"):
+    @staticmethod
+    def get_interest_amount(file_path):
+        df = pd.read_excel(file_path)
+        return -round(df["LOCAL_ACCRUAL"].sum(), 2)
+
+    @property
+    def pat(self):
+        # GS files reflect month of interest rather than month of statement
+        return get_pat(self.month, self.year, self.name)
+
+
+# class CS(InterestCounterparty, name='CS'):
+#     @staticmethod
+#     def get_interest_amount(file_path):
+#         pdf = load_pdf(file_path, pages=True)
+#         for row in pdf[-1]:
+#             if "This interest, margin" in row.text:
+#                 if row == "No Accruals to Report":
+#                     return 0
+#             return float(row)
+
+
+def parse_args():
+    """Parses command line arguments"""
+    parser = argparse.ArgumentParser(description="Generate IAM file for globeop")
     parser.add_argument(
-        "date",
-        type=datetime.date.fromisoformat,
+        "monthend",
         nargs="?",
-        default=datetime.date.today(),
-    )
-    parser.add_argument(
-        "--save",
-        action="store_true",
-        default=False,
-        help="store in proper folders",
+        type=datetime.date.fromisoformat,
+        default=datetime.date.today().replace(day=1) - datetime.timedelta(days=1),
     )
     parser.add_argument(
         "--accept",
+        "-a",
         action="store_true",
         default=False,
         help="accept the differences are within tolerance and edit the csv accordingly",
     )
-    args = parser.parse_args()
-    start = datetime.date(args.date.year, args.date.month, 1)
+    return parser.parse_args()
 
-    for cp in counterparties:
-        download_messages(em, cp, start, args.date, args.save)
 
-    interest_amounts = get_interest(counterparties, save=args.save)
-    start, end = start_end(args.date)
+def get_statement_totals(counterparties, month, year):
+    interest_amounts = defaultdict(float)
+    for counterparty in counterparties:
+        interest_counterparty = InterestCounterparty[counterparty](month, year)
+        for f in interest_counterparty.yield_files():
+            interest_amounts[counterparty] += interest_counterparty.get_interest_amount(
+                f
+            )
+    return pd.DataFrame.from_dict(
+        interest_amounts, orient="index", columns=["statement_interest"]
+    ).rename(index={"BoA": "BAML_ISDA"})
+
+
+def main():
+    args = parse_args()
+    em = ExchangeMessage()
+    download_messages(
+        em,
+        InterestCounterparty._registry.keys(),
+        args.monthend.replace(day=1),
+        args.monthend + relativedelta(months=1),
+    )
     global df
+    df = get_statement_totals(
+        InterestCounterparty._registry.keys(), args.monthend.month, args.monthend.year
+    )
+    serenitas_calculations = export_data(
+        args.monthend - relativedelta(months=1) + datetime.timedelta(days=1),
+        args.monthend,
+    )
     df = pd.merge(
-        export_data(start, end).groupby("broker").sum(),
-        interest_amounts,
+        serenitas_calculations.groupby("broker").sum(),
+        df,
         how="outer",
         left_index=True,
         right_index=True,
     )
     df = df.fillna(0)
-    df["difference"] = df["amount"] - df["monthly_statement"]
+    df["difference"] = df["amount"] - df["statement_interest"]
     if args.accept:
-        ssnc_df = export_data(start, end)
-        for k, v in df["difference"].items():
-            ssnc_df.loc[k].loc["CSH_CASH"] -= v
-        from io import StringIO
-        from serenitas.utils.exchange import ExchangeMessage, FileAttachment
-
+        for cp, difference in df["difference"].items():
+            # Match with counterparty if within tolerance
+            serenitas_calculations.loc[cp].loc["CSH_CASH"] -= difference
         buf = StringIO()
-        ssnc_df.to_csv(buf)
-        attach = [
-            FileAttachment(
-                name=f"{start:%b}.csv",
-                content=buf.getvalue().encode(),
-            )
-        ]
-        em = ExchangeMessage()
+        serenitas_calculations.to_csv(buf)
         em.send_email(
-            subject=f"Allocation of Interest for {start:%b}",
-            body="Please see attached",
+            subject=f"Allocation of Interest for {args.monthend.month:%b}",
+            body="Please see attached for allocation of interest by strategy and counterparty",
             to_recipients=("serenitas.otc@sscinc.com", "SERENITAS.ops@sscinc.com"),
             cc_recipients=("nyops@lmcg.com",),
-            attach=attach,
+            attach=[
+                FileAttachment(
+                    name=f"{args.monthend:%b-%y}.csv", content=buf.getvalue().encode()
+                )
+            ],
         )
+
+
+if __name__ == "__main__":
+    main()