about summary refs log tree commit diff stats
path: root/python/collateral/common.py
diff options
context:
space:
mode:
Diffstat (limited to 'python/collateral/common.py')
-rw-r--r-- python/collateral/common.py 24
1 files changed, 24 insertions, 0 deletions
diff --git a/python/collateral/common.py b/python/collateral/common.py
index 882a3a74..64498fca 100644
--- a/python/collateral/common.py
+++ b/python/collateral/common.py
@@ -1,5 +1,7 @@
import datetime
import logging
+import subprocess
+from bs4 import BeautifulSoup
import pandas as pd
from exchangelib import HTMLBody
from sqlalchemy.engine import Engine
@@ -102,3 +104,25 @@ def send_email(d: datetime.date, df: pd.DataFrame) -> None:
["serenitas.otc@sscinc.com"],
["nyops@lmcg.com"],
)
+
+
def load_pdf(file_path):
    """Extract the positioned text elements of a PDF via ``pdftohtml``.

    Runs ``pdftohtml -xml -stdout -i`` on the file, parses what the tool
    writes to stdout with BeautifulSoup (``lxml`` parser) and collects every
    ``<text>`` element.

    Args:
        file_path: Location of the PDF. Must provide ``as_posix()``
            (e.g. a ``pathlib.Path``).

    Returns:
        A list of the ``<text>`` tags ordered by their ``top`` attribute and
        then by their ``left`` attribute, both compared as integers.

    Raises:
        subprocess.CalledProcessError: If ``pdftohtml`` exits with a
            non-zero status.
    """
    proc = subprocess.run(
        ["pdftohtml", "-xml", "-stdout", "-i", file_path.as_posix()],
        capture_output=True,
        # Without this a failed conversion would be parsed as empty output
        # and come back as an empty list, hiding the real problem.
        check=True,
    )
    soup = BeautifulSoup(proc.stdout, features="lxml")
    # ``find_all`` is the current spelling of the deprecated ``findAll``.
    elements = soup.find_all("text")
    return sorted(
        elements,
        key=lambda tag: (int(tag["top"]), int(tag["left"])),
    )
+
+
def get_col(l, top, bottom, left, right):  # noqa: E741 - name kept for callers
    """Return the text of every element positioned inside a bounding box.

    The box is half-open on both axes: an element is kept when
    ``left <= int(element["left"]) < right`` and
    ``top <= int(element["top"]) < bottom``.

    Args:
        l: Iterable of elements that expose ``"left"`` and ``"top"`` items
            convertible with ``int`` and a ``text`` attribute, such as the
            list returned by ``load_pdf``.
        top: Inclusive lower bound for the element's ``top`` value.
        bottom: Exclusive upper bound for the element's ``top`` value.
        left: Inclusive lower bound for the element's ``left`` value.
        right: Exclusive upper bound for the element's ``left`` value.

    Returns:
        A list with the ``text`` of each matching element, in the order the
        elements appear in ``l``.
    """
    # Chained comparisons convert each coordinate only once, and the
    # vertical test is still evaluated only for elements that pass the
    # horizontal one.
    return [
        element.text
        for element in l
        if left <= int(element["left"]) < right
        and top <= int(element["top"]) < bottom
    ]