aboutsummaryrefslogtreecommitdiffstats
path: root/python/collateral/citi.py
diff options
context:
space:
mode:
Diffstat (limited to 'python/collateral/citi.py')
-rw-r--r-- python/collateral/citi.py 25
1 file changed, 1 insertion, 24 deletions
diff --git a/python/collateral/citi.py b/python/collateral/citi.py
index 5ea8ce60..acda4b1f 100644
--- a/python/collateral/citi.py
+++ b/python/collateral/citi.py
@@ -1,8 +1,7 @@
import pandas as pd
-import subprocess
-from bs4 import BeautifulSoup
from pandas.tseries.offsets import BDay
from . import DAILY_DIR, bus_day
+from .common import load_pdf, get_col
def load_file(d):
@@ -33,28 +32,6 @@ def download_files(count=20):
p.write_bytes(attach.content)
-def load_pdf(file_path):
- proc = subprocess.run(
- ["pdftohtml", "-xml", "-stdout", "-i", file_path.as_posix()],
- capture_output=True,
- )
- soup = BeautifulSoup(proc.stdout, features="lxml")
- l = soup.findAll("text")
- l = sorted(l, key=lambda x: (int(x["top"]), int(x["left"])))
- return l
-
-
-def get_col(l, top, bottom, left, right):
- return [
- c.text
- for c in l
- if int(c["left"]) >= left
- and int(c["left"]) < right
- and int(c["top"]) >= top
- and int(c["top"]) < bottom
- ]
-
-
def parse_num(s):
s = s.replace(",", "")
if s[0] == "(":