diff options
Diffstat (limited to 'python')
| -rw-r--r-- | python/collateral/common.py | 27 |
1 files changed, 18 insertions, 9 deletions
def get_col(l, top, bottom, left, right, bbox=False):
    """Collect the text of every cell lying entirely inside a rectangle.

    A cell is kept when its left/top corner is at or past ``left``/``top``
    and its right/bottom edge (``left + width`` / ``top + height``) is
    strictly before ``right``/``bottom``.

    Args:
        l: Iterable of cells. Each cell must support ``cell["left"]``,
            ``cell["top"]``, ``cell["width"]`` and ``cell["height"]``
            (values accepted by ``int()``) and expose a ``.text`` attribute.
            Presumably the BeautifulSoup tags returned by ``load_pdf`` --
            TODO confirm against callers.
        top: Inclusive upper bound for a cell's top coordinate.
        bottom: Exclusive bound for a cell's bottom edge.
        left: Inclusive lower bound for a cell's left coordinate.
        right: Exclusive bound for a cell's right edge.
        bbox: When true, also return the horizontal extent of the matches.

    Returns:
        The list of matching ``.text`` values in input order. With
        ``bbox=True``, a ``(texts, (actual_left, actual_right))`` tuple where
        the extent is the smallest left coordinate and the largest right edge
        among the matches. If nothing matched, the extent is the inverted
        sentinel ``(inf, -inf)``.
    """
    actual_left, actual_right = float("inf"), float("-inf")
    texts = []
    for cell in l:
        # Parse each coordinate once, in the same order the bounds are
        # tested, so a cell rejected early never has its later fields read.
        cell_left = int(cell["left"])
        if cell_left < left:
            continue
        cell_right = cell_left + int(cell["width"])
        if cell_right >= right:
            continue
        cell_top = int(cell["top"])
        if cell_top < top:
            continue
        if cell_top + int(cell["height"]) >= bottom:
            continue

        texts.append(cell.text)
        actual_left = min(cell_left, actual_left)
        actual_right = max(cell_right, actual_right)

    if bbox:
        return texts, (actual_left, actual_right)
    return texts
