aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r-- python/quote_parsing/download_emails.py 14
1 files changed, 14 insertions, 0 deletions
diff --git a/python/quote_parsing/download_emails.py b/python/quote_parsing/download_emails.py
index 269d4dd6..3a7831c3 100644
--- a/python/quote_parsing/download_emails.py
+++ b/python/quote_parsing/download_emails.py
@@ -13,9 +13,23 @@ from gmail_helpers import GmailMessage
from email.utils import parsedate_to_datetime
def print_citi_html2(soup):
    """Collect the text held in the ``<pre>`` elements of *soup*.

    ``print_citi_html`` falls back to this function when the message body
    contains no ``<p>`` element.

    Args:
        soup: Parsed HTML document (a ``BeautifulSoup`` object).

    Returns:
        A single string: every text node (NFKD-normalized) and one newline
        marker per ``<br>``, joined with newline characters.

    Raises:
        ValueError: If a ``<pre>`` has a direct child that is neither a text
            node nor a ``<br>`` tag.
    """
    parts = []
    for pre in soup.find_all("pre"):
        # The previous version iterated `p.findAll("br")` and then tested an
        # unbound name `s`, which raised NameError as soon as a <pre> existed.
        # NOTE(review): walking the direct children of each <pre> is the
        # presumed intent, since the branches below expect a mix of text
        # nodes and <br> tags -- confirm against a real message.
        for node in pre.children:
            if isinstance(node, NavigableString):
                parts.append(unicodedata.normalize("NFKD", node))
            elif isinstance(node, Tag) and node.name == "br":
                parts.append("\n")
            else:
                raise ValueError(
                    "unexpected node inside <pre>: {!r}".format(node)
                )
    # NOTE(review): <br> already contributes "\n" and the pieces are joined
    # with "\n" as well, so a <br> between two text nodes yields several
    # consecutive newlines; kept as in the original.
    return "\n".join(parts)
+
def print_citi_html(email):
soup = BeautifulSoup(email.get_content(), features="lxml")
p = soup.find("p")
+ if p is None:
+ return print_citi_html2(soup)
s = p.next
if isinstance(s, NavigableString):
l = [unicodedata.normalize("NFKD", s)]