diff options
| -rw-r--r-- | python/download_emails.py | 6 | ||||
| -rw-r--r-- | python/parse_emails.py | 9 |
2 files changed, 8 insertions, 7 deletions
diff --git a/python/download_emails.py b/python/download_emails.py
index 21d5794a..a9139ea3 100644
--- a/python/download_emails.py
+++ b/python/download_emails.py
@@ -158,19 +158,19 @@ def save_emails(update=True):
             date = parsedate_to_datetime(message['date'])
             if date.tzinfo is None:
                 date = date.replace(tzinfo=timezone('utc'))
+            date = date.astimezone(timezone('America/New_York'))
             body = message.get_body('plain')
             content = body.get_content()
         except (KeyError, UnicodeDecodeError) as e:
             logging.error("error decoding " + msg['id'])
             continue
         else:
-            email = p / msg['id']
+            email = p / "{:%Y-%m-%d %H-%M-%S}_{}".format(date, msg['id'])
             with email.open("w") as fh:
-                fh.write("{:.0f}\r\n".format(date.timestamp()*1000))
                 fh.write(subject + "\r\n")
                 fh.write(content)
     try:
-        new_history_id = msg['id']
+        new_history_id = message.history_id
         with open(os.path.join(os.environ['DATA_DIR'], '.lastHistoryId'), 'w') as fh:
             fh.write(new_history_id)
     except UnboundLocalError:
diff --git a/python/parse_emails.py b/python/parse_emails.py
index 32c1f6d6..a0d41dfb 100644
--- a/python/parse_emails.py
+++ b/python/parse_emails.py
@@ -299,9 +299,8 @@ subject_nomura = re.compile("(?:Fwd:)?CDX (IG|HY)(\d{2}).*- REF:[^\d]*([\d.]+)")
 subject_gs = re.compile("GS (IG|HY)(\d{2}) 5y.*- Ref [^\d]*([\d.]+)")
 subject_sg = re.compile("SG OPTIONS - CDX (IG|HY) S(\d{2}).* REF[^\d]*([\d.]+)")
 
-def parse_email(email):
+def parse_email(email, date_received):
     with open(email.path, "rt") as fh:
-        date_received = datetime.datetime.fromtimestamp(int(fh.readline())/1000)
         subject = next(fh)
         for source in ['BAML', 'MS', 'NOMURA', 'GS', 'SG']:
             m = globals()['subject_'+source.lower()].match(subject)
@@ -385,11 +384,13 @@ if __name__=="__main__":
     except FileNotFoundError:
         already_uploaded = {}
     for f in emails:
-        if f.name in already_uploaded:
+        date_received, msg_id = f.name.split("_")
+        date_received = datetime.datetime.strptime(date_received, "%Y-%m-%d %H-%M-%S")
+        if msg_id in already_uploaded:
             continue
         else:
             try:
-                key, (option_stack, fwd_index) = parse_email(f)
+                key, (option_stack, fwd_index) = parse_email(f, date_received)
             except RuntimeError as e:
                 logging.error(e)
             else:
