diff options
Diffstat (limited to 'python/download_emails.py')
| -rw-r--r-- | python/download_emails.py | 41 |
1 files changed, 22 insertions, 19 deletions
diff --git a/python/download_emails.py b/python/download_emails.py index 6127e38c..33a78b85 100644 --- a/python/download_emails.py +++ b/python/download_emails.py @@ -30,17 +30,15 @@ def ListMessagesWithLabels(service, user_id, label_ids=[]): try: response = service.users().messages().list(userId=user_id, labelIds=label_ids).execute() - messages = [] if 'messages' in response: - messages.extend(response['messages']) + yield from response['messages'] while 'nextPageToken' in response: page_token = response['nextPageToken'] response = service.users().messages().list(userId=user_id, labelIds=label_ids, pageToken=page_token).execute() - messages.extend(response['messages']) + yield from response['messages'] - return messages except errors.HttpError as error: print(json.loads(error.content.decode('utf-8'))['error']['message']) @@ -97,6 +95,7 @@ def labels_dict(service, user_id): class GmailMessage(EmailMessage): _service = get_gmail_service() + _labels = labels_dict(_service, 'me') def msgdict(self): return {'raw': base64.urlsafe_b64encode(self.as_bytes()).decode()} @@ -110,6 +109,18 @@ class GmailMessage(EmailMessage): except errors.HttpError as error: print('An error occurred: %s' % error) + @staticmethod + def list_msg_ids(label, start_history_id=None): + if start_history_id is not None: + return ListHistory(GmailMessage._service, + 'me', + label_id=GmailMessage._labels[label], + start_history_id=start_history_id) + else: + return ListMessagesWithLabels(GmailMessage._service, + 'me', + label_ids=[GmailMessage._labels[label]]) + @classmethod def from_id(cls, msg_id, user_id='me'): try: @@ -123,31 +134,23 @@ class GmailMessage(EmailMessage): except errors.HttpError as error: print(json.loads(error.content.decode('utf-8'))['error']['message']) - def save_emails(update=True): """Download new emails that were labeled swaptions.""" labelsdict = labels_dict(GmailMessage._service, 'me') - p = Path(os.getenv("DATA_DIR")) / Path('swaptions') + DATA_DIR = Path(os.getenv("DATA_DIR")) if update: try: - with open(os.path.join(os.environ['DATA_DIR'], '.lastHistoryId')) as fh: - last_history_id = int(fh.read()) + last_history_id = int((DATA_DIR / ".lastHistoryID").read_text()) except FileNotFoundError: sys.exit() - email_list = ListHistory(GmailMessage._service, - 'me', - label_id=labelsdict['swaptions'], - start_history_id=last_history_id) existing_msgs = [] else: - email_list = ListMessagesWithLabels(GmailMessage._service, - 'me', - labelsdict['swaptions']) existing_msgs = set(str(x).split("_")[1] for x in p.iterdir() if x.is_file()) + last_history_id = None - for msg in email_list: + for msg in GmailMessage.list_msg_ids('swaptions', last_history_id): if msg['id'] in existing_msgs: continue try: @@ -164,14 +167,14 @@ def save_emails(update=True): logging.error("error decoding " + msg['id']) continue else: - email = p / "{:%Y-%m-%d %H-%M-%S}_{}".format(date, msg['id']) + email = (DATA_DIR / "swaptions" / + f"{date:%Y-%m-%d %H-%M-%S}_{msg['id']}") with email.open("w") as fh: fh.write(subject + "\r\n") fh.write(content) try: new_history_id = message.history_id - with open(os.path.join(os.environ['DATA_DIR'], '.lastHistoryId'), 'w') as fh: - fh.write(new_history_id) + (DATA_DIR / ".lastHistoryID").write_text(message.history_id) except UnboundLocalError: pass |
