about summary refs log tree commit diff stats
path: root/python/download_emails.py
diff options
context:
space:
mode:
Diffstat (limited to 'python/download_emails.py')
-rw-r--r-- python/download_emails.py 41
1 files changed, 22 insertions, 19 deletions
diff --git a/python/download_emails.py b/python/download_emails.py
index 6127e38c..33a78b85 100644
--- a/python/download_emails.py
+++ b/python/download_emails.py
@@ -30,17 +30,15 @@ def ListMessagesWithLabels(service, user_id, label_ids=[]):
try:
response = service.users().messages().list(userId=user_id,
labelIds=label_ids).execute()
- messages = []
if 'messages' in response:
- messages.extend(response['messages'])
+ yield from response['messages']
while 'nextPageToken' in response:
page_token = response['nextPageToken']
response = service.users().messages().list(userId=user_id,
labelIds=label_ids,
pageToken=page_token).execute()
- messages.extend(response['messages'])
+ yield from response['messages']
- return messages
except errors.HttpError as error:
print(json.loads(error.content.decode('utf-8'))['error']['message'])
@@ -97,6 +95,7 @@ def labels_dict(service, user_id):
class GmailMessage(EmailMessage):
_service = get_gmail_service()
+ _labels = labels_dict(_service, 'me')
def msgdict(self):
return {'raw': base64.urlsafe_b64encode(self.as_bytes()).decode()}
@@ -110,6 +109,18 @@ class GmailMessage(EmailMessage):
except errors.HttpError as error:
print('An error occurred: %s' % error)
+ @staticmethod
+ def list_msg_ids(label, start_history_id=None):
+ if start_history_id is not None:
+ return ListHistory(GmailMessage._service,
+ 'me',
+ label_id=GmailMessage._labels[label],
+ start_history_id=start_history_id)
+ else:
+ return ListMessagesWithLabels(GmailMessage._service,
+ 'me',
+ label_ids=[GmailMessage._labels[label]])
+
@classmethod
def from_id(cls, msg_id, user_id='me'):
try:
@@ -123,31 +134,23 @@ class GmailMessage(EmailMessage):
except errors.HttpError as error:
print(json.loads(error.content.decode('utf-8'))['error']['message'])
-
def save_emails(update=True):
"""Download new emails that were labeled swaptions."""
labelsdict = labels_dict(GmailMessage._service, 'me')
- p = Path(os.getenv("DATA_DIR")) / Path('swaptions')
+ DATA_DIR = Path(os.getenv("DATA_DIR"))
if update:
try:
- with open(os.path.join(os.environ['DATA_DIR'], '.lastHistoryId')) as fh:
- last_history_id = int(fh.read())
+ last_history_id = int((DATA_DIR / ".lastHistoryID").read_text())
except FileNotFoundError:
sys.exit()
- email_list = ListHistory(GmailMessage._service,
- 'me',
- label_id=labelsdict['swaptions'],
- start_history_id=last_history_id)
existing_msgs = []
else:
- email_list = ListMessagesWithLabels(GmailMessage._service,
- 'me',
- labelsdict['swaptions'])
existing_msgs = set(str(x).split("_")[1] for x in p.iterdir() if x.is_file())
+ last_history_id = None
- for msg in email_list:
+ for msg in GmailMessage.list_msg_ids('swaptions', last_history_id):
if msg['id'] in existing_msgs:
continue
try:
@@ -164,14 +167,14 @@ def save_emails(update=True):
logging.error("error decoding " + msg['id'])
continue
else:
- email = p / "{:%Y-%m-%d %H-%M-%S}_{}".format(date, msg['id'])
+ email = (DATA_DIR / "swaptions" /
+ f"{date:%Y-%m-%d %H-%M-%S}_{msg['id']}")
with email.open("w") as fh:
fh.write(subject + "\r\n")
fh.write(content)
try:
new_history_id = message.history_id
- with open(os.path.join(os.environ['DATA_DIR'], '.lastHistoryId'), 'w') as fh:
- fh.write(new_history_id)
+ (DATA_DIR / ".lastHistoryID").write_text(message.history_id)
except UnboundLocalError:
pass