aboutsummaryrefslogtreecommitdiffstats
path: root/python/download_emails.py
blob: aa0e33d004ef81c14c6ae74aab8aead1cdba878b (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
from datetime import datetime
import os
from pathlib import Path

from apiclient.discovery import build
from apiclient import errors
from httplib2 import Http
import oauth2client
from oauth2client import client
from oauth2client import tools
import json
import base64
import binascii
from send_email import get_gmail_service
import argparse
# NOTE: this parses sys.argv at import time (not only when run as a script),
# using oauth2client's shared argument parser as the parent parser.
flags = argparse.ArgumentParser(parents=[tools.argparser]).parse_args()

# The three constants below are not referenced by the code visible in this
# file; presumably they are consumed by the OAuth credential flow -- TODO confirm.
SCOPES = 'https://www.googleapis.com/auth/gmail.readonly'
CLIENT_SECRET_FILE = 'secret.json'
APPLICATION_NAME = 'Swaptions'


def ListMessagesWithLabels(service, user_id, label_ids=None):
    """List all Messages of the user's mailbox with label_ids applied.

    Follows ``nextPageToken`` until every result page has been fetched.

    Args:
     service: Authorized Gmail API service instance.
     user_id: User's email address. The special value "me"
     can be used to indicate the authenticated user.
     label_ids: Only return Messages with these labelIds applied.
     Defaults to an empty list (no label filter).

    Returns:
     List of Messages that have all required Labels applied. Note that the
     returned list contains Message IDs, you must use get with the
     appropriate id to get the details of a Message.
     Returns None (after printing the API error message) if a request
     fails with an HttpError.
    """
    # Avoid a mutable default argument shared between calls.
    if label_ids is None:
        label_ids = []
    try:
        request_args = {'userId': user_id, 'labelIds': label_ids}
        messages = []
        while True:
            response = service.users().messages().list(**request_args).execute()
            # A page may carry no 'messages' key at all (e.g. empty result),
            # so never index it directly.
            messages.extend(response.get('messages', []))
            if 'nextPageToken' not in response:
                break
            # pageToken is only sent for follow-up pages, never on the
            # first request.
            request_args['pageToken'] = response['nextPageToken']

        return messages
    except errors.HttpError as error:
        print(json.loads(error.content.decode('utf-8'))['error']['message'])
        return None

def getListLabels(service, user_id):
    """Return a dict mapping each label name of user_id's mailbox to its id.

    If the labels request fails with an HttpError, the API error message
    is printed and None is returned implicitly.
    """
    try:
        listing = service.users().labels().list(userId=user_id).execute()
    except errors.HttpError as error:
        details = json.loads(error.content.decode('utf-8'))
        print(details['error']['message'])
    else:
        name_to_id = {}
        for entry in listing['labels']:
            name_to_id[entry['name']] = entry['id']
        return name_to_id

def get_msg(service, user_id, msg_id):
    """Fetch one message in 'full' format and return the API response.

    If the request fails with an HttpError, the API error message is
    printed and None is returned implicitly.
    """
    try:
        request = service.users().messages().get(
            userId=user_id,
            id=msg_id,
            format='full',
        )
        return request.execute()
    except errors.HttpError as error:
        details = json.loads(error.content.decode('utf-8'))
        print(details['error']['message'])

def msg_content(msg):
    """Extract the subject line and decoded body text from a Gmail message.

    Args:
     msg: Message resource as returned by messages.get with format='full'.
     Only msg['payload']['headers'] and msg['payload']['body']['data']
     are read.

    Returns:
     Tuple (subject, content): the value of the first 'Subject' header and
     the body data decoded to a str.

    Raises:
     KeyError: a required key is missing (e.g. the body has no 'data').
     IndexError: the message has no 'Subject' header.
     binascii.Error: the body data is not valid base64.
     UnicodeDecodeError: the decoded body is not valid UTF-8.
    """
    subject = [x['value'] for x in msg['payload']['headers'] if x['name']=='Subject'][0]
    data = msg['payload']['body']['data']
    # Restore any '=' padding that was stripped, so the decoder accepts it.
    missing = -len(data) % 4
    if missing:
        data += ('=' if isinstance(data, str) else b'=') * missing
    # Gmail encodes body data with the URL-safe base64 alphabet ('-' and '_').
    # The standard decoder silently drops those characters and corrupts
    # the text, so the URL-safe decoder is required here.
    content = base64.urlsafe_b64decode(data).decode('utf-8')
    return subject, content

def main():
    """Download new Gmail messages labelled 'swaptions' to the data directory.

    Each message whose id is not already a file name in
    ../../data/swaptions/ is fetched and written to a file named after the
    message id: the subject followed by "\\r\\n", then the decoded body.
    Messages that cannot be fetched or decoded are reported and skipped.

    Raises:
     SystemExit: the label list or the message list could not be retrieved.
     KeyError: the mailbox has no label named 'swaptions'.
    """
    service = get_gmail_service()
    labelsdict = getListLabels(service, 'me')
    if labelsdict is None:
        # getListLabels has already printed the API error; fail with a
        # clear message rather than a TypeError on None.
        raise SystemExit("could not list Gmail labels")
    p = Path('../../data/swaptions/')
    current_msgs = {f.name for f in p.iterdir() if f.is_file()}
    messages = ListMessagesWithLabels(service, 'me', labelsdict['swaptions'])
    if messages is None:
        raise SystemExit("could not list messages labelled 'swaptions'")
    for msg in messages:
        if msg['id'] in current_msgs:
            continue
        full_msg = get_msg(service, 'me', msg['id'])
        if full_msg is None:
            # get_msg prints the API error and returns None on failure.
            print("error downloading {0}".format(msg['id']))
            continue
        try:
            subject, content = msg_content(full_msg)
        except (binascii.Error, KeyError, IndexError, UnicodeDecodeError):
            # IndexError: no Subject header; UnicodeDecodeError: body is
            # not UTF-8. One bad message must not abort the whole run.
            print("error decoding {0}".format(msg['id']))
            continue
        email = p / msg['id']
        # Explicit UTF-8 so the write cannot fail part-way on a non-UTF-8
        # locale; a partial file would be treated as already downloaded.
        with email.open("w", encoding="utf-8") as fh:
            fh.write(subject + "\r\n")
            fh.write(content)

if __name__ == '__main__':
    main()