aboutsummaryrefslogtreecommitdiffstats
path: root/python/download_emails.py
blob: 1453b51c36a2c83d56c211ee03fd4f2c12a89eb4 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
from datetime import datetime
import os
from pathlib import Path

from apiclient.discovery import build
from apiclient import errors
from httplib2 import Http
import oauth2client
from oauth2client import client
from oauth2client import tools
import json
import base64
import binascii

import argparse
# Parse the command line at import time using oauth2client's stock argument
# parser; the resulting namespace is handed to tools.run_flow() in
# get_credentials().
flags = argparse.ArgumentParser(parents=[tools.argparser]).parse_args()

# OAuth2 scope requested for the Gmail API (the "gmail.readonly" scope).
SCOPES = 'https://www.googleapis.com/auth/gmail.readonly'
# Client secrets file passed to client.flow_from_clientsecrets(); the path is
# relative, so it is resolved against the current working directory.
CLIENT_SECRET_FILE = 'secret.json'
# Assigned to the OAuth flow's user_agent in get_credentials().
APPLICATION_NAME = 'Swaptions'


def get_credentials():
    """Gets valid user credentials from storage.

    If nothing has been stored, or if the stored credentials are invalid,
    the OAuth2 flow is completed to obtain the new credentials.

    The credentials are cached in ``~/.credentials/gmail-quickstart.json``;
    the ``~/.credentials`` directory is created if it does not exist yet.

    Returns:
        Credentials, the obtained credential.
    """
    # Importing the ``oauth2client`` package does not by itself guarantee
    # that its ``file`` submodule is loaded, so import it explicitly before
    # touching ``oauth2client.file.Storage``.
    import oauth2client.file

    credential_dir = os.path.join(os.path.expanduser('~'), '.credentials')
    # exist_ok=True avoids the check-then-create race of exists() followed
    # by makedirs().
    os.makedirs(credential_dir, exist_ok=True)
    credential_path = os.path.join(credential_dir,
                                   'gmail-quickstart.json')

    store = oauth2client.file.Storage(credential_path)
    credentials = store.get()
    if not credentials or credentials.invalid:
        # No usable cached credentials: run the interactive OAuth2 flow,
        # which is given the store alongside the parsed command-line flags.
        flow = client.flow_from_clientsecrets(CLIENT_SECRET_FILE, SCOPES)
        flow.user_agent = APPLICATION_NAME
        credentials = tools.run_flow(flow, store, flags)
        print('Storing credentials to ' + credential_path)
    return credentials

def ListMessagesWithLabels(service, user_id, label_ids=None):
    """List all Messages of the user's mailbox with label_ids applied.

    Every result page is fetched by following ``nextPageToken`` until the
    API stops returning one.

    Args:
     service: Authorized Gmail API service instance.
     user_id: User's email address. The special value "me"
     can be used to indicate the authenticated user.
     label_ids: Only return Messages with these labelIds applied.
     When omitted, an empty list is sent.

    Returns:
     List of Messages that have all required Labels applied. Note that the
     returned list contains Message IDs, you must use get with the
     appropriate id to get the details of a Message.
     If the API raises an HttpError, its message is printed and None is
     returned instead.
    """
    # A fresh list per call instead of a shared mutable default argument.
    if label_ids is None:
        label_ids = []

    messages = []
    request_args = {'userId': user_id, 'labelIds': label_ids}
    try:
        while True:
            response = service.users().messages().list(
                **request_args).execute()
            # A page may legitimately carry no 'messages' key (e.g. an
            # empty result), so do not index it unconditionally.
            messages.extend(response.get('messages', []))
            page_token = response.get('nextPageToken')
            if not page_token:
                return messages
            request_args['pageToken'] = page_token
    except errors.HttpError as error:
        print(json.loads(error.content.decode('utf-8'))['error']['message'])
        return None

def getListLabels(service, user_id):
    """Fetch the user's labels and map each label name to its id.

    Args:
     service: Authorized Gmail API service instance.
     user_id: User's email address, or the special value "me".

    Returns:
     Dict of label name -> label id. If the API raises an HttpError, its
     message is printed and None is returned implicitly.
    """
    try:
        reply = service.users().labels().list(userId=user_id).execute()
        name_to_id = {}
        for entry in reply['labels']:
            name_to_id[entry['name']] = entry['id']
        return name_to_id
    except errors.HttpError as error:
        details = json.loads(error.content.decode('utf-8'))
        print(details['error']['message'])

def get_msg(service, user_id, msg_id):
    """Retrieve one message in 'full' format.

    Args:
     service: Authorized Gmail API service instance.
     user_id: User's email address, or the special value "me".
     msg_id: Id of the message to fetch.

    Returns:
     The message resource returned by the API. If the API raises an
     HttpError, its message is printed and None is returned implicitly.
    """
    try:
        request = service.users().messages().get(
            userId=user_id, id=msg_id, format='full')
        return request.execute()
    except errors.HttpError as error:
        details = json.loads(error.content.decode('utf-8'))
        print(details['error']['message'])

def msg_content(msg):
    """Extract the subject and the decoded body text from a message.

    Args:
     msg: Message resource in 'full' format, i.e. a dict carrying
     msg['payload']['headers'] and msg['payload']['body']['data'].

    Returns:
     Tuple (subject, content), where content is the body decoded as UTF-8.

    Raises:
     KeyError: a required key (e.g. the body 'data') is missing.
     IndexError: the message has no Subject header.
     binascii.Error: the body data is not valid base64.
     UnicodeDecodeError: the decoded body is not valid UTF-8.
    """
    payload = msg['payload']
    # Header field names are case-insensitive, so compare accordingly.
    subject = [header['value'] for header in payload['headers']
               if header['name'].lower() == 'subject'][0]
    data = payload['body']['data']
    # The Gmail API delivers body data in the URL-safe base64 alphabet
    # ('-' and '_'); the standard decoder silently drops those characters.
    # Restore any stripped '=' padding so unpadded data decodes as well.
    data += '=' * (-len(data) % 4)
    content = base64.urlsafe_b64decode(data).decode('utf-8')
    return subject, content

def main():
    """Download the messages labelled 'swaptions' that are not yet on disk.

    Authenticates against the Gmail API, looks up the id of the
    'swaptions' label and, for every message carrying that label whose id
    is not already a file name in the 'quotes' directory, writes the
    subject line followed by the decoded body to
    '../../data/swaptions/<message id>'.

    Messages that cannot be fetched or decoded are skipped; if the label
    list or the message list cannot be retrieved, the function returns
    without downloading anything (the helpers have already printed the
    API error).
    """
    credentials = get_credentials()
    service = build('gmail', 'v1', http=credentials.authorize(Http()))
    labelsdict = getListLabels(service, 'me')
    if labelsdict is None:
        return

    # NOTE(review): already-downloaded ids are read from 'quotes' while new
    # files are written to '../../data/swaptions' -- confirm that both
    # paths refer to the same directory, otherwise nothing is ever skipped.
    p = Path('quotes')
    current_msgs = {f.name for f in p.iterdir() if f.is_file()}

    messages = ListMessagesWithLabels(service, 'me', labelsdict['swaptions'])
    if messages is None:
        return

    for msg in messages:
        if msg['id'] in current_msgs:
            continue
        full_msg = get_msg(service, 'me', msg['id'])
        if full_msg is None:
            # get_msg already reported the HTTP error; move on.
            continue
        try:
            subject, content = msg_content(full_msg)
        except (binascii.Error, KeyError):
            print("error decoding {0}".format(msg['id']))
            continue
        email = Path("../../data/swaptions/{0}".format(msg['id']))
        # The body was decoded as UTF-8, so write it back as UTF-8 rather
        # than in the platform default encoding; newline="" keeps the
        # explicit "\r\n" from being translated a second time.
        with email.open("w", encoding="utf-8", newline="") as fh:
            fh.write(subject + "\r\n")
            fh.write(content)

# Run the download only when this file is executed as a script.
if __name__ == '__main__':
    main()