"""Import e-mail replies from a Gmail inbox as comments in ``famille.db``.

The module exposes a small Gmail helper layer (service construction, a
sendable ``EmailMessage`` subclass, label/message listing) and, when run as a
script, stores every not-yet-seen inbox message addressed to
``<something>+<news_id>@...`` in the ``comments`` table.
"""
import base64
import email
import email.policy
import json
import os
import pickle
import re
import sqlite3
from contextlib import closing
from email.message import EmailMessage
from email.utils import parseaddr, parsedate_to_datetime

from apiclient import errors
from apiclient.discovery import build
from bs4 import BeautifulSoup
from google.oauth2.credentials import Credentials
from google_auth_oauthlib.flow import InstalledAppFlow
from pytz import timezone

SCOPES = ['https://www.googleapis.com/auth/gmail.modify']
CLIENT_SECRET_FILE = os.path.expanduser('~/client_id.json')
APPLICATION_NAME = 'Famille'

# Captures whatever sits between the first '+' and the following '@' of the
# "To" header; the script interprets that capture as the integer news id.
_NEWS_ID_RE = re.compile(r"[^+]*\+([^@]*)")

# File holding the pickled set of message ids that were already imported.
_SEEN_PATH = '.pickle'

_INSERT_COMMENT_SQL = "INSERT INTO comments" \
    "(date, news_id, user_id, content, content_cache) " \
    "VALUES(?, ?, ?, ?, ?)"


def get_gmail_service():
    """Build an authorized Gmail API service object.

    Stored credentials are read from ``~/.credentials/news.horel@gmail.com``.
    If they cannot be loaded, the OAuth2 console flow is run with
    ``CLIENT_SECRET_FILE`` and ``SCOPES`` and the resulting credentials are
    written back to that file as JSON.

    Returns:
        The service object returned by ``build('gmail', 'v1', ...)``.
    """
    credential_dir = os.path.expanduser('~/.credentials')
    os.makedirs(credential_dir, exist_ok=True)
    credential_path = os.path.join(credential_dir, 'news.horel@gmail.com')
    try:
        credentials = Credentials.from_authorized_user_file(credential_path)
    except Exception:
        # Any failure to load stored credentials (missing file, bad JSON,
        # missing fields) falls back to the interactive flow.  Unlike a bare
        # ``except:`` this no longer swallows KeyboardInterrupt/SystemExit.
        flow = InstalledAppFlow.from_client_secrets_file(CLIENT_SECRET_FILE,
                                                         SCOPES)
        # NOTE(review): run_console() may be unavailable in recent
        # google-auth-oauthlib releases -- confirm against the pinned version.
        credentials = flow.run_console()
        saved_attrs = ("token", "refresh_token", "id_token", "token_uri",
                       "client_id", "client_secret", "scopes")
        to_save = {attr: getattr(credentials, attr) for attr in saved_attrs}
        with open(credential_path, "w") as fh:
            json.dump(to_save, fh)

    service = build('gmail', 'v1', credentials=credentials)
    return service


def _http_error_message(error):
    """Return the API error message carried by an ``HttpError``.

    Falls back to ``str(error)`` when the response body is not the expected
    ``{"error": {"message": ...}}`` JSON document, so that reporting an error
    can never raise a second exception from inside the handler.
    """
    try:
        return json.loads(error.content.decode('utf-8'))['error']['message']
    except (AttributeError, KeyError, TypeError, ValueError):
        return str(error)


class GmailMessage(EmailMessage):
    """An ``EmailMessage`` that can be sent and fetched through the Gmail API."""

    # Shared service object; created once, when the class body is executed.
    _service = get_gmail_service()

    def msgdict(self):
        """Return the message as the ``{'raw': <urlsafe base64>}`` API body."""
        return {'raw': base64.urlsafe_b64encode(self.as_bytes()).decode()}

    def send(self):
        """Send this message as user ``me`` and print the resulting id.

        An ``HttpError`` is reported on stdout instead of being raised.
        """
        try:
            message = (self._service.users().messages().
                       send(userId='me', body=self.msgdict())
                       .execute())
            print('Message Id: %s' % message['id'])
        except errors.HttpError as error:
            print('An error occurred: %s' % error)

    @classmethod
    def from_id(cls, msg_id, user_id='me'):
        """Fetch message ``msg_id`` in raw format and parse it.

        Args:
            msg_id: Gmail message id.
            user_id: Mailbox owner; defaults to ``'me'``.

        Returns:
            The message parsed with ``email.policy.EmailPolicy()``, or
            ``None`` when the API call fails with an ``HttpError`` (the
            error message is printed).
        """
        try:
            message = (cls._service.users().messages().
                       get(userId=user_id, id=msg_id, format='raw').execute())
            return email.message_from_bytes(
                base64.urlsafe_b64decode(message['raw']),
                policy=email.policy.EmailPolicy())
        except errors.HttpError as error:
            print(_http_error_message(error))


def ListMessagesWithLabels(service, user_id, label_ids=None):
    """List all Messages of the user's mailbox with label_ids applied.

    Args:
        service: Authorized Gmail API service instance.
        user_id: User's email address. The special value "me"
            can be used to indicate the authenticated user.
        label_ids: Only return Messages with these labelIds applied.
            ``None`` (the default) is treated as an empty list.

    Returns:
        List of Messages that have all required Labels applied. Note that the
        returned list contains Message IDs, you must use get with the
        appropriate id to get the details of a Message.  ``None`` is returned
        when the API call fails with an ``HttpError`` (the error message is
        printed).
    """
    if label_ids is None:
        # Avoid a shared mutable default while still sending ``[]``.
        label_ids = []
    try:
        response = service.users().messages().list(
            userId=user_id, labelIds=label_ids).execute()
        messages = list(response.get('messages', []))

        while 'nextPageToken' in response:
            page_token = response['nextPageToken']
            response = service.users().messages().list(
                userId=user_id, labelIds=label_ids,
                pageToken=page_token).execute()
            # A page without a 'messages' key must not abort the listing.
            messages.extend(response.get('messages', []))
        return messages
    except errors.HttpError as error:
        print(_http_error_message(error))


def labels_dict(service, user_id):
    """Returns a dictionary mapping labels to labelids.

    Args:
        service: Authorized Gmail API service instance.
        user_id: User's email address. The special value "me"
            can be used to indicate the authenticated user.

    Returns:
        Dictionary mapping label names to label ids, or ``None`` when the
        API call fails with an ``HttpError`` (the error message is printed).
    """
    try:
        response = service.users().labels().list(userId=user_id).execute()
        labels = response['labels']
        return {label['name']: label['id'] for label in labels}
    except errors.HttpError as error:
        print(_http_error_message(error))


def extract_response(content):
    """Return the first ``<div dir="ltr">`` element of an HTML document.

    Args:
        content: HTML markup, parsed with the ``lxml`` parser.

    Returns:
        The matching element, or ``None`` when the document has none.
    """
    soup = BeautifulSoup(content, 'lxml')
    reply = soup.find('div', dir='ltr')
    return reply


def _load_seen(path):
    """Load the pickled set of imported message ids (empty set if absent)."""
    # NOTE: pickle executes arbitrary code on load; this file must only ever
    # be one written by this script.
    try:
        with open(path, 'rb') as fh:
            return pickle.load(fh)
    except FileNotFoundError:
        return set()


def _extract_comment(mail):
    """Return ``(comment, comment_cache)`` for a message, or ``None``.

    For an HTML body the cache is the markup of the element found by
    ``extract_response`` and the comment is its text; for a plain-text body
    both are the body text.  ``None`` means no usable body was found.
    """
    body = mail.get_body()
    if body is None:
        return None
    content_type = body.get_content_type()
    if content_type == 'text/html':
        reply = extract_response(body.get_content())
        if reply is None:
            return None
        return reply.get_text(), str(reply)
    if content_type == 'text/plain':
        text = body.get_content()
        return text, text
    return None


def _import_message(db, msg_id):
    """Store one Gmail message as a comment.

    Returns:
        ``True`` when a row was committed, ``False`` when the message was
        skipped (not a reply address, unusable content, or database error).
    """
    mail = GmailMessage.from_id(msg_id)
    if mail is None:
        # from_id already printed the API error.
        return False

    m = _NEWS_ID_RE.match(mail['To'] or '')
    if not m:
        return False

    _, email_addr = parseaddr(mail['From'] or '')
    c = db.execute("SELECT id FROM users WHERE email = ?", (email_addr,))
    row = c.fetchone()
    c.close()
    if row is None:
        print('%s: unknown sender %s' % (msg_id, email_addr))
        return False
    user_id = row[0]

    extracted = _extract_comment(mail)
    if extracted is None:
        # Previously this path reused the previous message's comment.
        print('%s: no usable message body' % msg_id)
        return False
    comment, comment_cache = extracted

    try:
        news_id = int(m.groups()[0])
        date = (parsedate_to_datetime(mail['Date']).
                astimezone(timezone('utc')).
                replace(tzinfo=None))
    except (TypeError, ValueError) as e:
        print('%s: %s' % (msg_id, e))
        return False

    try:
        db.execute(_INSERT_COMMENT_SQL,
                   (date, news_id, user_id, comment, comment_cache))
    except sqlite3.Error as e:
        print(e)
        db.rollback()
        return False
    db.commit()
    print(msg_id, news_id, email_addr, date, comment)
    return True


def main():
    """Import every not-yet-seen INBOX reply into ``famille.db``."""
    db = sqlite3.connect("famille.db", detect_types=sqlite3.PARSE_DECLTYPES)
    with closing(db):
        db.row_factory = sqlite3.Row
        db.execute("PRAGMA foreign_keys=ON")

        already_seen = _load_seen(_SEEN_PATH)
        try:
            # The listing returns None on an API error; treat it as empty.
            listing = ListMessagesWithLabels(GmailMessage._service, 'me',
                                             ['INBOX']) or []
            for entry in listing:
                msg_id = entry['id']
                if msg_id in already_seen:
                    continue
                if _import_message(db, msg_id):
                    already_seen.add(msg_id)
        finally:
            # Persist progress even if the loop was interrupted, so that
            # already-committed comments are not inserted again next run.
            with open(_SEEN_PATH, 'wb') as fh:
                pickle.dump(already_seen, fh)


if __name__ == "__main__":
    main()