From 34b4b25a2fc9f317a88c48ea8beb93ef1c32740e Mon Sep 17 00:00:00 2001
From: Guillaume Horel
Date: Mon, 24 Jul 2017 11:17:41 -0400
Subject: prevent duplicate comments by keeping a cache
---
email_helpers.py | 10 ++++++++++
1 file changed, 10 insertions(+)
(limited to 'email_helpers.py')
diff --git a/email_helpers.py b/email_helpers.py
index 8f5b2ac..eeb2168 100644
--- a/email_helpers.py
+++ b/email_helpers.py
@@ -121,6 +121,7 @@ def extract_response(content):
return reply
if __name__ == "__main__":
+ import pickle
import re
import sqlite3
@@ -132,8 +133,15 @@ if __name__ == "__main__":
sql_str = "INSERT INTO comments" \
"(date, news_id, user_id, content, content_cache) " \
"VALUES(?, ?, ?, ?, ?)"
+ try:
+ with open('.pickle') as fh:
+ already_seen = pickle.load(fh)
+ except FileNotFoundError:
+ already_seen = set()
for msg_id in ListMessagesWithLabels(GmailMessage._service, 'me', 'INBOX'):
+ if msg_id in already_seen:
+ continue
mail = GmailMessage.from_id(msg_id['id'])
m = regex.match(mail['To'])
if m:
@@ -160,3 +168,5 @@ if __name__ == "__main__":
continue
db.commit()
print(msg_id['id'], news_id, email_addr, date, comment)
+ already_seen.add(msg_id['id'])
+ pickle.dump(already_seen, '.pickle')
-- cgit v1.2.3-70-g09d2