From ece1d828d53d6123fcecb5ea8bf9b126d1728ccc Mon Sep 17 00:00:00 2001 From: Thibaut Horel Date: Fri, 24 Oct 2014 12:16:51 -0400 Subject: Add code --- twitter/api_accounts.txt | 1 - twitter/dispatcher.py | 2 -- twitter/scraper.py | 3 +-- twitter/stream.py | 7 +++---- 4 files changed, 4 insertions(+), 9 deletions(-) (limited to 'twitter') diff --git a/twitter/api_accounts.txt b/twitter/api_accounts.txt index 836b10d..cd0dea6 100644 --- a/twitter/api_accounts.txt +++ b/twitter/api_accounts.txt @@ -1,4 +1,3 @@ -thibaut.horel@gmail.com Dlmatc06 GT3ILinlqcuChZY2ueOb1Q 9Jx9WGyfNea35X2kYCAN8hh9WkZl6wD7b4yXkY 2291723059-dvaHVGA50FYgDtxxZZQoBU0MQYysdaYOFIyOeLa 70GdBOKCIQWliX1hllfgmek2vEvrnKBqm0bBfApbP38TO zaran.krleza+1@gmail.com i6rkXWj78 Fle9xRwFyXO3SV7zR7KDg 0rAzjUo6yyx0DtHR6EvIQPenynJKmLKgPvyGRqj4w 2304251221-ztXyr6HFBOuDbPiWqFQT3wWAQfW6iEw7RoQXrwW 6xf5T89H4wneiiSskuRtL8GWHhK0g84CNmPdCeAOiXCP8 zaran.krleza+6@gmail.com och9phoM6qu HIIXtDoVIbc54IFoMzRmAQ E57OPRvxIOH5CS2ROSBMs0jS0UY5lCMsxKEk1mBws 2315047123-0skfirkKYl78eo66TFc3g6pkqzuVWZLGYIQRLny m7kyeesr726sSyF8UTQCFYssphbhqPeVftbmC67uwvrrf zaran.krleza+7@gmail.com ohr8ID7xoo DhjatHIduiUWDfwCPy13Ig 9QYIrGugvMXeMSqe67t7ylIPC8XXfDlvRAM2mwB6Rs 2315047440-RSva8oO8Mz0KL4npovzOCsg3WEbY7JWgbXR5BeJ Oy8iIhQrsVH9D1eQ97sQPlTrExcKDtarLQEqpcXDO1fMl diff --git a/twitter/dispatcher.py b/twitter/dispatcher.py index 56fb9f7..2bba1c3 100644 --- a/twitter/dispatcher.py +++ b/twitter/dispatcher.py @@ -51,8 +51,6 @@ class Dispatcher: def add_user(self, user_id, user_name, followers_count): self.users[user_id] = user_name - if int(followers_count) >= 5000: - return if (not pa.isfile(pa.join("data", "users", user_id + ".txt")) and user_id not in self.current_followers): self.followers_queue[user_id] = (user_name, followers_count) diff --git a/twitter/scraper.py b/twitter/scraper.py index 49b116a..e912782 100644 --- a/twitter/scraper.py +++ b/twitter/scraper.py @@ -92,5 +92,4 @@ class Driver: if __name__ == "__main__": credentials = open("scraping_accounts.txt").readline().strip().split() driver = Driver(*credentials[:2]) - # driver.get_followers("23302126", "flipper509") - print driver.get_profile(100, "thibauthorel") + driver.get_followers("23302126", "flipper509") diff --git a/twitter/stream.py b/twitter/stream.py index 71cf615..4fe38c4 100644 --- a/twitter/stream.py +++ b/twitter/stream.py @@ -24,9 +24,7 @@ class Listener(StreamListener): def get_concepts(self, entities): hashtags = (hashtag["text"].lower() for hashtag in entities["hashtags"]) - users = (user["screen_name"].lower() - for user in entities["user_mentions"]) - return set(chain(hashtags, users)) + return set(hashtags) def on_status(self, tweet): concepts = self.get_concepts(tweet.entities) @@ -35,6 +33,7 @@ class Listener(StreamListener): str(tweet.user.friends_count), str(tweet.user.verified), tweet.created_at.isoformat()]) + print str(dict(tweet)) for concept in concepts: if concept in self.fhandlers: fh = self.fhandlers[concept] @@ -47,7 +46,7 @@ def process(filename, cred_file): concepts = [line.strip() for line in f] credentials = open(cred_file).readline().strip().split() os.chdir("data") - entities = [("#" + concept, "@" + concept) for concept in concepts] + entities = [("#" + concept) for concept in concepts] track = chain.from_iterable(entities) auth = OAuthHandler(*credentials[2:4]) auth.set_access_token(*credentials[4:]) -- cgit v1.2.3-70-g09d2