diff options
Diffstat (limited to 'main.py')
| -rw-r--r-- | main.py | 182 |
1 files changed, 0 insertions, 182 deletions
"""Bottle web service that queues follower and lookup jobs for workers.

HTTP routes push jobs onto multiprocessing queues.  Worker processes (one
per line of ``api_accounts.txt`` and ``scraping_accounts.txt``) consume
them and put the path of each result file on ``done_queue``.  ``/fetch``
bundles the finished files into a tar archive and serves it.
"""
# Reconstructed from the deletion diff of main.py (blob be565a1).

from bottle import route, run, request, static_file

from scraper import Driver
from api import RequestHandler

from multiprocessing import Process, Queue
try:
    from Queue import Empty  # Python 2 module name
except ImportError:
    from queue import Empty  # same exception under the Python 3 name
from time import sleep
from json import dumps
from uuid import uuid1
import tarfile
import os.path
import os
from glob import glob


# Seconds an idle worker waits before polling its queues again.
POLL_INTERVAL = 0.5
# Seconds /restart waits for each terminated worker to be reaped.
JOIN_TIMEOUT = 5

long_queue = Queue()
short_queue = Queue()
lookup_queue = Queue()
short_lookup_queue = Queue()
processes = []
done_queue = Queue()


def _spawn(target, args):
    """Start *target* in a daemon process and record it in ``processes``."""
    worker = Process(target=target, args=args)
    worker.daemon = True
    processes.append(worker)
    worker.start()
    return worker


def _poll(queue):
    """Return ``(True, item)`` if *queue* has an item ready, else
    ``(False, None)``; never blocks."""
    try:
        return True, queue.get(False)
    except Empty:
        return False, None


def _drain(queue):
    """Return every item currently retrievable from *queue* as a list."""
    items = []
    while True:
        found, item = _poll(queue)
        if not found:
            return items
        items.append(item)


def _qsize(queue):
    """Return ``queue.qsize()``, or ``None`` where it is not implemented."""
    try:
        return queue.qsize()
    except NotImplementedError:
        # multiprocessing.Queue.qsize() is unavailable on some platforms.
        return None


def start():
    """Create fresh job queues and launch one worker process per account.

    ``done_queue`` is deliberately left alone so results that were already
    produced survive a restart.  Each line of ``api_accounts.txt``
    contributes its whitespace-separated fields from the third one onward
    as ``RequestHandler`` arguments; each line of ``scraping_accounts.txt``
    contributes its first two fields as ``Driver`` arguments.  Lines that
    yield no credentials (for example blank lines) are skipped.
    """
    global long_queue, short_queue, lookup_queue, short_lookup_queue, \
        processes
    processes = []
    long_queue = Queue()
    short_queue = Queue()
    short_lookup_queue = Queue()
    lookup_queue = Queue()

    with open("api_accounts.txt") as f:
        for line in f:
            credentials = line.split()[2:]
            if not credentials:
                continue
            handler = RequestHandler(*credentials)
            # The short-lookup queue is passed explicitly so the worker
            # does not depend on inheriting this module's globals.
            _spawn(api_target, (handler, long_queue, short_queue,
                                lookup_queue, done_queue,
                                short_lookup_queue))

    with open("scraping_accounts.txt") as f:
        for line in f:
            credentials = line.split()[:2]
            if not credentials:
                continue
            driver = Driver(*credentials)
            _spawn(scraper_target, (driver, short_queue, done_queue))


@route('/short_lookup', method='POST')
def short_lookup():
    """Queue a short lookup from the comma-separated ``list`` form field.

    Consecutive items are grouped into 2-tuples; a trailing unpaired item
    is dropped.  The API worker treats the first element of each pair as
    the value to hand to ``handler.lookup``.
    """
    query_list = request.forms.list.split(",")
    # Materialised as a list so the job is the same on Python 2 and 3.
    user_list = list(zip(query_list[0::2], query_list[1::2]))
    short_lookup_queue.put(user_list)


@route('/restart')
def restart():
    """Terminate all worker processes and start a fresh set."""
    for worker in processes:
        worker.terminate()
    # Reap the terminated workers so they do not linger as zombies.
    for worker in processes:
        worker.join(JOIN_TIMEOUT)
    start()


# NOTE: the name shadows the Python 2 builtin ``long``; it is kept because
# it is this module's public handler name.
@route('/long')
def long():
    """Queue the ``id`` query parameter as a followers job for the API
    workers."""
    user_id = request.query.id
    long_queue.put(user_id)


@route('/short')
def short():
    """Queue an ``(id, user_name)`` followers job on the short queue."""
    user_id = request.query.id
    user_name = request.query.user_name
    short_queue.put((user_id, user_name))


@route('/lookup', method='POST')
def lookup():
    """Queue a lookup job from the comma-separated ``list`` form field."""
    id_list = request.forms.list.split(",")
    lookup_queue.put(id_list)


@route('/status')
def status():
    """Return a JSON object with queue sizes, live worker count and the
    number of result files on disk.

    A queue size is reported as ``null`` on platforms where it cannot be
    determined.
    """
    answer_dict = {
        "long": _qsize(long_queue),
        "short": _qsize(short_queue),
        "lookup": _qsize(lookup_queue),
        "short_lookup": _qsize(short_lookup_queue),
        "done": _qsize(done_queue),
        "processes": len([p for p in processes if p.is_alive()]),
        "users": len(glob("data/users/[0-9]*.txt")),
        "lookups": len(glob("data/users/lookup*.txt"))
    }
    return dumps(answer_dict)


@route('/fetch')
def fetch():
    """Archive every finished result file and serve the archive.

    Archives left over from earlier calls are deleted first.  Queue
    entries that are empty or name a file that no longer exists are
    skipped, so one bad entry cannot abort the archive and discard the
    results dequeued with it.
    """
    for stale in glob("data/users/*.tar.gz"):
        os.remove(stale)

    # Drain before writing so a failure while archiving cannot strand
    # half-consumed queue contents inside a generator.
    finished = _drain(done_queue)

    filename = os.path.join("data", "users", "archive-"
                            + str(uuid1()) + ".tar.gz")
    with tarfile.open(filename, "w:gz") as tar:
        for name in finished:
            if name and os.path.exists(name):
                tar.add(name)
    return static_file(filename, root=".")


def scraper_target(driver, short_queue, done_queue):
    """Worker loop: scrape followers for jobs taken from *short_queue*.

    Each job is an ``(user_id, user_name)`` tuple; the value returned by
    ``driver.get_followers`` is put on *done_queue*.  The loop pauses
    ``POLL_INTERVAL`` seconds after every iteration, whether or not a job
    was available.
    """
    while True:
        found, job = _poll(short_queue)
        if found:
            user_id, user_name = job
            done_queue.put(driver.get_followers(user_id, user_name))
        sleep(POLL_INTERVAL)


def _run_followers_job(handler, long_queue, short_queue, done_queue):
    """Run one followers job, preferring *long_queue*; return whether a
    job was found."""
    found, user_id = _poll(long_queue)
    if not found:
        found, job = _poll(short_queue)
        if not found:
            return False
        # Short jobs are (user_id, user_name); the API needs only the id.
        user_id = job[0]
    done_queue.put(handler.get_followers(user_id))
    return True


def _run_lookup_job(handler, lookup_queue, short_lookup_queue, done_queue):
    """Run one lookup or short-lookup job; return whether one was found."""
    if handler.ready("lookup"):
        found, id_list = _poll(lookup_queue)
        if found:
            filename = handler.lookup(id_list)
        else:
            found, user_list = _poll(short_lookup_queue)
            if not found:
                return False
            # A list (not a one-shot generator) so handler.lookup gets
            # the same kind of argument as for regular lookup jobs.
            filename = handler.lookup([user[0] for user in user_list])
    else:
        found, user_list = _poll(short_lookup_queue)
        if not found:
            return False
        filename = handler.short_lookup(user_list)
    done_queue.put(filename)
    return True


def api_target(handler, long_queue, short_queue, lookup_queue, done_queue,
               short_lookup_queue=None):
    """Worker loop: serve followers and lookup jobs through *handler*.

    Per iteration: when ``handler.ready("followers")`` is true, one
    followers job is taken from *long_queue*, or failing that from
    *short_queue*.  If no followers job ran, one lookup job is attempted:
    with ``handler.ready("lookup")`` true, from *lookup_queue* and then
    *short_lookup_queue* (both via ``handler.lookup``); otherwise from
    *short_lookup_queue* via ``handler.short_lookup``.  Every result is
    put on *done_queue*.  When nothing could be done the loop sleeps
    ``POLL_INTERVAL`` seconds instead of spinning.

    *short_lookup_queue* defaults to the module-level queue so existing
    five-argument callers keep working.
    """
    if short_lookup_queue is None:
        short_lookup_queue = globals()["short_lookup_queue"]
    while True:
        if handler.ready("followers") and _run_followers_job(
                handler, long_queue, short_queue, done_queue):
            continue
        if not _run_lookup_job(handler, lookup_queue, short_lookup_queue,
                               done_queue):
            sleep(POLL_INTERVAL)


if __name__ == "__main__":
    import sys
    if len(sys.argv) < 2:
        sys.exit("usage: %s PORT" % sys.argv[0])
    start()
    run(host="0.0.0.0", port=int(sys.argv[1]))
