diff --git a/main.py b/main.py
index 12c469e..889fc60 100644
--- a/main.py
+++ b/main.py
@@ -1,5 +1,6 @@
 import asyncio
+import atexit
 import random
 from twscrape import API
 import os
 import json
@@ -11,17 +12,28 @@ from dotenv import load_dotenv
 load_dotenv()
 
 OUTPUT_FILE = os.getenv("OUTPUT_FILE", "tweets.json")
+_results = []
+
+
 logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s")
 logger = logging.getLogger(__name__)
 
 def check_if_json_exists(file_path):
+    logger.info(f"Checking if JSON exists at {file_path}")
     return os.path.isfile(file_path) and os.path.getsize(file_path) > 0
 
 def load_json(file_path):
+    logger.info(f"Loading data from {file_path}")
     with open(file_path, "r", encoding="utf-8") as f:
         return json.load(f)
 
+def write_json(file_path, data):
+    logger.info(f"Writing data to {file_path}")
+    with open(file_path, "w", encoding="utf-8") as f:
+        json.dump(data, f, ensure_ascii=False, indent=4)
+
 async def main():
+    global _results
     api = API()  # or API("path-to.db") – default is `accounts.db`
 
     # ADD ACCOUNTS (for CLI usage see next readme section)
@@ -40,17 +52,17 @@ async def main():
     if check_if_json_exists(OUTPUT_FILE):
         _results = load_json(OUTPUT_FILE).get("tweets", [])
-    else:
-        _results = []
+
+    # Register the writer before the loop so tweets collected so far are
+    # saved even if the scrape is interrupted.
+    atexit.register(lambda: write_json(OUTPUT_FILE, {"tweets": _results}))
 
-    async for rep in api.search("AI", limit=5):
+    async for rep in api.search("AI"):
         try:
             _results.append(rep.json())
+            logger.info("Appended tweet JSON")
         except Exception:
             logger.error("Failed to parse tweet JSON")
-        await asyncio.sleep(random.uniform(7, 15))  # random delay between 7 and 15 seconds
-
-    with open(OUTPUT_FILE, "w", encoding="utf-8") as f:
-        f.write(json.dumps({"tweets": _results}, ensure_ascii=False, indent=4))
+        await asyncio.sleep(random.uniform(11, 23))  # random delay between 11 and 23 seconds
 
 if __name__ == "__main__":
     asyncio.run(main())
\ No newline at end of file
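
Note: below is a minimal, self-contained sketch of the save-on-exit pattern this diff introduces. It does not use twscrape; collect(), snapshot(), and ITEMS are illustrative stand-ins, not names from the patch.

# Minimal sketch of the atexit persistence pattern, independent of twscrape.
import asyncio
import atexit
import json
import random

ITEMS = []  # accumulates results, like _results in the patch

def snapshot(path, data):
    # atexit handlers run on normal exit, sys.exit(), Ctrl+C, and unhandled
    # exceptions, so partial results survive most interruptions (not SIGKILL).
    with open(path, "w", encoding="utf-8") as f:
        json.dump(data, f, ensure_ascii=False, indent=4)

async def collect():
    # Registered before the loop, so an early failure still triggers the write.
    atexit.register(lambda: snapshot("items.json", {"items": ITEMS}))
    for i in range(5):
        ITEMS.append({"id": i})
        await asyncio.sleep(random.uniform(0.1, 0.3))  # throttle between requests

if __name__ == "__main__":
    asyncio.run(collect())

The lambda captures ITEMS by reference, so the handler sees everything appended up to the moment the interpreter shuts down; the same holds for _results in the patch because main() declares it global.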