import asyncio
import atexit
import json
import logging
import os
import random

from dotenv import load_dotenv
from twscrape import API

# Configuration
load_dotenv()
OUTPUT_FILE = os.getenv("OUTPUT_FILE", "tweets.json")
_results = []

logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s")
logger = logging.getLogger(__name__)


def check_if_json_exists(file_path):
    """Return True if the file exists and is non-empty."""
    logger.info(f"Checking if JSON exists at {file_path}")
    return os.path.isfile(file_path) and os.path.getsize(file_path) > 0


def load_json(file_path):
    logger.info(f"Loading data from {file_path}")
    with open(file_path, "r", encoding="utf-8") as f:
        return json.load(f)


def write_json(file_path, data):
    logger.info(f"Writing data to {file_path}")
    with open(file_path, "w", encoding="utf-8") as f:
        json.dump(data, f, ensure_ascii=False, indent=4)


async def main():
    global _results
    api = API()  # or API("path-to.db") -- default is `accounts.db`

    # Add an account using cookies (more stable than a fresh login).
    cookies = os.getenv("COOKIES")
    username = os.getenv("USERNAME")
    password = os.getenv("PASSWORD")
    email = os.getenv("EMAIL")
    email_password = os.getenv("EMAIL_PASSWORD")
    await api.pool.add_account(username, password, email, email_password, cookies=cookies)
    await api.pool.login_all()  # log in any accounts that still need cookies

    # Resume from a previous run if the output file already contains tweets.
    if check_if_json_exists(OUTPUT_FILE):
        _results = load_json(OUTPUT_FILE).get("tweets", [])

    # Register the writer before scraping so results are flushed to disk
    # even if the loop raises or the process exits partway through.
    atexit.register(lambda: write_json(OUTPUT_FILE, {"tweets": _results}))

    async for rep in api.search("AI"):
        try:
            _results.append(rep.dict())  # store as a dict, not a JSON string, to avoid double encoding
            logger.info(f"Appended tweet {rep.id}")
        except Exception:
            logger.exception("Failed to serialize tweet")
        await asyncio.sleep(random.uniform(11, 23))  # random delay between 11 and 23 seconds


if __name__ == "__main__":
    asyncio.run(main())
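# A minimal sketch of the .env file this script expects. The variable names
# mirror the os.getenv() calls above; the values below are placeholders, not
# real credentials, and the COOKIES string format follows twscrape's
# "name=value; name=value" convention.
#
#   OUTPUT_FILE=tweets.json
#   USERNAME=my_x_handle
#   PASSWORD=my_password
#   EMAIL=me@example.com
#   EMAIL_PASSWORD=my_email_password
#   COOKIES=auth_token=...; ct0=...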