Monkeycrawl/main.py
import asyncio
import json
import logging
import os
import random

from dotenv import load_dotenv
from twscrape import API

# Configuration
load_dotenv()
OUTPUT_FILE = os.getenv("OUTPUT_FILE", "tweets.json")
logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s")
logger = logging.getLogger(__name__)
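
# Expected .env contents (illustrative placeholders; the variable names come
# from this script, the values below are made up):
#   COOKIES="auth_token=...; ct0=..."
#   USERNAME=my_x_handle
#   PASSWORD=my_password
#   EMAIL=me@example.com
#   EMAIL_PASSWORD=my_email_password
#   OUTPUT_FILE=tweets.json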

def check_if_json_exists(file_path):
    """Return True if file_path exists and is non-empty."""
    return os.path.isfile(file_path) and os.path.getsize(file_path) > 0

def load_json(file_path):
    """Load and return the JSON payload stored at file_path."""
    with open(file_path, "r", encoding="utf-8") as f:
        return json.load(f)

async def main():
    api = API()  # or API("path-to.db"); the default account pool DB is `accounts.db`

    # Add an account with cookies (more stable than a fresh username/password login).
    # Accounts can also be added via the twscrape CLI, as sketched below.
    # Caution: USERNAME is a built-in environment variable on Windows, so the
    # value from .env may be shadowed by the OS user name there.
    cookies = os.getenv("COOKIES")
    username = os.getenv("USERNAME")
    password = os.getenv("PASSWORD")
    email = os.getenv("EMAIL")
    email_password = os.getenv("EMAIL_PASSWORD")
    await api.pool.add_account(username, password, email, email_password, cookies=cookies)
    await api.pool.login_all()  # log in to obtain cookies for accounts that lack them
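    # CLI alternative (command names per the twscrape README; verify against
    # your installed version):
    #   twscrape add_accounts accounts.txt username:password:email:email_password
    #   twscrape login_accounts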
    # Note: every API method also has a `raw` variant that returns the
    # underlying `httpx.Response` object.
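    # For example (a sketch; method name and signature assumed from the
    # twscrape docs rather than verified here):
    #   async for resp in api.search_raw("AI", limit=5):
    #       payload = resp.json()  # decoded response body as a dict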

    # Resume from an existing output file so repeated runs accumulate results
    # instead of overwriting them.
    if check_if_json_exists(OUTPUT_FILE):
        _results = load_json(OUTPUT_FILE).get("tweets", [])
    else:
        _results = []

    # Collect search results, sleeping between tweets to stay gentle on rate limits.
    async for rep in api.search("AI", limit=5):
        try:
            # rep.json() returns a JSON string; parse it back so the output file
            # holds nested objects rather than double-encoded strings.
            _results.append(json.loads(rep.json()))
        except Exception:
            logger.exception("Failed to parse tweet JSON")
        await asyncio.sleep(random.uniform(7, 15))  # random delay between 7 and 15 seconds

    with open(OUTPUT_FILE, "w", encoding="utf-8") as f:
        json.dump({"tweets": _results}, f, ensure_ascii=False, indent=4)

if __name__ == "__main__":
    asyncio.run(main())
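
# To run (assuming dependencies are installed, e.g. `pip install twscrape python-dotenv`):
#   python main.py
# Results accumulate in tweets.json (or whatever OUTPUT_FILE points at) across runs.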