Monkeycrawl/main.py
import asyncio
import json
import logging
import os
import random

from dotenv import load_dotenv
from twscrape import API

# Configuration
load_dotenv()
OUTPUT_FILE = os.getenv("OUTPUT_FILE", "tweets.json")
logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s")
logger = logging.getLogger(__name__)
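
# Expected .env contents (illustrative placeholders; the variable names come
# from this script, the values below are made up):
#   COOKIES="auth_token=...; ct0=..."
#   USERNAME=my_x_handle
#   PASSWORD=my_password
#   EMAIL=me@example.com
#   EMAIL_PASSWORD=my_email_password
#   OUTPUT_FILE=tweets.json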

def check_if_json_exists(file_path):
    """Return True if file_path exists and is non-empty."""
    return os.path.isfile(file_path) and os.path.getsize(file_path) > 0

def load_json(file_path):
    """Load and return the JSON payload stored at file_path."""
    with open(file_path, "r", encoding="utf-8") as f:
        return json.load(f)

async def main():
    api = API()  # or API("path-to.db"); the default account pool DB is `accounts.db`

    # Add an account with cookies (more stable than a fresh username/password login).
    # Accounts can also be added via the twscrape CLI, as sketched below.
    # Caution: USERNAME is a built-in environment variable on Windows, so the
    # value from .env may be shadowed by the OS user name there.
    cookies = os.getenv("COOKIES")
    username = os.getenv("USERNAME")
    password = os.getenv("PASSWORD")
    email = os.getenv("EMAIL")
    email_password = os.getenv("EMAIL_PASSWORD")
    await api.pool.add_account(username, password, email, email_password, cookies=cookies)
    await api.pool.login_all()  # log in to obtain cookies for accounts that lack them
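    # CLI alternative (command names per the twscrape README; verify against
    # your installed version):
    #   twscrape add_accounts accounts.txt username:password:email:email_password
    #   twscrape login_accounts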
    # Note: every API method also has a `raw` variant that returns the
    # underlying `httpx.Response` object.
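    # For example (a sketch; method name and signature assumed from the
    # twscrape docs rather than verified here):
    #   async for resp in api.search_raw("AI", limit=5):
    #       payload = resp.json()  # decoded response body as a dict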

    # Resume from an existing output file so repeated runs accumulate results
    # instead of overwriting them.
    if check_if_json_exists(OUTPUT_FILE):
        _results = load_json(OUTPUT_FILE).get("tweets", [])
    else:
        _results = []

    # Collect search results, sleeping between tweets to stay gentle on rate limits.
    async for rep in api.search("AI", limit=5):
        try:
            # rep.json() returns a JSON string; parse it back so the output file
            # holds nested objects rather than double-encoded strings.
            _results.append(json.loads(rep.json()))
        except Exception:
            logger.exception("Failed to parse tweet JSON")
        await asyncio.sleep(random.uniform(7, 15))  # random delay between 7 and 15 seconds

    with open(OUTPUT_FILE, "w", encoding="utf-8") as f:
        json.dump({"tweets": _results}, f, ensure_ascii=False, indent=4)

if __name__ == "__main__":
    asyncio.run(main())
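
# To run (assuming dependencies are installed, e.g. `pip install twscrape python-dotenv`):
#   python main.py
# Results accumulate in tweets.json (or whatever OUTPUT_FILE points at) across runs.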