Files
Monkeycrawl/main.py

70 lines
2.1 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
import asyncio
import atexit
import random
from twscrape import API
import os
import json
import logging
from dotenv import load_dotenv
# Configuration: load variables from the dotenv file before any lookups below.
load_dotenv()

# Destination of the collected tweets; overridable through the environment.
OUTPUT_FILE = os.environ.get("OUTPUT_FILE", "tweets.json")

# Tweets gathered so far. main() fills this list and the atexit hook saves it.
_results = []

logging.basicConfig(
    format="%(asctime)s %(levelname)s %(message)s",
    level=logging.INFO,
)
logger = logging.getLogger(__name__)
def check_if_json_exists(file_path):
    """Report whether ``file_path`` is an existing, non-empty regular file.

    Only existence and size are checked; the content is not parsed, so a
    ``True`` result does not guarantee the file holds valid JSON.

    Args:
        file_path: Path of the file to inspect.

    Returns:
        bool: ``True`` when the path is a regular file larger than 0 bytes,
        ``False`` otherwise (missing path, directory, or empty file).
    """
    logger.info(f"Checking if JSON exists at {file_path}")
    if not os.path.isfile(file_path):
        return False
    size_in_bytes = os.path.getsize(file_path)
    return size_in_bytes > 0
def load_json(file_path):
    """Read the UTF-8 encoded JSON document at ``file_path`` and decode it.

    Args:
        file_path: Path of the JSON file to read.

    Returns:
        The decoded JSON value (dict, list, str, number, bool or ``None``).

    Raises:
        OSError: If the file cannot be opened or read.
        json.JSONDecodeError: If the content is not valid JSON.
    """
    logger.info(f"Loading data from {file_path}")
    with open(file_path, encoding="utf-8") as source:
        raw_text = source.read()
    return json.loads(raw_text)
def write_json(file_path, data):
    """Serialize ``data`` as indented UTF-8 JSON and store it at ``file_path``.

    The document is first written to a sibling ``<file_path>.tmp`` file and
    then moved over the destination with ``os.replace``. A serialization
    error or a failure part-way through the write therefore leaves an
    already existing destination file untouched instead of truncated.

    Args:
        file_path: Destination path (``str``, ``bytes`` or ``os.PathLike``).
        data: JSON-serializable object to store.

    Raises:
        TypeError: If ``data`` contains a value ``json`` cannot serialize.
        ValueError: If ``data`` contains a circular reference.
        OSError: If the temporary file cannot be written or moved into place.
    """
    logger.info(f"Writing data to {file_path}")
    target = os.fspath(file_path)
    tmp_path = target + (b".tmp" if isinstance(target, bytes) else ".tmp")
    try:
        with open(tmp_path, "w", encoding="utf-8") as f:
            json.dump(data, f, ensure_ascii=False, indent=4)
        # Swap the finished document into place in a single step.
        os.replace(tmp_path, target)
    except BaseException:
        # Do not leave a half-written temp file behind; the original error
        # is re-raised below and takes precedence over cleanup problems.
        try:
            os.remove(tmp_path)
        except OSError:
            pass
        raise
def _save_results_on_exit():
    """Persist the collected tweets to ``OUTPUT_FILE`` at interpreter exit.

    Nothing is written while ``_results`` is empty. Without this guard, a
    run that fails before earlier results are reloaded (or a plain import of
    this module) would overwrite an existing output file with an empty list.
    As a consequence, a run that collects nothing no longer creates the file.
    """
    if not _results:
        logger.info("No tweets collected; leaving %s untouched", OUTPUT_FILE)
        return
    write_json(OUTPUT_FILE, {"tweets": _results})


atexit.register(_save_results_on_exit)
async def main():
    """Search with twscrape and accumulate the matching tweets in ``_results``.

    Steps:
        1. Reload tweets saved by an earlier run from ``OUTPUT_FILE``.
        2. Register the account described by the environment and log in.
        3. Iterate over the search results for ``API_SEARCH_KEY`` and append
           ``rep.json()`` of every result to ``_results``.

    The list is written to disk by the ``atexit`` hook registered at module
    level, not by this coroutine.

    Environment variables read: ``COOKIES``, ``USERNAME``, ``PASSWORD``,
    ``EMAIL``, ``EMAIL_PASSWORD``, ``API_SEARCH_KEY``. They are expected to
    have been loaded by the module-level ``load_dotenv()`` call.

    Raises:
        RuntimeError: If ``API_SEARCH_KEY`` is unset or empty.
    """
    global _results

    # Restore earlier results before any step that can fail (validation,
    # network, login): the exit hook saves ``_results`` even after an error,
    # so it must already hold the previously stored tweets by then.
    if check_if_json_exists(OUTPUT_FILE):
        _results = load_json(OUTPUT_FILE).get("tweets", [])

    api_search_key = os.getenv("API_SEARCH_KEY")
    if not api_search_key:
        raise RuntimeError("API_SEARCH_KEY is not set; nothing to search for")

    # NOTE(review): USERNAME is often pre-set by the OS or shell, and
    # load_dotenv() does not override existing variables by default, so the
    # value from .env may be ignored - confirm, or use a dedicated name.
    cookies = os.getenv("COOKIES")
    username = os.getenv("USERNAME")
    password = os.getenv("PASSWORD")
    email = os.getenv("EMAIL")
    email_password = os.getenv("EMAIL_PASSWORD")

    api = API()  # or API("path-to.db"); default is `accounts.db`
    await api.pool.add_account(username, password, email, email_password, cookies=cookies)
    await api.pool.login_all()  # try to login to receive account cookies

    async for rep in api.search(api_search_key):
        try:
            # NOTE(review): twscrape's `.json()` appears to return a JSON
            # string rather than a dict - confirm this is the wanted format.
            _results.append(rep.json())
        except Exception:
            # Keep going on a bad item, but record the traceback.
            logger.exception("Failed to parse tweet JSON")
        else:
            logger.info("Appended tweet JSON")
        # Randomized pause to keep the request pace low.
        # NOTE(review): assumed to run once per result - confirm the pacing.
        await asyncio.sleep(random.uniform(17, 31))
if __name__ == "__main__":
    # Start the scraper only when this file is executed as a script;
    # asyncio.run() creates the event loop and runs main() to completion.
    asyncio.run(main())