Compare commits

6 Commits

3a1d008790...main

| Author | SHA1 | Date |
|---|---|---|
|  | 6b5f05fd0e |  |
|  | 986e7f2564 |  |
|  | a84fa675ba |  |
|  | 7ecccda9d8 |  |
|  | a05b3a37f0 |  |
|  | 5440f72452 |  |
@@ -3,4 +3,5 @@ COOKIES=""
USERNAME=""
PASSWORD=""
EMAIL=""
-EMAIL_PASSWORD=""
+EMAIL_PASSWORD=""
+API_SEARCH_KEY=""
README.md (new file, 20 lines added)
@@ -0,0 +1,20 @@
+# Monkeycrawl
+
+A small script to crawl Twitter/X that uses [twscrape](https://github.com/vladkens/twscrape).
+
+Use at your own discretion. Do not multithread, or you risk getting banned or worse.
+
+## Usage
+```bash
+python -m venv .venv
+source .venv/bin/activate
+pip install -r requirements.txt
+```
+
+Add cookies, accounts and passwords to `.env.local`, then remove the `.local` file extension.
+
+```bash
+python main.py
+```
+
+When the script exits, the file `tweets.json` will contain the scraped tweets.
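The variables referenced by the README's `.env.local` step are the ones touched in the env-file diff above. A minimal sketch of what the renamed `.env` might contain, with placeholder values only; real cookies and credentials depend on your accounts, and `API_SEARCH_KEY` is the search query that `main.py` passes to `api.search`:

```env
# Placeholder values only; substitute your own account data.
COOKIES="cookie1=value1; cookie2=value2"
USERNAME="your_x_username"
PASSWORD="your_x_password"
EMAIL="you@example.com"
EMAIL_PASSWORD="your_email_password"
# Search query consumed by api.search() in main.py, e.g. the previously hardcoded "AI".
API_SEARCH_KEY="AI"
```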
main.py (25 lines changed)
@@ -1,4 +1,5 @@
import asyncio
import atexit
import random
from twscrape import API
import os
@@ -11,17 +12,30 @@ from dotenv import load_dotenv
load_dotenv()
OUTPUT_FILE = os.getenv("OUTPUT_FILE", "tweets.json")

_results = []


logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s")
logger = logging.getLogger(__name__)

def check_if_json_exists(file_path):
    logger.info(f"Checking if JSON exists at {file_path}")
    return os.path.isfile(file_path) and os.path.getsize(file_path) > 0

def load_json(file_path):
    logger.info(f"Loading data from {file_path}")
    with open(file_path, "r", encoding="utf-8") as f:
        return json.load(f)

def write_json(file_path, data):
    logger.info(f"Writing data to {file_path}")
    with open(file_path, "w", encoding="utf-8") as f:
        json.dump(data, f, ensure_ascii=False, indent=4)

atexit.register(lambda: write_json(OUTPUT_FILE, {"tweets": _results}))

async def main():
    global _results
    api = API()  # or API("path-to.db") – default is `accounts.db`
    # ADD ACCOUNTS (for CLI usage see next readme section)

@@ -32,6 +46,7 @@ async def main():
    password = os.getenv("PASSWORD")
    email = os.getenv("EMAIL")
    email_password = os.getenv("EMAIL_PASSWORD")
+    api_search_key = os.getenv("API_SEARCH_KEY")

    await api.pool.add_account(username, password, email, email_password, cookies=cookies)
    await api.pool.login_all()  # try to login to receive account cookies
@@ -40,20 +55,16 @@ async def main():

    if check_if_json_exists(OUTPUT_FILE):
        _results = load_json(OUTPUT_FILE).get("tweets", [])
    else:
        _results = []

-    async for rep in api.search("AI", limit=5):
+    async for rep in api.search(api_search_key):
        try:
            _results.append(rep.json())
            logger.info("Appended tweet JSON")
        except Exception:
            logger.error("Failed to parse tweet JSON")

        await asyncio.sleep(random.uniform(7, 15))  # random delay between 7 and 15 seconds

    with open(OUTPUT_FILE, "w", encoding="utf-8") as f:
        f.write(json.dumps({"tweets": _results}, ensure_ascii=False, indent=4))

    await asyncio.sleep(random.uniform(17, 31))

if __name__ == "__main__":
    asyncio.run(main())
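Since `_results` is written out both by the `atexit` hook and after the search loop, the output file holds a single top-level `tweets` array. A minimal sketch of reading it back, assuming the default `tweets.json` name and the `{"tweets": [...]}` layout that `write_json` produces:

```python
import json

# Load the output written by main.py; OUTPUT_FILE defaults to tweets.json.
with open("tweets.json", "r", encoding="utf-8") as f:
    data = json.load(f)

# Each entry is whatever rep.json() returned for a scraped tweet.
tweets = data.get("tweets", [])
print(f"{len(tweets)} tweets collected")
```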