Compare commits


4 Commits

| Author | SHA1 | Message | Date |
|---|---|---|---|
| fzzinchemical | 6b5f05fd0e | Add API_SEARCH_KEY to environment variables and update main functionality to use it for searches | 2025-11-13 22:27:49 +01:00 |
| fzzinchemical | 986e7f2564 | Added README.md | 2025-11-13 22:26:27 +01:00 |
| fzzinchemical | a84fa675ba | Remove unused signal import from main.py | 2025-11-13 20:58:04 +01:00 |
| fzzinchemical | 7ecccda9d8 | Fix atexit registration for JSON writing to ensure data is saved on exit | 2025-11-13 20:50:07 +01:00 |
3 changed files with 26 additions and 4 deletions
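
The last commit's message points at the key fix: registering the `atexit` hook before the crawl runs, so `tweets.json` is written even when the process is interrupted. A minimal sketch of that pattern, with a hypothetical synchronous `collect()` standing in for the script's async scraping loop:

```python
import atexit
import json

_results = []  # accumulated records; mirrors the script's module-level list

def write_json(file_path, data):
    # Dump collected data; invoked by atexit at interpreter shutdown.
    with open(file_path, "w", encoding="utf-8") as f:
        json.dump(data, f, ensure_ascii=False, indent=4)

# Registering at import time (rather than after the main loop, as the old
# code did) means the hook fires on any normal interpreter shutdown,
# including an unhandled exception or Ctrl+C (but not SIGKILL or os._exit).
atexit.register(lambda: write_json("tweets.json", {"tweets": _results}))

def collect():
    # Hypothetical stand-in for the script's scraping loop.
    for i in range(3):
        _results.append({"id": i})

if __name__ == "__main__":
    collect()
```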

`.env.local`

```diff
@@ -3,4 +3,5 @@ COOKIES=""
 USERNAME=""
 PASSWORD=""
 EMAIL=""
 EMAIL_PASSWORD=""
+API_SEARCH_KEY=""
```
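
`main.py` reads these keys with `os.getenv` (see its diff below). How the file gets into the environment is not shown in this compare; a minimal sketch assuming the renamed `.env` is loaded with the `python-dotenv` package:

```python
import os

from dotenv import load_dotenv  # assumption: python-dotenv is installed

load_dotenv()  # copies key=value pairs from ./.env into os.environ

# Same keys as the template above; os.getenv returns None for unset keys.
username = os.getenv("USERNAME")
password = os.getenv("PASSWORD")
api_search_key = os.getenv("API_SEARCH_KEY")

if not api_search_key:
    raise SystemExit("API_SEARCH_KEY is empty; set it in .env before running")
```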

`README.md` (new file, 20 lines)

```diff
@@ -0,0 +1,20 @@
+# Monkeycrawl
+
+A small script to crawl Twitter/X that uses [twscrape](https://github.com/vladkens/twscrape).
+Use at your own discretion. Do not multithread, or you risk getting banned or worse.
+
+## Usage
+
+```bash
+python -m venv .venv
+source .venv/bin/activate
+pip install -r requirements.txt
+```
+
+Add cookies, accounts, and passwords to `.env.local`, then remove the `.local` file extension.
+
+```bash
+python main.py
+```
+
+When the script exits, the file `tweets.json` will contain the scraped tweets.
```
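
Per the `main.py` diff below, that file is a single object with a `tweets` array, each entry produced by `rep.json()`. A minimal sketch for consuming it after a run (the `rawContent` field is assumed from twscrape's tweet model and may differ by version):

```python
import json

# Load the file written by the crawler's atexit hook.
with open("tweets.json", encoding="utf-8") as f:
    data = json.load(f)

tweets = data.get("tweets", [])
print(f"{len(tweets)} tweets collected")

# Depending on the twscrape version, rep.json() may have stored a JSON
# string rather than a dict, so normalize before reading fields.
for entry in tweets[:5]:
    tweet = json.loads(entry) if isinstance(entry, str) else entry
    print(tweet.get("id"), str(tweet.get("rawContent", ""))[:80])
```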

`main.py`

```diff
@@ -1,7 +1,6 @@
 import asyncio
 import atexit
 import random
-from signal import signal
 from twscrape import API
 import os
 import json
@@ -33,6 +32,8 @@ def write_json(file_path, data):
     with open(file_path, "w", encoding="utf-8") as f:
         json.dump(data, f, ensure_ascii=False, indent=4)
 
+atexit.register(lambda: write_json(OUTPUT_FILE, {"tweets": _results}))
+
 async def main():
     global _results
     api = API()  # or API("path-to.db") default is `accounts.db`
@@ -45,6 +46,7 @@ async def main():
     password = os.getenv("PASSWORD")
     email = os.getenv("EMAIL")
     email_password = os.getenv("EMAIL_PASSWORD")
+    api_search_key = os.getenv("API_SEARCH_KEY")
 
     await api.pool.add_account(username, password, email, email_password, cookies=cookies)
     await api.pool.login_all()  # try to login to receive account cookies
@@ -54,7 +56,7 @@ async def main():
     if check_if_json_exists(OUTPUT_FILE):
         _results = load_json(OUTPUT_FILE).get("tweets", [])
 
-    async for rep in api.search("AI"):
+    async for rep in api.search(api_search_key):
         try:
             _results.append(rep.json())
             logger.info("Appended tweet JSON")
@@ -63,7 +65,6 @@ async def main():
         await asyncio.sleep(random.uniform(17, 31))
 
-atexit.register(lambda: write_json(OUTPUT_FILE, {"tweets": _results}))
 
 
 if __name__ == "__main__":
     asyncio.run(main())
```
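
One nuance of the new variable: despite the name, `API_SEARCH_KEY` is the search query passed to `api.search`, not a credential. If it is unset, `os.getenv` returns `None`, which would go straight into the search call. A small defensive sketch; the fallback to the old hard-coded `"AI"` query is a suggestion, not part of this diff:

```python
import os

# API_SEARCH_KEY holds the search query string, not an API credential.
# Fall back to the previous hard-coded query when it is unset or empty.
api_search_key = os.getenv("API_SEARCH_KEY") or "AI"
```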