Compare commits: a05b3a37f0...main

4 commits

| Author | SHA1 | Date |
|---|---|---|
|  | 6b5f05fd0e |  |
|  | 986e7f2564 |  |
|  | a84fa675ba |  |
|  | 7ecccda9d8 |  |
.env.local

@@ -3,4 +3,5 @@ COOKIES=""
 USERNAME=""
 PASSWORD=""
 EMAIL=""
-EMAIL_PASSWORD=""
+EMAIL_PASSWORD=""
+API_SEARCH_KEY=""
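Once the `.local` suffix is dropped (see the README below), the finished file would look something like this. All values here are placeholders, and the cookie-string format is an assumption about what twscrape accepts, not something this diff shows:

```env
# Placeholder values only - fill in real credentials locally.
COOKIES="auth_token=...; ct0=..."   # assumed cookie-string format
USERNAME="my_scraper_account"
PASSWORD="account-password"
EMAIL="account@example.com"
EMAIL_PASSWORD="email-password"
API_SEARCH_KEY="AI"                 # the search query main.py now reads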
README.md (new file, 20 lines)

@@ -0,0 +1,20 @@
+# Monkeycrawl
+
+A small script to crawl Twitter/X that uses [twscrape](https://github.com/vladkens/twscrape).
+
+Use at your own discretion. Do not multithread, or you risk getting banned or worse.
+
+## Usage
+```sh
+python -m venv .venv
+source .venv/bin/activate
+pip install -r requirements.txt
+```
+
+Add cookies, accounts and passwords to `.env.local` and remove the `.local` file extension.
+
+```sh
+python main.py
+```
+
+When the script exits, the file `tweets.json` will contain the scraped tweets.
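How main.py loads the renamed `.env` is not visible in any hunk of this compare; only the `os.getenv` reads appear below. A minimal sketch, assuming python-dotenv does the loading (the `load_dotenv` call is an assumption, not part of the diff):

```python
import os

from dotenv import load_dotenv  # assumption: python-dotenv loads the file

load_dotenv()  # reads .env from the working directory into os.environ

cookies = os.getenv("COOKIES")
username = os.getenv("USERNAME")
password = os.getenv("PASSWORD")
email = os.getenv("EMAIL")
email_password = os.getenv("EMAIL_PASSWORD")
api_search_key = os.getenv("API_SEARCH_KEY")  # the key this compare adds
```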
main.py (7 changed lines)

@@ -1,7 +1,6 @@
 import asyncio
 import atexit
 import random
-from signal import signal
 from twscrape import API
 import os
 import json
@@ -33,6 +32,8 @@ def write_json(file_path, data):
     with open(file_path, "w", encoding="utf-8") as f:
         json.dump(data, f, ensure_ascii=False, indent=4)
 
+atexit.register(lambda: write_json(OUTPUT_FILE, {"tweets": _results}))
+
 async def main():
     global _results
     api = API()  # or API("path-to.db") – default is `accounts.db`
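Moving the registration to module level matters for crash-safety: atexit handlers run on any normal interpreter shutdown, including an unhandled exception or Ctrl+C, so tweets collected before a failure still get flushed. A standalone sketch of the pattern (file name shortened for the example):

```python
import atexit
import json

_results = []

def write_json(file_path, data):
    with open(file_path, "w", encoding="utf-8") as f:
        json.dump(data, f, ensure_ascii=False, indent=4)

# Registered once at import time; the lambda looks up _results at exit,
# so everything appended later is still written out.
atexit.register(lambda: write_json("tweets.json", {"tweets": _results}))

_results.append({"text": "collected after registration"})  # still persisted
```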
@@ -45,6 +46,7 @@ async def main():
     password = os.getenv("PASSWORD")
     email = os.getenv("EMAIL")
     email_password = os.getenv("EMAIL_PASSWORD")
+    api_search_key = os.getenv("API_SEARCH_KEY")
 
     await api.pool.add_account(username, password, email, email_password, cookies=cookies)
     await api.pool.login_all()  # try to login to receive account cookies
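Note that `os.getenv` returns `None` when a variable is missing, and the value is only used further down in `api.search(api_search_key)`. A hedged guard the diff does not include, but which would fail fast on a half-filled `.env`:

```python
import os

api_search_key = os.getenv("API_SEARCH_KEY")
if not api_search_key:
    # Catch a missing or empty key here rather than inside the search loop.
    raise SystemExit("API_SEARCH_KEY is not set; add it to .env")
```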
@@ -54,7 +56,7 @@ async def main():
     if check_if_json_exists(OUTPUT_FILE):
         _results = load_json(OUTPUT_FILE).get("tweets", [])
 
-    async for rep in api.search("AI"):
+    async for rep in api.search(api_search_key):
         try:
             _results.append(rep.json())
             logger.info("Appended tweet JSON")
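With the query now read from the environment, the loop can be exercised on its own. A minimal sketch, assuming an already-populated `accounts.db`; the `limit` parameter and the fallback default are illustration, not part of the diff:

```python
import asyncio
import os

from twscrape import API

async def demo():
    api = API()  # uses accounts.db by default
    query = os.getenv("API_SEARCH_KEY", "AI")  # falls back to the old hardcoded query
    async for rep in api.search(query, limit=5):
        print(rep.id, rep.user.username)  # rep is a twscrape Tweet model

if __name__ == "__main__":
    asyncio.run(demo())
```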
@@ -63,7 +65,6 @@ async def main():
 
 
         await asyncio.sleep(random.uniform(17, 31))
-        atexit.register(lambda: write_json(OUTPUT_FILE, {"tweets": _results}))
 
 if __name__ == "__main__":
     asyncio.run(main())
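The `random.uniform(17, 31)` sleep the loop keeps is the script's only throttle, backing the README's warning against multithreading. The same jittered-delay pattern in isolation, with the delay bounds copied from the diff:

```python
import asyncio
import random

async def throttled(items):
    for item in items:
        print("processing", item)
        # Jittered pause between requests so the timing looks less bot-like.
        await asyncio.sleep(random.uniform(17, 31))

asyncio.run(throttled(["first", "second"]))
```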