Get data from Pastebin using the API
from gevent import monkey
monkey.patch_all()  # patch the stdlib for gevent-friendly (cooperative) I/O

from SMLoki import SMLoki  # scanner class; an instance is passed to get_interesting_res
from bs4 import BeautifulSoup
from urllib.parse import urlparse
from uuid import uuid1

import signal as signal_module
import requests
import json
import sys
import os
import time
api_dev_key = ''
username = ''
password = ''
def signal_handler(signum, frame):
    sys.exit(0)
def check_folder(folder):
    # Create the folder (and any missing parents) if it does not exist yet.
    os.makedirs(os.path.abspath(folder), exist_ok=True)
def store(info, filename):
    check_folder('./metaJson')
    metapath = './metaJson/{}'.format(filename)
    with open(metapath, 'a') as dest:
        dest.write(info)
def get_raw_paste_content(item_raw_url):
    return requests.get(item_raw_url)
def get_interesting_res(scan, data):
    # SMLoki's scan_Target is expected to return a (matched, result) pair.
    matched, res = scan.scan_Target(data)
    if matched:
        return res
    return None
def get_api_user_key():
    data = {
        'api_dev_key': api_dev_key,
        'api_user_name': username,
        'api_user_password': password
    }
    res = requests.post('https://pastebin.com/api/api_login.php', data=data)
    return res.content
def get_api_raw_data(folder, pastebin_id):
    res = requests.get(
        'https://pastebin.com/api_scrape_item.php?i={}'.format(pastebin_id))
    check_folder(folder)  # make sure the output folder exists before writing
    outputpath = os.path.join(folder, str(uuid1()))
    print(outputpath)
    with open(outputpath, 'a') as out:
        out.write(res.text)
    return res.text
def get_pastebin_trends(folder):
    trends_data = {
        'api_dev_key': api_dev_key,
        'api_option': 'trends'
    }
    res = requests.post('https://pastebin.com/api/api_post.php', data=trends_data)
    soup = BeautifulSoup(res.text, 'lxml')
    for trend in soup.find_all('paste_url'):
        url = trend.contents[0]
        # The paste id is the last path component of the paste URL.
        pastebin_tid = urlparse(url).path.replace('/', '')
        get_api_raw_data(folder, pastebin_tid)
def get_pastebin_250(folder):
    """
    Requires a Lifetime Pro account; the scraping API returns up to 250 recent pastes.
    Per the scraping-API rules, we are only allowed to fetch one paste per second.
    """
    common = "https://pastebin.com/api_scraping.php?limit=250"
    res = requests.get(common)
    store(res.text, str(uuid1()))
    data = json.loads(res.text)
    for item in data:
        res = get_api_raw_data(folder, item['key'])
        print(item['scrape_url'], res[:100])
        time.sleep(1)  # respect the one-request-per-second limit noted above
if __name__ == '__main__':
    """
    usage:
        --folder outputfolder
    """
    signal_module.signal(signal_module.SIGINT, signal_handler)
    # Run this script from crontab instead of wrapping it in a `while True` loop.
    get_pastebin_250('./caodan')
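Since the script is meant to be scheduled externally rather than loop forever, a crontab entry along these lines would run it once a minute; the interpreter and script paths are placeholders, not taken from the original gist:

* * * * * /usr/bin/python3 /path/to/pastebin_scraper.py >> /var/log/pastebin_scraper.log 2>&1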