Get data from Pastebin using the API
from gevent import monkey
monkey.patch_all()  # patch the stdlib for gevent-friendly (cooperative) I/O

from SMLoki import SMLoki  # scanner class; an instance is passed to get_interesting_res
from bs4 import BeautifulSoup
from urllib.parse import urlparse
from uuid import uuid1

import signal as signal_module
import requests
import json
import sys
import os
import time
api_dev_key = ''
username = ''
password = ''
def signal_handler(signum, frame):
    sys.exit(0)
def check_folder(folder):
    # Create the folder (and any missing parents) if it does not exist yet.
    os.makedirs(os.path.abspath(folder), exist_ok=True)
def store(info, filename):
    check_folder('./metaJson')
    metapath = './metaJson/{}'.format(filename)
    with open(metapath, 'a') as dest:
        dest.write(info)
def get_raw_paste_content(item_raw_url):
    return requests.get(item_raw_url)
def get_interesting_res(scan, data):
    # SMLoki's scan_Target is expected to return a (matched, result) pair.
    matched, res = scan.scan_Target(data)
    if matched:
        return res
    return None
def get_api_user_key():
    data = {
        'api_dev_key': api_dev_key,
        'api_user_name': username,
        'api_user_password': password
    }
    res = requests.post('https://pastebin.com/api/api_login.php', data=data)
    return res.content
def get_api_raw_data(folder, pastebin_id):
    res = requests.get(
        'https://pastebin.com/api_scrape_item.php?i={}'.format(pastebin_id))
    check_folder(folder)  # make sure the output folder exists before writing
    outputpath = os.path.join(folder, str(uuid1()))
    print(outputpath)
    with open(outputpath, 'a') as out:
        out.write(res.text)
    return res.text
def get_pastebin_trends(folder):
    trends_data = {
        'api_dev_key': api_dev_key,
        'api_option': 'trends'
    }
    res = requests.post('https://pastebin.com/api/api_post.php', data=trends_data)
    soup = BeautifulSoup(res.text, 'lxml')
    for trend in soup.find_all('paste_url'):
        url = trend.contents[0]
        # The paste id is the last path component of the paste URL.
        pastebin_tid = urlparse(url).path.replace('/', '')
        get_api_raw_data(folder, pastebin_tid)
def get_pastebin_250(folder):
    """
    Requires a Lifetime Pro account; the scraping API returns up to 250 recent pastes.
    Per the scraping-API rules, we are only allowed to fetch one paste per second.
    """
    common = "https://pastebin.com/api_scraping.php?limit=250"
    res = requests.get(common)
    store(res.text, str(uuid1()))
    data = json.loads(res.text)
    for item in data:
        res = get_api_raw_data(folder, item['key'])
        print(item['scrape_url'], res[:100])
        time.sleep(1)  # respect the one-request-per-second limit noted above
if __name__ == '__main__':
    """
    usage:
        --folder outputfolder
    """
    signal_module.signal(signal_module.SIGINT, signal_handler)
    # Run this script from crontab instead of wrapping it in a `while True` loop.
    get_pastebin_250('./caodan')
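Since the script is meant to be scheduled externally rather than loop forever, a crontab entry along these lines would run it once a minute; the interpreter and script paths are placeholders, not taken from the original gist:

* * * * * /usr/bin/python3 /path/to/pastebin_scraper.py >> /var/log/pastebin_scraper.log 2>&1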