Skip to content

Instantly share code, notes, and snippets.

@itsecurityco
Last active December 21, 2017 05:12
Show Gist options
  • Save itsecurityco/1e79596fac69fbdd49a33784e2e766f2 to your computer and use it in GitHub Desktop.
Save itsecurityco/1e79596fac69fbdd49a33784e2e766f2 to your computer and use it in GitHub Desktop.
Scrape wordpress plugins
#!/bin/python
# @itseco
# https://github.com/itseco/
# Extract download URLs for popular WordPress plugins
# Usage:   python script.py <pages>
# Example: python script.py 99
from lxml import html
import sys
import requests
from requests.packages.urllib3.exceptions import InsecureRequestWarning
requests.packages.urllib3.disable_warnings(InsecureRequestWarning)
# Crawl the WordPress "popular plugins" listing and print, one per line, the
# download URL found on each plugin's detail page.
#
# The number of listing pages to crawl is the single command-line argument;
# pages 1..N (inclusive) are fetched in order.
if len(sys.argv) != 2:
    sys.exit("Usage: python %s pages" % sys.argv[0])
try:
    page_count = int(sys.argv[1])
except ValueError:
    sys.exit("pages must be an integer, got %r" % sys.argv[1])

# Alternative listing covering all plugins (3,819 pages at time of writing):
#   "https://wordpress.org/plugins/page/%s/?s"
# Popular plugins (99 pages at time of writing):
LISTING_URL = "https://cl.wordpress.org/plugins/browse/popular/page/%s/"

# Seconds to wait for a connection or a read before giving up; without a
# timeout a single stalled connection would hang the whole crawl forever.
REQUEST_TIMEOUT = 30

# One session for the whole crawl so connections to the same host are reused
# instead of re-established for each of the many requests.
session = requests.Session()

for page_number in range(1, page_count + 1):
    # NOTE(review): verify=False disables TLS certificate checking, so a
    # man-in-the-middle could alter the printed download URLs. Kept because
    # the script deliberately silences InsecureRequestWarning; drop it unless
    # an intercepting proxy is genuinely required.
    listing = session.get(LISTING_URL % page_number, verify=False,
                          timeout=REQUEST_TIMEOUT)
    listing_tree = html.fromstring(listing.content)
    # Each listed plugin is an <h2 class="entry-title"> wrapping a link to
    # the plugin's detail page.
    plugin_urls = listing_tree.xpath('//h2[@class="entry-title"]/a/@href')

    for plugin_url in plugin_urls:
        detail = session.get(plugin_url, verify=False,
                             timeout=REQUEST_TIMEOUT)
        detail_tree = html.fromstring(detail.content)
        download_links = detail_tree.xpath(
            '//div[@class="plugin-actions"]/a/@href')
        # One URL per line: multiple links are no longer fused into a single
        # string, and a plugin without a download link prints nothing rather
        # than a blank line.
        for download_link in download_links:
            print(download_link)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment