Skip to content

Instantly share code, notes, and snippets.

@gcmurphy
Created June 3, 2013 11:31
Show Gist options
  • Save gcmurphy/5697565 to your computer and use it in GitHub Desktop.
Save gcmurphy/5697565 to your computer and use it in GitHub Desktop.
Search GitHub for pom.xml files and extract their dependencies.
#!/usr/bin/env ruby
require 'pp'
require 'uri'
require 'net/http'
require 'octokit'
require 'nokogiri'
require 'celluloid'
require 'mongo'
require 'logger'
# Script-wide logger; messages are written to standard output.
logger = Logger.new(STDOUT)
module Enumerable
  # Variant of +map+ that wraps every element in a Celluloid::Future.
  #
  # All futures are created first; only afterwards are their values
  # collected, in the same order as the receiver's elements.
  #
  # @param block [Proc] handed to each future together with one element
  # @return [Array] the value of each future, in element order
  def pmap(&block)
    pending = map do |item|
      Celluloid::Future.new(item, &block)
    end
    pending.map(&:value)
  end
end
# Resolves a GitHub file page to the link target of its "raw" view.
#
# Downloads +uri+, takes the first anchor with id +raw-url+, requests that
# anchor's href on github.com and returns the href of the first anchor in
# the second response (presumably GitHub's redirect stub page - confirm).
#
# @param uri [URI] address of the page to inspect
# @return [String, nil] href of the first anchor in the second response;
#   nil when the first page has no +a#raw-url+ anchor or the second
#   response contains no anchor at all
def fetch_raw(uri)
  page = Nokogiri::HTML(Net::HTTP.get(uri))
  raw_link = page.css('a#raw-url').first
  return nil unless raw_link

  redirect = Nokogiri::HTML(Net::HTTP.get(URI("https://github.com#{raw_link['href']}")))
  # An anchor-less response used to raise NoMethodError on nil here; treat
  # it like any other "nothing found" case and return nil instead.
  target = redirect.css('a').first
  target && target['href']
end
# Finds the first link to a pom.xml on a page and resolves it via fetch_raw.
#
# The page at +uri+ is downloaded and its anchors are scanned in document
# order for an href ending in "/pom.xml"; that href is taken relative to
# https://github.com and passed to fetch_raw.
#
# @param uri [URI] address of the page to scan
# @return [String, nil] whatever fetch_raw returns for the first match, or
#   nil when no anchor on the page points at a pom.xml
def fetch_pom(uri)
  document = Nokogiri::HTML(Net::HTTP.get(uri))
  pom_anchor = document.css('a').find do |anchor|
    # to_s keeps anchors without an href from raising on end_with?
    anchor['href'].to_s.end_with?('/pom.xml')
  end
  return nil unless pom_anchor

  fetch_raw(URI("https://github.com#{pom_anchor['href']}"))
end
# Runs a repository search through a freshly created Octokit client.
#
# @param file [String] search term handed to Octokit
# @param pg [Integer] value passed to Octokit as the :start_page option
# @return whatever Octokit::Client#search_repositories returns
def search_github(file, pg)
  options = { :start_page => pg }
  Octokit::Client.new.search_repositories(file, options)
end
# Main script: page through the GitHub search results for "pom.xml",
# resolve each repository to a raw pom.xml URL, parse the pom and store
# the dependency coordinates found in it in MongoDB.

# Child elements of a <dependency> that are kept.
gav = ['groupId', 'artifactId', 'version']

# Results go to the "poms" collection of the "github" database on a local
# MongoDB. The client class is referenced through its Mongo namespace
# because the script never does `include Mongo`.
@mongo = Mongo::MongoClient.new('localhost', 27017)
@db = @mongo['github']
@collection = @db['poms']

pg = 1
loop do
  repos = search_github('pom.xml', pg)
  # An empty result page is still truthy, so test for it explicitly;
  # otherwise the crawl would keep requesting pages forever.
  break if repos.nil? || repos.empty?

  logger.info("Processing search result page #{pg}")
  # fetch_pom returns nil for repositories without a pom.xml link.
  urls = repos.pmap { |repo| fetch_pom(URI(repo.url)) }.compact
  urls.each do |url|
    logger.info("Found pom at #{url}")
    pom = Nokogiri::XML(Net::HTTP.get(URI(url)))
    # Drop XML namespaces so the plain "//dependency" XPath matches.
    pom.remove_namespaces!
    entry = { :url => url }
    # NOTE(review): every <dependency> writes the same three keys, so only
    # the last dependency's coordinates survive - confirm whether one
    # document per dependency was intended.
    pom.xpath('//dependency').each do |dep|
      dep.children.each do |child|
        entry[child.name] = child.content if gav.include?(child.name)
      end
    end
    # Only store poms that yielded at least one coordinate.
    @collection.insert(entry) if (entry.keys & gav).any?
  end
  pg += 1
end
@k9ert
Copy link

k9ert commented Dec 1, 2014

It should be `if gav.include?(child.name)`, not `unless`!

But thank you very much, very helpful!

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment