Created
May 15, 2019 18:44
-
-
Save Magisus/8961f41668752f3fed51b6b43cb513fd to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require 'puppet' | |
require 'puppet/parser' | |
require 'json' | |
require 'google/cloud/bigquery' | |
require 'zlib' | |
require 'rubygems/package' | |
require 'pry' | |
# Default BigQuery settings for local development runs.
# `||=` keeps any value already exported by the caller instead of clobbering
# it; the credentials path below is specific to one developer machine and is
# only a fallback.
ENV["BIGQUERY_PROJECT"] ||= "platform-dujour-dev"
ENV["BIGQUERY_CREDENTIALS"] ||= "/Users/aileen/code/platform-dujour-dev.json"
# Extracts the name of the Puppet class defined by a manifest file.
#
# Wraps Puppet's EvaluatingParser and inspects only the FIRST top-level
# definition found in each manifest; any further definitions are ignored.
class ModuleParser
  def initialize
    @parser = Puppet::Pops::Parser::EvaluatingParser.new
  end

  # Parses the manifest at +filename+ and reports the class it defines.
  #
  # @param filename [String] path to a Puppet manifest
  # @return [Array] the `name` of the first definition when it is a class;
  #   an empty array when the manifest has no definitions, when its first
  #   definition is not a class, or when Puppet cannot parse it
  # @return [nil] when any other StandardError is raised while parsing, so
  #   callers can tell an unexpected failure apart from "no classes"
  # @note Errors raised while reading the file are not rescued.
  def evaluate!(filename)
    source = Puppet::FileSystem.read(filename)
    begin
      definitions = @parser.parse_string(source, filename).definitions
      # Nothing to inspect: avoid reporting "multiple definitions" or an
      # "Unknown element" for a manifest that simply defines nothing.
      return [] if definitions.empty?

      puts "The manifest contains multiple definitions; ignoring extras." if definitions.size > 1
      element = definitions.first
      simpletype(element) == :class ? [element.name] : []
    rescue Puppet::ParseError => e
      puts "ERROR: cannot parse module, #{e}"
      []
    rescue StandardError => e
      puts "ERROR: #{e}"
      # Explicit nil (previously the implicit return value of `puts`).
      nil
    end
  end

  # Maps a parsed definition object to a short symbolic kind.
  #
  # @param element [Object] a definition taken from the parsed manifest
  # @return [Symbol, nil] :class, :type or :function; nil (after printing a
  #   notice) for anything else
  def simpletype(element)
    case element
    when Puppet::Pops::Model::HostClassDefinition
      :class
    when Puppet::Pops::Model::ResourceTypeDefinition
      :type
    when Puppet::Pops::Model::FunctionDefinition
      :function
    else
      puts "Unknown element definition: #{element.class}"
      nil
    end
  end
end
# Collects the classes defined by every manifest under
# "<directory>/manifests", together with a digest of each class name.
#
# @param parser [#evaluate!] called with each manifest path; expected to
#   return an array of class names, or nil when the manifest could not be
#   processed
# @param digest [#hexdigest] called with each class name
# @param directory [String] module root containing a "manifests" directory
# @return [Array<Hash>] one `{ name:, hashed_name: }` hash per class, in
#   sorted manifest-path order
def parse_classes(parser, digest, directory)
  class_data = []
  # Sorting makes the output order independent of filesystem/Ruby version.
  Dir.glob("#{directory}/manifests/**/*.pp").sort.each do |manifest|
    classes = parser.evaluate!(manifest)
    # A manifest that failed unexpectedly is skipped on its own; it must not
    # discard the classes already collected from the other manifests.
    next if classes.nil?

    classes.each do |name|
      class_data << { name: name, hashed_name: digest.hexdigest(name) }
    end
  end
  class_data
end
# Unpacks a gzipped module tarball into +destination+.
#
# Entries whose path contains "PaxHeaders" or a "/spec/" directory are
# skipped. Extraction is best-effort: a failure on one entry is reported and
# the remaining entries are still processed.
#
# @param tarball [String] path to a .tar.gz archive
# @param destination [String] existing directory to extract into
# @return [void]
def extract_module(tarball, destination)
  root = File.expand_path(destination)
  # Block forms close the gzip stream and tar reader when extraction ends.
  Zlib::GzipReader.open(tarball) do |gzip|
    Gem::Package::TarReader.new(gzip) do |tar|
      long_name = nil
      tar.each do |entry|
        begin
          # Tar headers can't hold paths longer than 100 chars, so GNU tar
          # emits a '././@LongLink' pseudo-entry whose body is the real path
          # of the entry that follows it.
          if entry.full_name == '././@LongLink'
            long_name = entry.read.to_s.strip
            next
          end

          name = long_name.to_s.empty? ? entry.full_name : long_name
          name = name.dup.force_encoding("UTF-8")
          # The long name applies to exactly one entry.
          long_name = nil
          next if name =~ /PaxHeaders/ || name =~ /\/spec\//

          target = File.expand_path(File.join(destination, name))
          next if target == root

          # Archives are untrusted: refuse paths that escape the destination.
          unless target.start_with?(root + File::SEPARATOR)
            puts "Skipping archive entry outside #{destination}: #{name}"
            next
          end

          if entry.directory?
            FileUtils.mkdir_p(target)
          else
            FileUtils.mkdir_p(File.dirname(target))
            contents = entry.read
            File.open(target, "w:UTF-8") do |f|
              # `read` may yield nil for an empty entry; still create the file.
              f.print contents.force_encoding("UTF-8") if contents
            end
          end
        rescue StandardError => e
          puts "ERROR: could not extract archive entry: #{e}"
        end
      end
    end
  end
end

# Monotonic clock: elapsed time is unaffected by wall-clock adjustments.
start = Process.clock_gettime(Process::CLOCK_MONOTONIC)

parser = ModuleParser.new
digest = Digest::SHA256.new
modules = []
module_destination = "tmp"

Dir.glob("./forge/*.tar.gz") do |tarball|
  # Start every module from an empty scratch directory.
  FileUtils.rm_rf(module_destination)
  FileUtils.mkdir_p(module_destination)

  begin
    extract_module(tarball, module_destination)
  rescue Zlib::Error, Gem::Package::Error => e
    # A corrupt archive should not abort the whole batch.
    puts "ERROR: cannot unpack #{tarball}, skipping module: #{e}"
    next
  end

  # There should only be one module at a time in the scratch directory
  directory = Dir.glob("#{module_destination}/*").first
  puts directory

  metadata_file = Dir.glob("#{module_destination}/**/metadata.json").first
  if metadata_file.nil?
    puts "No metadata file, skipping module"
    next
  end

  begin
    # JSON.parse (not JSON.load) because the metadata is untrusted input;
    # File.read does not leave a file handle open.
    metadata = JSON.parse(File.read(metadata_file))
  rescue JSON::ParserError
    puts "Error parsing metadata, skipping module"
    next
  end

  # Valid JSON that is not an object (e.g. an array) has no usable fields.
  unless metadata.is_a?(Hash) && metadata["name"] && metadata["version"]
    puts "Missing metadata, skipping module"
    next
  end

  modules << {
    'name' => metadata["name"],
    'classes' => parse_classes(parser, digest, directory),
    'version' => metadata["version"],
    'hashed_name' => digest.hexdigest(metadata["name"])
  }
end

puts "Module count: #{modules.size}"

File.open("results.json", "w+:UTF-8") do |f|
  f.print modules.to_json
end

#bigquery = Google::Cloud::Bigquery.new
#dataset = bigquery.dataset("module_classes")
#table = dataset.table("forge_data")
#table.insert(modules)

finish = Process.clock_gettime(Process::CLOCK_MONOTONIC)
diff = finish - start
puts diff
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment