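# Gist by @Magisus (May 15, 2019).
#
# Walks every Forge module tarball under ./forge/, extracts each one to tmp/,
# parses its manifests for class definitions, and writes the collected module
# and class names (plus SHA-256 hashes of those names) to results.json.
# A commented-out section at the bottom can upload the same rows to BigQuery.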
require 'puppet'
require 'puppet/parser'
require 'json'
require 'digest'
require 'fileutils'
require 'zlib'
require 'rubygems/package'
require 'google/cloud/bigquery'

# Project and credentials picked up by the google-cloud-bigquery gem when
# no explicit configuration is passed.
ENV["BIGQUERY_PROJECT"] = "platform-dujour-dev"
ENV["BIGQUERY_CREDENTIALS"] = "/Users/aileen/code/platform-dujour-dev.json"
# Wraps Puppet's evaluating parser and pulls class definitions out of a
# single manifest file.
class ModuleParser
  def initialize
    @parser = Puppet::Pops::Parser::EvaluatingParser.new
  end

  # Parses the manifest at +filename+ and returns an array of class names
  # (at most one: only the first definition is inspected). Returns [] when
  # the manifest cannot be parsed, or nil on unexpected errors.
  def evaluate!(filename)
    source = Puppet::FileSystem.read(filename)
    result = @parser.parse_string(source, filename).definitions
    return [] if result.empty?
    puts "The manifest contains multiple definitions; ignoring extras." if result.size > 1

    element = result.first
    classes = []
    classes << element.name if simpletype(element) == :class
    classes
  rescue Puppet::ParseError => e
    puts "ERROR: cannot parse module, #{e}"
    []
  rescue StandardError => e
    puts "ERROR: #{e}"
    nil
  end

  # Maps a parsed AST node onto a symbol naming the kind of definition it
  # represents.
  def simpletype(element)
    case element
    when Puppet::Pops::Model::HostClassDefinition
      :class
    when Puppet::Pops::Model::ResourceTypeDefinition
      :type
    when Puppet::Pops::Model::FunctionDefinition
      :function
    else
      puts "Unknown element definition: #{element.class}"
      nil
    end
  end
end
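# Usage sketch (hypothetical manifest; assumes foo/manifests/init.pp
# contains `class foo { }`):
#   ModuleParser.new.evaluate!("foo/manifests/init.pp")  #=> ["foo"]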
# Walks every manifest under +directory+ and collects a
# { name:, hashed_name: } entry for each class found.
def parse_classes(parser, digest, directory)
  class_data = []
  Dir.glob("#{directory}/manifests/**/*.pp") do |manifest|
    classes = parser.evaluate!(manifest)
    # nil signals an unexpected parser error; give up on the whole module.
    return [] if classes.nil?
    classes.each do |c|
      class_data << { name: c, hashed_name: digest.hexdigest(c) }
    end
  end
  class_data
end
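# For a module whose manifests define a single class `nginx`, this returns
# something like (digest elided):
#   [{ name: "nginx", hashed_name: "<sha256 of \"nginx\">" }]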
start = Time.now
parser = ModuleParser.new
digest = Digest::SHA256.new
modules = []
module_destination = "tmp"

Dir.glob("./forge/*.tar.gz") do |module_dir|
  # Start each module with a clean extraction directory.
  FileUtils.remove_dir(module_destination) if File.exist?(module_destination)
  FileUtils.mkdir_p(module_destination)
  Gem::Package::TarReader.new(Zlib::GzipReader.open(module_dir)) do |tar|
    dest_file = nil
    tar.each do |tarfile|
      begin
        # Skip pax extended headers and the module's spec/ tests.
        next if tarfile.full_name =~ /PaxHeaders/ ||
                tarfile.full_name =~ /\/spec\//

        # The classic tar header can't store paths longer than 100 chars;
        # GNU tar works around that with a special @LongLink entry whose
        # body holds the real path of the *next* entry.
        if tarfile.full_name == '././@LongLink'
          dest_file = File.join module_destination, tarfile.read.strip
          next
        end

        dest_file ||= File.join(module_destination, tarfile.full_name).force_encoding("UTF-8")
        if tarfile.directory?
          FileUtils.mkdir_p dest_file
        else
          dest_dir = File.dirname(dest_file)
          FileUtils.mkdir_p dest_dir unless File.directory?(dest_dir)
          File.open dest_file, "w:UTF-8" do |f|
            f.print tarfile.read.force_encoding("UTF-8")
          end
        end
      rescue StandardError => e
        puts "WARN: failed to extract #{tarfile.full_name}: #{e}"
      ensure
        dest_file = nil
      end
    end
  end
  # Each tarball is extracted on its own, so tmp/ should contain exactly
  # one module directory at this point.
  directory = Dir.glob("tmp/*")[0]
  puts directory

  metadata_file = Dir.glob("tmp/**/metadata.json")[0]
  if metadata_file.nil?
    puts "No metadata file, skipping module"
    next
  end

  begin
    metadata = JSON.parse(File.read(metadata_file))
    if metadata["name"].nil? || metadata["version"].nil?
      puts "Missing metadata, skipping module"
      next
    end
  rescue JSON::ParserError
    puts "Error parsing metadata, skipping module"
    next
  end
  classes = parse_classes(parser, digest, directory)
  modules << {
    'name' => metadata["name"],
    'classes' => classes,
    'version' => metadata["version"],
    'hashed_name' => digest.hexdigest(metadata["name"])
  }
end
puts "Module count: #{modules.size}"
File.open("results.json", "w+:UTF-8") do |f|
f.print modules.to_json
end
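# Each entry in results.json has the shape (values illustrative):
#   { "name": "<author-module>", "classes": [{ "name": "...", "hashed_name": "..." }],
#     "version": "1.0.0", "hashed_name": "<sha256>" }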
#bigquery = Google::Cloud::Bigquery.new
#dataset = bigquery.dataset("module_classes")
#table = dataset.table("forge_data")
#table.insert(modules)
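# When uncommented, the block above streams the collected rows into the
# module_classes.forge_data table, using the project and credentials set
# via the environment variables at the top of the script.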
finish = Time.now
puts "Elapsed: #{finish - start} seconds"