Last active
August 29, 2015 13:55
-
-
Save daveadams/8730747 to your computer and use it in GitHub Desktop.
Sakai disk usage reports by binary properties
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env ruby
#
# binary-resource-report.rb
#   Reports disk usage broken down by properties in the binary_entity field.
#
# WARNING: takes a long time to run
#          (~5 hours for 10,000,000 records)
#
# Written by David Adams ([email protected])
#
# This software is licensed to the Public Domain; No Rights Reserved
#
# Requirements
#   You must install the sakai-info gem as well as a database driver.
#   Tested drivers are:
#     For Oracle: ruby-oci8
#     For MySQL: mysql2
#
#   MySQL access may also work with the mysqlplus or mysql drivers.
#
######################################################################
#
# Variables -- edit these before running the script.
#
# Database to read from, written as a Sequel connection string
# (http://sequel.jeremyevans.net/rdoc/files/doc/opening_databases_rdoc.html)
#
CONNECTION_STRING = "oracle://username:password@dbsid"
#
# Report filenames: one CSV file is written per breakdown. The paths are
# passed to File.open as-is, so relative names resolve against the current
# working directory.
#
CREATORS_REPORT = "creators.csv"
CREATEDATES_REPORT = "creation-dates.csv"
MIMETYPES_REPORT = "mimetypes.csv"
EXTENSIONS_REPORT = "file-extensions.csv"
#
######################################################################
# Flush every write to STDOUT immediately so the progress dots printed while
# collecting data show up as they are emitted rather than when a buffer fills.
STDOUT.sync = true

require 'sakai-info'

# Point sakai-info at the database named by CONNECTION_STRING.
SakaiInfo::DB.configure({ "db" => CONNECTION_STRING })

# Accumulators, one per report. Each maps a key (creator, creation date,
# mimetype, file extension) to a hash of the form
#   { :count => <number of files>, :size => <sum of content_length> }
creators = {}
createdates = {}
mimetypes = {}
extensions = {}

# Successfully processed rows since the last progress dot was printed.
counter = 0

# Rows for which the corresponding property was absent.
creator_missing = 0
created_errors = 0
mimetype_errors = 0
no_display_name = 0

print "Collecting data..."
# Records one file of +size+ in the +stats[key]+ bucket, creating the bucket
# (a { :count, :size } hash) the first time +key+ is seen.
def add_usage(stats, key, size)
  if stats.key?(key)
    stats[key][:count] += 1
    stats[key][:size] += size
  else
    stats[key] = { :count => 1, :size => size }
  end
end

# Returns the text following the last "." in +display_name+.
#
# Returns "" when the name is nil, contains no ".", or ends with "." -- so
# names such as "report." or "" all land in the single "no extension" bucket
# instead of producing a nil key or reporting the base name as an extension.
def file_extension(display_name)
  return "" if display_name.nil?

  _stem, dot, suffix = display_name.rpartition(".")
  dot.empty? ? "" : suffix
end

# Walk every row of content_resource and tally its size under each property.
#
# The dataset is iterated with #each rather than #all.each so the whole table
# is not materialized as one Ruby array first (the header mentions runs over
# 10,000,000 records).
# NOTE(review): Sequel advises against issuing further queries on the same
# connection inside Dataset#each on some adapters; nothing in this loop
# visibly does so, but confirm ContentBinaryEntity.new only parses the blob.
SakaiInfo::DB.connect[:content_resource].select(:resource_id, :binary_entity).each do |dbrow|
  begin
    meta = SakaiInfo::ContentBinaryEntity.new(dbrow[:binary_entity])

    # Extract everything that can raise BEFORE touching any tally, so a row
    # that fails is skipped as a whole instead of being counted in some
    # reports and not in others.
    #
    # A missing content_length is counted as 0; storing nil in a bucket would
    # make every later "+=" on that bucket raise.
    size = meta["content_length"] || 0
    creator = meta["CHEF:creator"]
    created = meta["DAV:creationdate"]
    # Keep only the first 8 characters of the creation date
    # (presumably YYYYMMDD, i.e. one bucket per day -- TODO confirm format).
    created_day = created.nil? ? nil : created.slice(0, 8)
    mimetype = meta["content_type"]
    display_name = meta["DAV:displayname"]
    extension = file_extension(display_name)

    # creators
    if creator.nil?
      creator_missing += 1
    else
      add_usage(creators, creator, size)
    end

    # created dates
    if created_day.nil?
      created_errors += 1
    else
      add_usage(createdates, created_day, size)
    end

    # mimetypes
    if mimetype.nil?
      mimetype_errors += 1
    else
      add_usage(mimetypes, mimetype, size)
    end

    # file extensions (files without a display name are tallied under "")
    no_display_name += 1 if display_name.nil?
    add_usage(extensions, extension, size)

    # UI: print a progress dot each time the counter passes 500 rows
    counter += 1
    if counter > 500
      print "."
      counter = 0
    end
  rescue => e
    # Best effort: report the offending row and carry on with the next one.
    STDERR.puts "ERROR: #{dbrow[:resource_id]}"
    STDERR.puts " #{e}"
  end
end
# Terminate the "Collecting data..." progress line.
puts " OK"
puts

# Number of distinct keys seen for each report.
puts "Quick summary:"
puts " Unique file creators: #{creators.size}"
puts " Unique creation dates: #{createdates.size}"
puts " Unique mime types: #{mimetypes.size}"
puts " Unique file extensions: #{extensions.size}"
puts

# Number of rows for which each property was absent.
puts " Files with no creator: #{creator_missing}"
puts " Created date errors: #{created_errors}"
puts " Mime type errors: #{mimetype_errors}"
puts " Missing display name: #{no_display_name}"
puts
# Renders +value+ as one CSV field.
#
# Values containing a comma, double quote, CR or LF are wrapped in double
# quotes with embedded quotes doubled (RFC 4180); anything else is emitted
# verbatim, so rows that were already well-formed come out unchanged. Keys
# such as file extensions are derived from user-supplied display names and
# can contain any of these characters.
def csv_field(value)
  text = value.to_s
  return text unless text =~ /[",\r\n]/

  "\"#{text.gsub('"', '""')}\""
end

# Writes one usage report to +path+ and reports progress on STDOUT.
#
# path       - file to create (truncated if it already exists)
# key_header - heading of the first column
# stats      - hash of key => { :count => ..., :size => ... }
# keys       - keys to emit, in output order (defaults to the hash's own order)
def write_usage_report(path, key_header, stats, keys = stats.keys)
  print "Writing out #{path}... "
  File.open(path, "w") do |f|
    f.puts "#{key_header},file count,total size"
    keys.each do |key|
      fields = [key, stats[key][:count], stats[key][:size]]
      f.puts fields.map { |field| csv_field(field) }.join(",")
    end
  end
  puts "OK"
end

write_usage_report(CREATORS_REPORT, "user_id", creators)
# Only the creation-date report is sorted by key.
write_usage_report(CREATEDATES_REPORT, "creation date", createdates, createdates.keys.sort)
write_usage_report(MIMETYPES_REPORT, "mimetype", mimetypes)
write_usage_report(EXTENSIONS_REPORT, "extension", extensions)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment