Simple Ruby script to parse a Pivotal Tracker CSV export and extract useful data. See comments below for usage. [Revision 5: adds output of total points per sprint]
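The script expects a Pivotal Tracker CSV export. Only the header names below are read (in any order; any extra columns in the export are ignored). The example row is illustrative, reusing values from the sample query output in the comments further down rather than a real export:

Id,Labels,Iteration End,Created at,Accepted at,Estimate,Requested By,Owned By,Story Type
18524765,"",2011-09-27,2011-09-20,2011-09-21,2,Person One,Person Two,feature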
require 'csv'
require 'date' # Date.parse is used below to normalise the Tracker dates
require 'sqlite3'
# Load the Tracker CSV export into a fresh 'stories' table.
# Missing dates are stored as 0 so the later queries can filter on them.
def setupDB(db, csv_file)
  db.execute("drop table if exists stories")
  db.execute("create table stories(id, labels, iterationend, created, accepted, size, requester, owner, type)")
  CSV.foreach(File.path(csv_file), :headers => true) do |col|
    db.execute("insert into stories(id, labels, iterationend, created, accepted, size, requester, owner, type) " +
               "VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)",
               [col['Id'],
                col['Labels'],
                col['Iteration End'] ? Date.parse(col['Iteration End']).to_s : 0,
                Date.parse(col['Created at']).to_s,
                col['Accepted at'] ? Date.parse(col['Accepted at']).to_s : 0,
                col['Estimate'].to_i, col['Requested By'],
                col['Owned By'], col['Story Type']])
  end
  puts "Parsed #{csv_file} and stored in database"
end
def storySizeAndAgePerSprint(db, output_filename)
  File.open(output_filename, 'w') { |file|
    file.puts "iteration end,count,average size,total points,min age,max age,av age"
    db.execute( "select iterationend, " +
                "count(id) as count, " +
                "avg(size) as avsize, " +
                "sum(size) as points, " +
                "min(julianday(accepted) - julianday(created)) as minage, " +
                "max(julianday(accepted) - julianday(created)) as maxage, " +
                "avg(julianday(accepted) - julianday(created)) as avage " +
                "from stories where accepted != 0 " +
                "group by iterationend" ) do |row|
      file.puts "#{row['iterationend']},#{row['count']},#{row['avsize'].round(3)},#{row['points'].round(3)},#{row['minage']},#{row['maxage']},#{row['avage'].round(3)}"
    end
  }
  puts "Written #{output_filename}"
end
def storySizePerSprint(db, as_percentage = false, output_filename)
  File.open(output_filename, 'w') { |file|
    if as_percentage then
      file.puts "iteration end,% 0s,% 1s,% 2s,% 3s,% 5s,% 8s"
    else
      file.puts "iteration end,0s,1s,2s,3s,5s,8s"
    end
    db.execute( "select iterationend, " +
                "count(case when size = 0 then id else null end) as zeros, " +
                "count(case when size = 1 then id else null end) as ones, " +
                "count(case when size = 2 then id else null end) as twos, " +
                "count(case when size = 3 then id else null end) as threes, " +
                "count(case when size = 5 then id else null end) as fives, " +
                "count(case when size = 8 then id else null end) as eights " +
                "from stories where accepted != 0 " +
                "group by iterationend" ) do |row|
      if as_percentage then
        zeros = row['zeros']
        ones = row['ones']
        twos = row['twos']
        threes = row['threes']
        fives = row['fives']
        eights = row['eights']
        total = zeros + ones + twos + threes + fives + eights
        file.puts "#{row['iterationend']},#{(zeros.to_f/total*100).round(3)},#{(ones.to_f/total*100).round(3)},#{(twos.to_f/total*100).round(3)},#{(threes.to_f/total*100).round(3)},#{(fives.to_f/total*100).round(3)},#{(eights.to_f/total*100).round(3)}"
      else
        file.puts "#{row['iterationend']},#{row['zeros']},#{row['ones']},#{row['twos']},#{row['threes']},#{row['fives']},#{row['eights']}"
      end
    end
  }
  puts "Written #{output_filename}"
end
def typePerSprint(db, as_percentage = false, output_filename)
  File.open(output_filename, 'w') { |file|
    if as_percentage
      file.puts "iteration end,% bugs,% chores,% features"
    else
      file.puts "iteration end,bugs,chores,features"
    end
    db.execute( "select iterationend, " +
                "count(case when type = 'feature' then id else null end) as feature, " +
                "count(case when type = 'chore' then id else null end) as chore, " +
                "count(case when type = 'bug' then id else null end) as bug " +
                "from stories where type != 'release' and accepted != 0 group by iterationend" ) do |row|
      if as_percentage then
        features = row['feature']
        chores = row['chore']
        bugs = row['bug']
        total = features + chores + bugs
        file.puts "#{row['iterationend']},#{(bugs.to_f/total*100).round(3)},#{(chores.to_f/total*100).round(3)},#{(features.to_f/total*100).round(3)}"
      else
        file.puts "#{row['iterationend']},#{row['bug']},#{row['chore']},#{row['feature']}"
      end
    end
  }
  puts "Written #{output_filename}"
end
# Count how many stories carry each label, across the whole export.
def allLabels(db, output_filename)
  File.open(output_filename, 'w') { |file|
    file.puts "label,count"
    labels_hash = Hash.new(0)
    db.execute("select labels,count(labels) as count from stories group by labels order by count desc") do |row|
      count = row['count']
      labels = row['labels'] || '' # stories with no labels come back as NULL
      labels.split(",").each{|label|
        labels_hash[label.strip] += count
      }
    end
    labels_hash.each{|key,count|
      file.puts "#{key},#{count}"
    }
  }
  puts "Written #{output_filename}"
end
# Count accepted stories per sprint for each of the given labels, plus an 'other' bucket.
# The SQL is built dynamically: each label becomes a LIKE match, and the label text
# (stripped of characters that aren't valid in a column alias) names the result column.
def labelsPerSprint(db, as_percentage = false, tags, output_filename)
  File.open(output_filename, 'w') { |file|
    if as_percentage
      file.print "iteration end,"
      tags.each {|key| file.print "% #{key},"}
      file.print "% other\n"
    else
      file.print "iteration end,", tags.join(','), ",other\n"
    end
    sql = "select iterationend, "
    tags.each {|key| sql += "count(case when labels like '%#{key}%' then id else null end) as #{key.gsub(/[- :]/, '')},\n"}
    sql += "count(case when "
    tags.each{|key| sql += "labels not like '%#{key}%' and "}
    sql += "1 = 1 then id else null end) as other \n"
    sql += "from stories where accepted != 0 group by iterationend"
    db.execute(sql) do |row|
      if as_percentage
        total = 0
        tags.each {|key| total += row[key.gsub(/[- :]/, '')]}
        total += row['other']
        file.print "#{row['iterationend']},"
        tags.each {|key| file.print "#{(row[key.gsub(/[- :]/, '')].to_f/total*100).round(3)},"}
        file.puts (row['other'].to_f/total*100).round(3)
      else
        file.print "#{row['iterationend']},"
        tags.each {|key| file.print "#{row[key.gsub(/[- :]/, '')]},"}
        file.puts row['other']
      end
    end
  }
  puts "Written #{output_filename}"
end
def averageAgeBySize(db, output_filename)
  File.open(output_filename, 'w') { |file|
    file.puts "size,average age"
    db.execute("select size, avg(julianday(accepted) - julianday(created)) as av_age from stories where accepted != 0 group by size" ) do |row|
      file.puts "#{row['size']},#{row['av_age'].round(3)}"
    end
  }
  puts "Written #{output_filename}"
end
# Average age in days of stories that are done (accepted), in the backlog
# (scheduled into an iteration but not yet accepted) and in the icebox (no iteration).
def ages(db, output_filename)
  File.open(output_filename, 'w') { |file|
    file.puts "av age done,av age backlog,av age icebox"
    db.execute("select " +
               "avg(case when accepted != 0 then julianday(accepted) - julianday(created) else null end) as avage_done, " +
               "avg(case when (iterationend != 0 and accepted = 0) then julianday(date('now')) - julianday(created) else null end) as avage_backlog, " +
               "avg(case when iterationend = 0 then julianday(date('now')) - julianday(created) else null end) as avage_icebox " +
               "from stories" ) do |row|
      file.puts "#{row['avage_done'].round(2)},#{row['avage_backlog'].round(2)},#{row['avage_icebox'].round(2)}"
    end
  }
  puts "Written #{output_filename}"
end
def lengths(db, output_filename)
  File.open(output_filename, 'w') { |file|
    file.puts "done,backlog,icebox"
    db.execute("select " +
               "count(case when accepted != 0 then id else null end) as count_done, " +
               "count(case when (iterationend != 0 and accepted = 0) then id else null end) as count_backlog, " +
               "count(case when iterationend = 0 then id else null end) as count_icebox " +
               "from stories" ) do |row|
      file.puts "#{row['count_done']},#{row['count_backlog']},#{row['count_icebox']}"
    end
  }
  puts "Written #{output_filename}"
end
csv_file = "mainstream__development__20131213_1126.csv" # change this to the filename of your own Tracker export
labels = ['bfsf', 'fco', 'smart-answer', 'travel advice'] # labels to break out in labelsPerSprint
db_file = "#{csv_file}.db"
db = SQLite3::Database.new(db_file)
puts "Opened #{db_file}"
db.results_as_hash = true
setupDB(db, csv_file)
storySizeAndAgePerSprint(db, "1_sizeandage_#{csv_file}")
storySizePerSprint(db, false, "2a_sizepersprint_#{csv_file}")
storySizePerSprint(db, true, "2b_sizepercentpersprint_#{csv_file}")
typePerSprint(db, false, "3a_typepersprint_#{csv_file}")
typePerSprint(db, true, "3b_typepercentpersprint_#{csv_file}")
allLabels(db, "4_labels_#{csv_file}")
labelsPerSprint(db, false, labels, "5a_labelspersprint_#{csv_file}")
labelsPerSprint(db, true, labels, "5b_labelspercentpersprint_#{csv_file}")
averageAgeBySize(db, "6_agebysize_#{csv_file}")
ages(db, "7_ages_#{csv_file}")
lengths(db, "8_lengths_#{csv_file}")
db.close
PS: if you want to play with the database directly but are new to SQLite, MesaSQLite is a handy GUI tool.
Alternatively, sqlite3 should just work from the command line...
$ sqlite3 -csv -header yourfile.db
SQLite version 3.7.7 2011-06-25 16:35:41
Enter ".help" for instructions
Enter SQL statements terminated with a ";"
sqlite> select * from stories limit 1;
id,labels,iterationend,created,accepted,size,requester,owner,type
18524765,"",2011-09-27,2011-09-20,2011-09-21,2,"Person One","Person Two",feature
[ctrl+D to quit]
Or even
$ sqlite3 -csv -header yourfile.db "select type,count(type) as count from stories group by type;"
type,count
bug,1196
chore,189
feature,1819
release,23
which then means you can do
$ sqlite3 -csv -header yourfile.db "select type,count(type) as count from stories group by type;" > newfile.csv
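For example, mirroring the script's own points-per-sprint query (points_per_sprint.csv here is just a placeholder filename), total accepted points per sprint straight from the shell:
$ sqlite3 -csv -header yourfile.db "select iterationend, sum(size) as points from stories where accepted != 0 group by iterationend;" > points_per_sprint.csv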
This is how computers are meant to be used.
+1 for "This is how computers are meant to be used." :-)
Usage: install the sqlite3 gem, save the script as pivotal_parse.rb next to your export (editing csv_file at the bottom to match), then run it:
$ gem install sqlite3
$ ruby pivotal_parse.rb
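It prints a line for each file it writes; with the export filename baked into the script you should see something like:
Opened mainstream__development__20131213_1126.csv.db
Parsed mainstream__development__20131213_1126.csv and stored in database
Written 1_sizeandage_mainstream__development__20131213_1126.csv
...
Written 8_lengths_mainstream__development__20131213_1126.csv
and the numbered CSV files appear alongside the script, ready to chart.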