Created
September 28, 2021 18:24
-
-
Save danicuki/bf8abfe35b15bc28e5864472e7e1e10e to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# frozen_string_literal: true
# Builds the CSV rows describing the non-merge commits of one repository.
#
# Runs `git pull origin` inside +repo+, then reads `git log` with a one-line
# header per commit (hash;author name;author e-mail;date;Co-authored-by
# trailer) followed by the `--shortstat` summary line.
#
# @param repo [String] repository directory relative to the current directory;
#   any "/" is removed from the name written to the output
# @return [String] rows joined with "\n", each row being
#   repo;hash;name;email;date;email_root;changed;inserted;deleted;
#   co_author_name;co_author_email;co_author_email_root
#   (empty string when no commit has a shortstat line)
def repo_commits(repo)
  `cd #{repo} && git pull origin`
  log = `cd #{repo} && git --no-pager log --pretty='format:%h;%an;%ae;%ad;%(trailers:key=Co-authored-by)' --date=short --shortstat --no-merges`
  # Tag the output as UTF-8 and replace invalid sequences. Re-encoding byte by
  # byte (unpack('C*').pack('U*')) garbled every non-ASCII author name.
  lines = log.dup.force_encoding(Encoding::UTF_8).scrub.split("\n")
  # Lines that start with a trailer are dropped; only the trailer that shares
  # the header line is kept.
  lines = lines.reject { |line| line.empty? || line.start_with?('Co-authored-by') }
  # A header line (it contains ';') that is not followed by a shortstat line
  # is dropped so the remaining lines pair up as [header, shortstat].
  kept = lines.each_with_index.reject do |line, index|
    following = lines[index + 1]
    line.include?(';') && (following.nil? || following.include?(';'))
  end
  kept.map(&:first).each_slice(2).map { |header, stat| commit_csv_row(repo, header, stat.to_s) }.join("\n")
end

# Formats one commit (header line plus shortstat line) as a ';'-separated row.
def commit_csv_row(repo, header, stat)
  fields = header.split(';')
  email = fields[2].to_s
  # Part of the address before the last "@"; the whole value when there is none.
  email_root = email[/(.*)@/, 1] || email
  changed = stat[/(\d+)/, 1].to_i # first number of the shortstat line
  inserted = stat[/(\d+) insert/, 1].to_i
  deleted = stat[/(\d+) delet/, 1].to_i
  trailer = fields[4].to_s.gsub('Co-authored-by:', '').strip
  [
    repo.delete('/'), fields.take(4).join(';'), email_root,
    changed, inserted, deleted, *co_author_columns(trailer)
  ].join(';')
end

# Splits a Co-authored-by trailer value into [name, email, email_root];
# returns three empty strings when the value holds no e-mail address.
def co_author_columns(trailer)
  address = trailer.match(/<([^>]*)>|([^ <]*@[^ >]*)/)
  return ['', '', ''] unless address

  email = address[1] || address[2]
  # The name is everything before the last space. A bare address has no space,
  # which used to raise NoMethodError; it now yields an empty name.
  name = trailer[/(.*) /, 1].to_s
  [name, email, email[/(.*)@/, 1] || email]
end
# Writes the commit rows of every repository directory under the current
# directory to a CSV file, one `repo_commits` result per repository.
#
# A repository whose processing raises a StandardError is reported on stdout
# and skipped. Interrupts and exit requests are no longer swallowed, so the
# run can be aborted with Ctrl-C.
#
# @param csv_path [String] file to (over)write; defaults to the path the
#   import helpers read
# @return [nil]
def repo_stats(csv_path = '/tmp/x.csv')
  puts 'Repositories Stats'
  File.open(csv_path, 'w') do |csv|
    # Dir.glob keeps directory names containing spaces intact, unlike
    # splitting the output of `ls -d */` on whitespace.
    Dir.glob('*/').sort.each do |repo|
      puts repo
      begin
        rows = repo_commits(repo)
        csv.write("#{rows}\n") unless rows.empty?
      rescue StandardError => e
        puts "Repo #{repo} has errors:\n#{e}"
      end
    end
  end
  puts 'Finished Stats'
end
# Prints one `insert into commits values (...)` statement per CSV row.
#
# Trailing empty fields are kept, so rows without a co-author still yield all
# of their columns. Backslashes and single quotes inside values are escaped so
# that a name such as O'Brien does not terminate the SQL string literal
# (assumes MySQL's default backslash-escape mode).
#
# @param csv_path [String] ';'-separated file to read; a missing file is
#   treated as empty
# @return [nil]
def inserts(csv_path = '/tmp/x.csv')
  content = File.file?(csv_path) ? File.read(csv_path) : ''
  statements = content.split("\n").reject(&:empty?).map do |line|
    values = line.split(';', -1).map do |field|
      escaped = field.gsub('\\') { '\\\\' }.gsub("'", "''")
      "'#{escaped}'"
    end
    "insert into commits values (#{values.join(',')});"
  end
  puts statements
end
require 'rest-client' | |
require 'json' | |
# Credentials and targets used by the helpers in this script. Each value can
# be supplied through an environment variable of the same name so that secrets
# do not have to be committed; the placeholders remain the defaults.
GITHUB_TOKEN = ENV.fetch('GITHUB_TOKEN', 'YOUR TOKEN')
# An empty password means the local MySQL root account has none.
LOCAL_DB_PASSWORD = ENV.fetch('LOCAL_DB_PASSWORD', '')
GOOGLE_DB_USER = ENV.fetch('GOOGLE_DB_USER', 'your db user')
# Destination passed to `gsutil cp` and `gcloud sql import csv`
# (presumably a gs:// URI - confirm).
DATABASE_CSV = ENV.fetch('DATABASE_CSV', 'CSV_FILE')
# Clones every repository of the PRAVALER GitHub organization that does not
# already exist as a directory in the current working directory.
#
# Pages through the organization's repository list (at most 100 pages) and
# stops at the first empty page. Requests are authenticated with GITHUB_TOKEN.
#
# @return [nil]
def fetch_all_repos
  puts 'Fetching all repos...'
  (1..100).each do |page|
    puts "Github repos page #{page}"
    # 100 is the largest page size the GitHub API documents.
    response = RestClient.get "https://api.github.com/orgs/PRAVALER/repos?per_page=100&page=#{page}",
                              Authorization: "token #{GITHUB_TOKEN}"
    repos = JSON.parse(response)
    break if repos.empty?

    repos.map { |repo| repo['name'] }.reject { |name| File.directory?(name) }.each do |name|
      puts "Fetching #{name}"
      `git clone git@github.com:PRAVALER/#{name}.git`
    end
  end
  puts 'Done!'
end
# Loads /tmp/x.csv into the `commits` table of the local `pravaler` database.
#
# Copies the file into the `column-store` container, then truncates the table
# and bulk-loads the file through the mysql client on 127.0.0.1:9999.
#
# @return [nil]
def import_db
  puts 'Importing data...'
  `docker cp /tmp/x.csv column-store:/tmp`
  # A bare `-p` makes the mysql client prompt for a password, which stalls an
  # unattended run; pass the option only when a password is configured.
  password_flag = LOCAL_DB_PASSWORD.to_s.empty? ? '' : " -p#{LOCAL_DB_PASSWORD}"
  `mysql -h 127.0.0.1 -u root -P 9999#{password_flag} -e "USE pravaler; truncate table commits; LOAD DATA INFILE '/tmp/x.csv' INTO TABLE commits FIELDS TERMINATED BY ';' LINES TERMINATED BY '\n'"`
  puts 'Imported!'
end
# Converts the ';'-separated commit file to comma-separated CSV, uploads it
# with gsutil, truncates `github_metrics.commits` and imports the uploaded
# file into the `coding-metrics` Cloud SQL instance.
#
# Fields containing a comma, a double quote or a line break are wrapped in
# double quotes (embedded quotes doubled), so a value such as "Silva, Ana"
# no longer shifts the columns that follow it.
# NOTE(review): assumes the Cloud SQL CSV import accepts quoted fields with
# doubled quotes - confirm against the import documentation.
#
# @param source_path [String] ';'-separated input file; a missing file is
#   treated as empty
# @param converted_path [String] where the comma-separated copy is written
# @return [String] output of the final `gcloud sql import csv` command
def import_db_gcloud(source_path = '/tmp/x.csv', converted_path = '/tmp/x2.csv')
  quote = lambda do |field|
    field.match?(/[",\r\n]/) ? "\"#{field.gsub('"', '""')}\"" : field
  end
  source = File.file?(source_path) ? File.read(source_path) : ''
  # The -1 limit keeps trailing empty fields so every row has all its columns.
  converted = source.split("\n").map { |line| "#{line.split(';', -1).map(&quote).join(',')}\n" }.join
  File.write(converted_path, converted)
  `gsutil cp #{converted_path} #{DATABASE_CSV}`
  `mysql -h 127.0.0.1 -u #{GOOGLE_DB_USER} -e "truncate table github_metrics.commits"`
  `gcloud sql import csv coding-metrics #{DATABASE_CSV} --database=github_metrics --table=commits --quiet`
end
# Script entry point. Everything below runs relative to the `repositories`
# directory. The two earlier stages are left commented out; only the Cloud SQL
# import is executed.
Dir.chdir('repositories')
# fetch_all_repos
# repo_stats
import_db_gcloud
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# ************************************************************ | |
# Sequel Pro SQL dump | |
# Version 4541 | |
# | |
# http://www.sequelpro.com/ | |
# https://github.com/sequelpro/sequelpro | |
# | |
# Host: 127.0.0.1 (MySQL 5.7.34-google-log) | |
# Database: github_metrics | |
# Generation Time: 2021-09-28 18:21:28 +0000 | |
# ************************************************************ | |
# Save the current session settings in @OLD_* variables, then switch the
# session to utf8 names, disabled foreign-key checks, the
# NO_AUTO_VALUE_ON_ZERO SQL mode and suppressed notes for the duration of the
# dump. The /*!NNNNN ... */ form is executed only by MySQL servers of at least
# that version.
/*!40101 SET @OLD_CHARACTER_SET_CLIENT=@@CHARACTER_SET_CLIENT */;
/*!40101 SET @OLD_CHARACTER_SET_RESULTS=@@CHARACTER_SET_RESULTS */;
/*!40101 SET @OLD_COLLATION_CONNECTION=@@COLLATION_CONNECTION */;
/*!40101 SET NAMES utf8 */;
/*!40014 SET @OLD_FOREIGN_KEY_CHECKS=@@FOREIGN_KEY_CHECKS, FOREIGN_KEY_CHECKS=0 */;
/*!40101 SET @OLD_SQL_MODE=@@SQL_MODE, SQL_MODE='NO_AUTO_VALUE_ON_ZERO' */;
/*!40111 SET @OLD_SQL_NOTES=@@SQL_NOTES, SQL_NOTES=0 */;
# Dump of table commits
# ------------------------------------------------------------
# One row per commit. The column order matches the ';'-separated rows built by
# repo_commits: repository, abbreviated hash, author name, author e-mail,
# date, e-mail local part (nickname), shortstat counters and the co-author
# name / e-mail / nickname.

DROP TABLE IF EXISTS `commits`;

CREATE TABLE `commits` (
  `repo`               VARCHAR(255) DEFAULT NULL,
  `commit_id`          VARCHAR(255) DEFAULT NULL,
  `name`               VARCHAR(255) DEFAULT NULL,
  `email`              VARCHAR(255) DEFAULT NULL,
  `commit_date`        DATETIME     DEFAULT NULL,
  `nickname`           VARCHAR(255) DEFAULT NULL,
  `changed_files`      INT(11)      DEFAULT NULL,
  `inserted`           INT(11)      DEFAULT NULL,
  `deleted`            INT(11)      DEFAULT NULL,
  `co_author_name`     VARCHAR(255) DEFAULT NULL,
  `co_author_email`    VARCHAR(255) DEFAULT NULL,
  `co_author_nickname` VARCHAR(255) DEFAULT NULL
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
# Dump of table dev_links
# ------------------------------------------------------------
# Pairs an original nickname with a real nickname and a names value
# (presumably used to merge several identities of one developer - confirm).

DROP TABLE IF EXISTS `dev_links`;

CREATE TABLE `dev_links` (
  `original_nickname` VARCHAR(255) DEFAULT NULL,
  `real_nickname`     VARCHAR(255) DEFAULT NULL,
  `names`             VARCHAR(255) DEFAULT NULL
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
# Dump of table devs
# ------------------------------------------------------------
# Stores a manager and a team per nickname (presumably the same nickname
# values as commits.nickname - confirm).

DROP TABLE IF EXISTS `devs`;

CREATE TABLE `devs` (
  `nickname` VARCHAR(255) DEFAULT NULL,
  `manager`  VARCHAR(255) DEFAULT NULL,
  `team`     VARCHAR(255) DEFAULT NULL
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
# Dump of table projects
# ------------------------------------------------------------
# Maps a repository (primary key) to a project value.

DROP TABLE IF EXISTS `projects`;

CREATE TABLE `projects` (
  `repo`    VARCHAR(255) NOT NULL,
  `project` VARCHAR(255) DEFAULT NULL,
  PRIMARY KEY (`repo`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
# Restore the session settings that were saved in the @OLD_* variables at the
# start of the dump.
/*!40111 SET SQL_NOTES=@OLD_SQL_NOTES */;
/*!40101 SET SQL_MODE=@OLD_SQL_MODE */;
/*!40014 SET FOREIGN_KEY_CHECKS=@OLD_FOREIGN_KEY_CHECKS */;
/*!40101 SET CHARACTER_SET_CLIENT=@OLD_CHARACTER_SET_CLIENT */;
/*!40101 SET CHARACTER_SET_RESULTS=@OLD_CHARACTER_SET_RESULTS */;
/*!40101 SET COLLATION_CONNECTION=@OLD_COLLATION_CONNECTION */;
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment