Skip to content

Instantly share code, notes, and snippets.

@danicuki
Created September 28, 2021 18:24
Show Gist options
  • Save danicuki/bf8abfe35b15bc28e5864472e7e1e10e to your computer and use it in GitHub Desktop.
Save danicuki/bf8abfe35b15bc28e5864472e7e1e10e to your computer and use it in GitHub Desktop.
# frozen_string_literal: true
# Pulls a repository and renders its non-merge commits as CSV rows.
#
# Each row is ';'-separated: repo name (slashes removed), short hash, author
# name, author e-mail, commit date, author e-mail local part, files changed,
# insertions, deletions, co-author name, co-author e-mail and co-author e-mail
# local part. Commits without a --shortstat line (for example empty commits)
# are skipped, and only the first Co-authored-by trailer of a commit is kept.
#
# @param repo [String] repository directory, relative to the current directory
# @return [String] the rows joined by "\n" ("" when there is nothing to report)
def repo_commits(repo)
  `cd #{repo} && git pull origin`
  result = `cd #{repo} && git --no-pager log --pretty='format:%h;%an;%ae;%ad;%(trailers:key=Co-authored-by)' --date=short --shortstat --no-merges`
  # Every byte is re-encoded as its own code point, presumably so that later
  # string operations never hit an invalid byte sequence.
  # NOTE(review): this also garbles multi-byte UTF-8 names; confirm before changing.
  lines = result.unpack('C*').pack('U*').split("\n").reject(&:empty?)
  # Additional trailers spill onto their own lines. Git matches the trailer key
  # case-insensitively but prints it as written, so the filter must ignore
  # case too; otherwise "Co-Authored-By:" lines break the header/stat pairing.
  lines = lines.reject { |line| line.match?(/^Co-authored-by/i) }
  # A header line (contains ';') that is not followed by a stat line has no
  # statistics to report and would shift every later pair by one.
  with_stats = lines.reject.each_with_index do |line, index|
    following = lines[index + 1]
    line.include?(';') && (following.nil? || following.include?(';'))
  end
  with_stats.each_slice(2).map { |header, stats| commit_csv_row(repo, header, stats) }.join("\n")
end

# Formats one commit (its header line plus its --shortstat line) as a CSV row.
def commit_csv_row(repo, header, stats)
  fields = header.split(';')
  email = fields[2].to_s
  email_root = email[/(.*)@/, 1] || email
  stats = stats.to_s
  changed = stats[/\d+/].to_i # the first number of a shortstat line is the file count
  inserted = stats[/(\d+) insert/, 1].to_i
  deleted = stats[/(\d+) delet/, 1].to_i
  co_author = co_author_fields(fields[4]).join(';')
  "#{repo.delete('/')};#{fields.take(4).join(';')};#{email_root};#{changed};#{inserted};#{deleted};#{co_author}"
end

# Splits a Co-authored-by trailer into [name, email, email local part].
def co_author_fields(trailer)
  co_author = trailer.to_s.gsub(/Co-authored-by:/i, '').strip
  match = co_author.match(/<([^>]*)>|([^ <]*@[^ >]*)/)
  return ['', '', ''] unless match

  email = match[1] || match[2]
  # A bare address ("Co-authored-by: dev@example.com") carries no name part.
  name = co_author[/(.*) /, 1].to_s
  [name, email, email[/(.*)@/, 1] || email]
end
# Writes the commit rows of every repository found in the current directory
# to a CSV file, one repository after another.
#
# A repository that fails is reported on stdout and skipped so the remaining
# ones are still processed. Only StandardError is rescued: an interrupt
# (Ctrl-C) or exit request stops the run instead of being logged and ignored.
#
# @param csv_path [String] destination file; it is overwritten on every run
# @return [nil]
def repo_stats(csv_path = '/tmp/x.csv')
  puts 'Repositories Stats'
  File.open(csv_path, 'w') do |f|
    # Dir.glob keeps directory names containing whitespace intact, which
    # splitting the output of `ls` does not.
    Dir.glob('*/').sort.each do |repo|
      puts repo
      begin
        rows = repo_commits(repo)
        f.write("#{rows}\n") unless rows.empty?
      rescue StandardError => e
        puts "Repo #{repo} has errors:\n#{e}"
      end
    end
  end
  puts 'Finished Stats'
end
# Prints one SQL INSERT statement per row of the generated CSV file.
#
# Every field becomes a quoted SQL string. Trailing empty fields are kept so
# that each statement carries as many values as the row has columns, and
# single quotes and backslashes inside a value are escaped.
#
# @param csv_path [String] the ';'-separated file to read; a missing file is
#   treated as empty
# @return [nil]
def inserts(csv_path = '/tmp/x.csv')
  rows = File.exist?(csv_path) ? File.read(csv_path).split("\n") : []
  escapes = { "'" => "''", '\\' => '\\\\' }
  statements = rows.map do |row|
    # A limit of -1 keeps the empty co-author fields at the end of the row.
    values = row.split(';', -1).map { |value| "'#{value.gsub(/['\\]/, escapes)}'" }
    "insert into commits values (#{values.join(',')});"
  end
  puts statements
end
require 'rest-client'
require 'json'
# Token sent in the Authorization header when fetch_all_repos lists the
# organization's repositories. Placeholder value: replace before running.
GITHUB_TOKEN = 'YOUR TOKEN'
# Password of the root user on the local MySQL server used by import_db.
LOCAL_DB_PASSWORD = ''
# MySQL user that import_db_gcloud connects as. Placeholder value.
GOOGLE_DB_USER = 'your db user'
# Target of `gsutil cp` and source of `gcloud sql import csv` in
# import_db_gcloud (presumably a gs:// object URI; verify). Placeholder value.
DATABASE_CSV = 'CSV_FILE'
# Clones every repository of the PRAVALER organization that does not yet
# exist as a directory in the current working directory.
#
# The repository list is read page by page from the GitHub REST API until an
# empty page is returned (at most 100 pages). Repositories are cloned over
# SSH.
#
# @return [nil]
def fetch_all_repos
  puts 'Fetching all repos...'
  (1..100).each do |page|
    puts "Github repos page #{page}"
    # 100 is the largest page size the API serves.
    response = RestClient.get "https://api.github.com/orgs/PRAVALER/repos?per_page=100&page=#{page}", Authorization: "token #{GITHUB_TOKEN}"
    repos = JSON.parse(response)
    break if repos.empty?

    missing = repos.map { |repo| repo['name'] }.reject { |name| File.directory?(name) }
    missing.each do |name|
      puts "Fetching #{name}"
      `git clone git@github.com:PRAVALER/#{name}.git`
    end
  end
  puts 'Done!'
end
# Loads /tmp/x.csv into the `commits` table of the local `pravaler` database.
#
# The file is first copied into the `column-store` container, then the table
# is truncated and reloaded through the mysql client on 127.0.0.1:9999.
#
# @return [nil]
def import_db
  puts 'Importing data...'
  `docker cp /tmp/x.csv column-store:/tmp`
  # A bare "-p" makes the mysql client prompt for a password, so the flag is
  # only passed when a password is actually configured.
  password_flag = LOCAL_DB_PASSWORD.empty? ? '' : " -p#{LOCAL_DB_PASSWORD}"
  `mysql -h 127.0.0.1 -u root -P 9999#{password_flag} -e "USE pravaler; truncate table commits; LOAD DATA INFILE '/tmp/x.csv' INTO TABLE commits FIELDS TERMINATED BY ';' LINES TERMINATED BY '\n'"`
  puts 'Imported!'
end
# Replaces the contents of github_metrics.commits in Cloud SQL with /tmp/x.csv.
#
# Steps, in order: rewrite the ';'-separated file as a ','-separated copy,
# upload the copy with gsutil, truncate the target table, then start the
# Cloud SQL CSV import from the uploaded file.
#
# NOTE(review): the sed step replaces every ';' with ',' without quoting, so a
# field that already contains a comma would shift the columns; confirm the
# data never does.
#
# @return [String] the output of the final gcloud command
def import_db_gcloud
  steps = [
    %(cat /tmp/x.csv |sed 's/;/,/g' >/tmp/x2.csv),
    %(gsutil cp /tmp/x2.csv #{DATABASE_CSV}),
    %(mysql -h 127.0.0.1 -u #{GOOGLE_DB_USER} -e "truncate table github_metrics.commits"),
    %(gcloud sql import csv coding-metrics #{DATABASE_CSV} --database=github_metrics --table=commits --quiet)
  ]
  outputs = steps.map { |step| `#{step}` }
  outputs.last
end
# All steps work relative to this directory: fetch_all_repos clones into it
# and repo_stats scans its sub-directories. Raises if it does not exist.
Dir.chdir 'repositories'
# Pipeline steps, in the order they are meant to run. The first two are
# disabled; uncomment them to refresh the clones and rebuild /tmp/x.csv.
#fetch_all_repos
#repo_stats
import_db_gcloud
# ************************************************************
# Sequel Pro SQL dump
# Version 4541
#
# http://www.sequelpro.com/
# https://github.com/sequelpro/sequelpro
#
# Host: 127.0.0.1 (MySQL 5.7.34-google-log)
# Database: github_metrics
# Generation Time: 2021-09-28 18:21:28 +0000
# ************************************************************
/*!40101 SET @OLD_CHARACTER_SET_CLIENT=@@CHARACTER_SET_CLIENT */;
/*!40101 SET @OLD_CHARACTER_SET_RESULTS=@@CHARACTER_SET_RESULTS */;
/*!40101 SET @OLD_COLLATION_CONNECTION=@@COLLATION_CONNECTION */;
/*!40101 SET NAMES utf8 */;
/*!40014 SET @OLD_FOREIGN_KEY_CHECKS=@@FOREIGN_KEY_CHECKS, FOREIGN_KEY_CHECKS=0 */;
/*!40101 SET @OLD_SQL_MODE=@@SQL_MODE, SQL_MODE='NO_AUTO_VALUE_ON_ZERO' */;
/*!40111 SET @OLD_SQL_NOTES=@@SQL_NOTES, SQL_NOTES=0 */;
# Dump of table commits
# ------------------------------------------------------------
# One row per commit. The column order matches the ';'-separated rows that
# the Ruby script writes to /tmp/x.csv. `nickname` and `co_author_nickname`
# hold the part of the corresponding e-mail address before the '@'.
# `changed_files`, `inserted` and `deleted` come from `git log --shortstat`.
DROP TABLE IF EXISTS `commits`;
CREATE TABLE `commits` (
`repo` varchar(255) DEFAULT NULL,
`commit_id` varchar(255) DEFAULT NULL,
`name` varchar(255) DEFAULT NULL,
`email` varchar(255) DEFAULT NULL,
`commit_date` datetime DEFAULT NULL,
`nickname` varchar(255) DEFAULT NULL,
`changed_files` int(11) DEFAULT NULL,
`inserted` int(11) DEFAULT NULL,
`deleted` int(11) DEFAULT NULL,
`co_author_name` varchar(255) DEFAULT NULL,
`co_author_email` varchar(255) DEFAULT NULL,
`co_author_nickname` varchar(255) DEFAULT NULL
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
# Dump of table dev_links
# ------------------------------------------------------------
# NOTE(review): nothing in this dump populates or reads the table. Presumably
# it maps a nickname as it appears in `commits` (`original_nickname`) to the
# developer's canonical nickname (`real_nickname`); verify against the
# queries that use it.
DROP TABLE IF EXISTS `dev_links`;
CREATE TABLE `dev_links` (
`original_nickname` varchar(255) DEFAULT NULL,
`real_nickname` varchar(255) DEFAULT NULL,
`names` varchar(255) DEFAULT NULL
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
# Dump of table devs
# ------------------------------------------------------------
# NOTE(review): presumably one row per developer nickname with the manager
# and team they belong to; the table is not filled by anything in this dump,
# so confirm with whoever maintains the data.
DROP TABLE IF EXISTS `devs`;
CREATE TABLE `devs` (
`nickname` varchar(255) DEFAULT NULL,
`manager` varchar(255) DEFAULT NULL,
`team` varchar(255) DEFAULT NULL
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
# Dump of table projects
# ------------------------------------------------------------
# Associates a repository name with a project name; `repo` is the primary
# key, so each repository has at most one row.
# NOTE(review): presumably `repo` is meant to match `commits`.`repo`; confirm.
DROP TABLE IF EXISTS `projects`;
CREATE TABLE `projects` (
`repo` varchar(255) NOT NULL,
`project` varchar(255) DEFAULT NULL,
PRIMARY KEY (`repo`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
/*!40111 SET SQL_NOTES=@OLD_SQL_NOTES */;
/*!40101 SET SQL_MODE=@OLD_SQL_MODE */;
/*!40014 SET FOREIGN_KEY_CHECKS=@OLD_FOREIGN_KEY_CHECKS */;
/*!40101 SET CHARACTER_SET_CLIENT=@OLD_CHARACTER_SET_CLIENT */;
/*!40101 SET CHARACTER_SET_RESULTS=@OLD_CHARACTER_SET_RESULTS */;
/*!40101 SET COLLATION_CONNECTION=@OLD_COLLATION_CONNECTION */;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment