GitHub to GitHub repository migration script
require "json" | |
require "typhoeus" | |
require "pp" | |

# MigrateRepo usage instructions:
#
# The way this is used at Airbnb for migrating from GitHub to GHE is by
# creating a separate organization with no members called "migration",
# adding that as the target organization, and then, once the migration
# has finished, moving the repository over to its intended location.
#
# We do it this way so that people who are auto-subscribed to the
# intended organization are not spammed by potentially thousands of
# comments from the source repository.
#
# The sleep time is arbitrarily chosen based on empirical evidence that
# lower times cause huge backups in resque jobs and general GHE
# shittiness.
#
# Generally speaking, the source API host will be api.github.com, the
# source host will be github.com, and the source prefix will be empty.
#
# The target API host and the target host will be your GHE deployment
# domain. Your target prefix will be "/api/v3".
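#
# For example, a typical configuration might look like the following.
# These are illustrative values only; substitute your own organizations
# and GHE domain:
#
#   SOURCE_ORGANIZATION = "my-source-org"
#   SOURCE_API_HOST     = "api.github.com"
#   SOURCE_HOST         = "github.com"
#   SOURCE_PREFIX       = ""
#   TARGET_ORGANIZATION = "migration"
#   TARGET_API_HOST     = "ghe.example.com"
#   TARGET_HOST         = "ghe.example.com"
#   TARGET_PREFIX       = "/api/v3"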
#
# There is a known failure case on comments whose basis commit was
# lost in a force push. In this case, the run will print "Server Error"
# and move on to the next comment. This is unfortunately expected in
# many circumstances. Any failed write requests to the target are
# printed at the end of the run, and you can choose to keep the output
# for posterity or discard it if you really don't care.
#
# Don't use this tool on existing repos. It is intended to be used
# on repos that are moved over for the first time from another
# GitHub organization.
#
# You should not expect this software to work, and you should not
# expect it to do anything but leave your repositories in a
# completely inconsistent, awful, destroyed state. Use at your own
# risk. Turn back while you still can. Seriously.

class MigrateRepo
  HOOK_URLS = []

  SOURCE_ORGANIZATION = ""
  SOURCE_API_HOST = ""
  SOURCE_HOST = ""
  SOURCE_PREFIX = ""

  TARGET_ORGANIZATION = ""
  TARGET_API_HOST = ""
  TARGET_HOST = ""
  TARGET_PREFIX = ""

  SLEEP_TIME = 1

  def self.migrate(source_auth_token, target_auth_token, repo)
    source = GithubHost.new(
      :host => SOURCE_HOST,
      :api_host => SOURCE_API_HOST,
      :prefix => SOURCE_PREFIX,
      :token => source_auth_token,
      :org => SOURCE_ORGANIZATION,
      :repo => repo,
    )
    target = GithubHost.new(
      :host => TARGET_HOST,
      :api_host => TARGET_API_HOST,
      :prefix => TARGET_PREFIX,
      :token => target_auth_token,
      :org => TARGET_ORGANIZATION,
      :repo => repo,
    )

    old_repo = source.request(
      "/repos/#{SOURCE_ORGANIZATION}/#{repo}",
      :method => :get,
    )

    # Create the repo and add hooks, content
    self.create_repo(target, old_repo)
    self.add_hooks(target, HOOK_URLS)
    self.push_content(source, target)

    # Create a mapping of all pull requests from number to content
    pulls = self.pull_requests(source)
    pull_map = pulls.inject({}) { |h, p| h[p["number"]] = p; h }

    processors = []
    self.issues(source).each do |old_issue|
      puts "Processing issue #{old_issue["number"]}"

      # Create the issue
      issue = self.add_issue(target, old_issue)

      # ActionProcessor allows us to perform one action per issue per second
      processor = ActionProcessor.new(target)
      processors << processor

      # Check to see if it has a pull request
      has_pull = pull_map.include?(old_issue["number"])

      # Get all comments in the issue
      comments = self.comments(source, old_issue, has_pull)

      # Add the pull request iff one exists on the original issue
      if has_pull
        processor.add_action(
          :add_pull,
          :issue => issue,
          :old_pull => pull_map[old_issue["number"]],
        )
      end

      comments.each do |comment|
        if comment["commit_id"]
          # A commit id means that this is a PR comment
          processor.add_action(
            :add_pull_comment,
            :issue => issue,
            :comment => comment,
          )
        else
          # This is an issue comment
          processor.add_action(
            :add_issue_comment,
            :issue => issue,
            :comment => comment,
          )
        end
      end

      # Finally, close the issue if the original one was already closed
      if old_issue["state"] != "open"
        processor.add_action(
          :close_issue,
          :issue => issue,
          :state => old_issue["state"],
        )
      end

      # Process a single action per issue opened.
      processors.each(&:process_action)
      processors.reject! { |p| p.empty? }
      sleep(SLEEP_TIME)
    end

    until processors.empty?
      processors.each(&:process_action)
      processors.reject! { |p| p.empty? }
      sleep(SLEEP_TIME)
    end

    pp target.failures
  end
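
  # Creates the target repository as a private repo in the target
  # organization, carrying over the description from the source repo.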
  def self.create_repo(target, old_repo)
    target.request(
      "/orgs/#{target.org}/repos",
      :method => :post,
      :body => {
        "name" => target.repo,
        "description" => old_repo["description"],
        "private" => true,
      }.to_json,
    )
  end
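
  # Installs a JSON webhook on the target repository for each configured
  # hook URL.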
  def self.add_hooks(target, hooks)
    hooks.each do |hook_url|
      target.request(
        "/repos/#{target.org}/#{target.repo}/hooks",
        :method => :post,
        :body => {
          "name" => "web",
          "active" => true,
          "config" => {
            "url" => hook_url,
            "content_type" => "json",
          },
        }.to_json,
      )
    end
  end
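
  # Mirror-clones the source repository into a temporary directory and
  # pushes all branches and tags to the freshly created target repository
  # over SSH.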
  def self.push_content(source, target)
    Dir.mktmpdir(source.repo) do |dir|
      `git clone --mirror git@#{source.host}:#{source.org}/#{source.repo}.git #{dir}`
      `git --git-dir=#{dir} remote add target git@#{target.host}:#{target.org}/#{target.repo}.git`
      `git --git-dir=#{dir} push --all target`
      `git --git-dir=#{dir} push --tags target`
    end
  end
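
  # Recreates an issue on the target repo with the original title and an
  # annotated body, and returns the created issue.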
  def self.add_issue(target, old_issue)
    target.request(
      "/repos/#{target.org}/#{target.repo}/issues",
      :method => :post,
      :body => {
        :title => old_issue["title"],
        :body => annotated_body(old_issue),
      }.to_json,
    )
  end

  def self.close_issue(target, issue, state)
    # Close the issue
    target.request(
      "/repos/#{target.org}/#{target.repo}/issues/#{issue["number"]}",
      :method => :patch,
      :body => {
        :state => state,
      }.to_json,
    )
  end

  def self.add_issue_comment(target, issue, comment)
    target.request(
      "/repos/#{target.org}/#{target.repo}/issues/#{issue["number"]}/comments",
      :method => :post,
      :body => {
        :body => annotated_body(comment),
      }.to_json,
    )
  end
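
  # Attaches a pull request to the issue created above by POSTing to the
  # pulls endpoint with the issue number. Open PRs are recreated from
  # branch refs; closed PRs fall back to commit SHAs.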
  def self.add_pull(target, issue, old_pull)
    if old_pull["state"] == "open"
      # If the PR is open, assume the refs are present
      base = old_pull["base"]["ref"]
      head = old_pull["head"]["ref"]
    else
      # If the PR is closed, fall back to SHA
      base = old_pull["base"]["sha"]
      head = old_pull["head"]["sha"]
    end

    target.request(
      "/repos/#{target.org}/#{target.repo}/pulls",
      :method => :post,
      :body => {
        :issue => issue["number"],
        :base => base,
        :head => head,
      }.to_json,
    )
  end
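
  # Recreates a pull request review comment at its original commit, path,
  # and diff position.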
  def self.add_pull_comment(target, issue, comment)
    target.request(
      "/repos/#{target.org}/#{target.repo}/pulls/#{issue["number"]}/comments",
      :method => :post,
      :body => {
        :body => annotated_body(comment),
        :commit_id => comment["original_commit_id"],
        :path => comment["path"],
        :position => comment["original_position"],
      }.to_json,
    )
  end
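
  # Prefixes a migrated body with its original author and timestamp, since
  # everything written to the target is attributed to the owner of the
  # target auth token.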
  def self.annotated_body(original)
    author = original["user"]["login"]
    timestamp = original["created_at"]
    body = original["body"]
    "Originally posted by #{author} at #{timestamp}:\n\n#{body}"
  end
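
  # Fetches every issue in the source repo, open and closed, sorted by
  # creation time so issues are recreated in roughly their original order.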
  def self.issues(source)
    open = source.get_all(
      "/repos/#{source.org}/#{source.repo}/issues",
      :params => {
        :state => "open",
      },
    )
    closed = source.get_all(
      "/repos/#{source.org}/#{source.repo}/issues",
      :params => {
        :state => "closed",
      },
    )
    (open + closed).sort { |a, b| a["created_at"] <=> b["created_at"] }
  end

  def self.pull_requests(source)
    open = source.get_all(
      "/repos/#{source.org}/#{source.repo}/pulls",
      :params => {
        :state => "open",
      },
    )
    closed = source.get_all(
      "/repos/#{source.org}/#{source.repo}/pulls",
      :params => {
        :state => "closed",
      },
    )
    (open + closed).sort { |a, b| a["created_at"] <=> b["created_at"] }
  end
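
  # Fetches the issue comments and, when the issue has an associated pull
  # request, the review comments as well, merged and sorted by creation
  # time.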
  def self.comments(source, issue, has_pull)
    comments = source.get_all(
      "/repos/#{source.org}/#{source.repo}/issues/#{issue["number"]}/comments",
      :params => {
        :sort => 'created',
      },
    )
    if has_pull
      comments += source.get_all(
        "/repos/#{source.org}/#{source.repo}/pulls/#{issue["number"]}/comments",
        :params => {
          :sort => 'created',
        },
      )
    end
    comments.sort { |a, b| a["created_at"] <=> b["created_at"] }
  end
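
  # Holds the queue of write actions for a single migrated issue. The
  # migrate loop drains one action per processor per sleep interval,
  # keeping the write load on GHE low.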
  class ActionProcessor
    def initialize(target)
      @target = target
      @actions = []
    end

    def add_action(action, options = {})
      @actions << [action, options]
    end

    def process_action
      action, options = @actions.shift
      case action
      when :add_pull
        MigrateRepo.add_pull(@target, options[:issue], options[:old_pull])
      when :add_pull_comment
        MigrateRepo.add_pull_comment(@target, options[:issue], options[:comment])
      when :add_issue_comment
        MigrateRepo.add_issue_comment(@target, options[:issue], options[:comment])
      when :close_issue
        MigrateRepo.close_issue(@target, options[:issue], options[:state])
      end
    end

    def empty?
      @actions.empty?
    end
  end
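
  # Thin wrapper around the GitHub API for one host/org/repo. Handles
  # authentication and retries, and records failed requests so they can be
  # printed at the end of the run.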
  class GithubHost
    attr_reader :host, :org, :failures, :repo

    def initialize(options = {})
      @auth_token = options[:token]
      @api_host = options[:api_host]
      @host = options[:host]
      @prefix = options[:prefix]
      @org = options[:org]
      @repo = options[:repo]
      @failures = []
    end
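
    # Makes a single authenticated API request, retrying up to four
    # attempts. On success, returns the parsed JSON body (or "" for an
    # empty body); on failure, records the request and returns nil.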
    def request(path, options = {})
      tries = 0
      response = nil
      while tries <= 3
        tries += 1
        options[:headers] = {"Authorization" => "token #{@auth_token}"}
        request = Typhoeus::Request.new(
          "https://#{@api_host}#{@prefix}#{path}",
          options,
        )
        response = request.run
        if response.code < 300
          return response.body.empty? ? "" : JSON.parse(response.body)
        end
      end
      puts "Error: #{response.body}"
      @failures << [path, options, response.code]
      nil
    end
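
    # Pages through a list endpoint by incrementing the page parameter
    # until an empty page (or a failed request) comes back, and returns
    # the concatenated results.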
    def get_all(path, options = {})
      page = 1
      all_items = []
      while true
        items = request(
          path,
          :method => :get,
          :params => options[:params].merge({:page => page}),
        )
        # Stop on an empty page or if the request ultimately failed
        break if items.nil? || items.empty?
        all_items += items
        page += 1
      end
      all_items
    end
  end
end

if ARGV.length < 3
  puts "Usage: ruby migrate_repo.rb <source_auth_token> <target_auth_token> <repo_name>"
else
  MigrateRepo.migrate(ARGV[0], ARGV[1], ARGV[2])
end