Skip to content

Instantly share code, notes, and snippets.

@adrianlzt
Created October 28, 2022 11:31
Show Gist options
  • Save adrianlzt/fb1ceaa6937e59d4e5ad4d8f4947801a to your computer and use it in GitHub Desktop.
Save adrianlzt/fb1ceaa6937e59d4e5ad4d8f4947801a to your computer and use it in GitHub Desktop.
Script to upgrade gitlab from one version to the latest, using a backup and docker
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# vim:fenc=utf-8
#
# Script to upgrade gitlab from one version to the latest, using a backup and docker.
#
import argparse
import logging
import os
import re
import subprocess
import sys
import time

import docker
import requests
import semver
# Layout of every log line: timestamp, level, source file and line, the
# emitting function name (right-aligned to 20 characters), then the message.
FORMAT = (
    "[%(asctime)s %(levelname)s %(filename)s:%(lineno)s"
    " - %(funcName)20s() ] %(message)s"
)
logging.basicConfig(format=FORMAT, level=logging.INFO)
# Module-level logger used by everything in this script.
logger = logging.getLogger(__name__)
class MigrationException(Exception):
    """Custom exception signalling that a migration status command failed."""
class Main:
    """Starting with a backup of Gitlab, migrate the data to the latest version of Gitlab.

    This script uses docker to run the gitlab container and migrate the data.
    Data and config dirs are preserved in local paths.

    The upgrade path should be obtained from
    https://gitlab-com.gitlab.io/support/toolbox/upgrade-path/

    Steps:
    1. Restore backup in the same version of Gitlab
    2. Once the web server is working, check health and stop if everything is ok
    3. Run the next version of Gitlab in the upgrade path
    4. Wait until migrations tasks are finished
    5. Check health and stop if everything is ok
    6. Repeat steps 3-5 until the latest version is reached
    """

    # Name of the single container this script starts, queries and removes.
    CONTAINER_NAME = "gitlab"

    def __init__(self, args):
        """Store the parsed CLI arguments and connect to the local docker daemon.

        ``args`` is the namespace returned by ``parse_args``; the attributes
        read by this class are ``backup_file``, ``backup_gitlab_version``,
        ``upgrade_path``, ``ignore_start_container`` and ``ignore_migrations``.
        """
        self.args = args
        self.docker_client = docker.from_env()
        # Filled in by run() once the local directories exist.
        self.gitlab_dir = None

    def run(self):
        """Restore the backup (when one was given) and walk the upgrade path."""
        logger.info(sys._getframe().f_code.co_name)
        self.gitlab_dir = self._create_dirs()

        if self.args.backup_file is not None:
            self._restore_backup(self.args.backup_file, self.args.backup_gitlab_version)
            logger.info("Restore backup finished")

        for version in self._parse_upgrade_path(self.args.upgrade_path):
            # monotonic() so that wall-clock adjustments do not distort the duration.
            start_time = time.monotonic()
            self._upgrade_gitlab(version)
            end_time = time.monotonic()
            logger.info(f"Upgrade to version {version} finished in {(end_time - start_time)/60} minutes")

    def _container(self):
        """Return the docker container object named ``CONTAINER_NAME``."""
        return self.docker_client.containers.get(self.CONTAINER_NAME)

    @staticmethod
    def _write_log(path, output):
        """Write the raw bytes ``output`` of a container command to the text file ``path``.

        The file is always written as UTF-8 so the result does not depend on
        the locale of the machine running the script; undecodable bytes are
        replaced instead of aborting the upgrade.
        """
        with open(path, "w", encoding="utf-8") as f:
            f.write(output.decode("utf-8", errors="replace"))

    @staticmethod
    def _parse_queue_size(output, from_console=False):
        """Extract a queue size from the bytes printed by a migration status command.

        With ``from_console=False`` the whole output must be an integer.
        With ``from_console=True`` the number is taken from the ``@_size=<n>>``
        marker found in the ``gitlab-rails console`` output.

        Raises MigrationException when no number can be extracted, so that the
        caller's retry logic also covers unexpected output.
        """
        text = output.decode("utf-8", errors="replace")
        try:
            if from_console:
                match = re.search(r"@_size=(\d+)>", text)
                if match is None:
                    raise ValueError("no '@_size=<n>>' marker in console output")
                return int(match.group(1))
            return int(text)
        except ValueError as err:
            raise MigrationException(
                f"Unexpected output from migration status command: {text.strip()!r}"
            ) from err

    @staticmethod
    def _split_upgrade_path(upgrade_path):
        """Split ``"15.0.5 => 15.4.3"`` into ``["15.0.5", "15.4.3"]``.

        Whitespace around the ``=>`` separators and around the whole string is
        ignored, and empty entries are dropped.
        """
        return [
            version
            for version in (part.strip() for part in upgrade_path.split("=>"))
            if version
        ]

    def _upgrade_gitlab(self, gitlab_version):
        """Upgrade gitlab to the specified version.

        1. Start the container
        2. Wait until the web server is ready
        3. Wait for migrations
        4. Check health
        5. Stop the container
        6. Remove the container

        Raises MigrationException when the migration status cannot be obtained
        after three attempts, and Exception when the hashed storage migration
        or the health check fails.
        """
        logger.info(sys._getframe().f_code.co_name)

        if not self.args.ignore_start_container:
            self._start_gitlab_container(gitlab_version)
        self._wait_until_ready()

        if not self.args.ignore_migrations:
            # Allow three attempts in total. In version 12.10.14-ce.0 the command
            # was found failing in the script but working manually, probably a
            # timing condition.
            max_attempts = 3
            for attempt in range(1, max_attempts + 1):
                try:
                    self._wait_migrations(gitlab_version)
                    break
                except MigrationException:
                    if attempt == max_attempts:
                        raise
                    time.sleep(30)

        parsed_version = semver.VersionInfo.parse(gitlab_version)
        # Before moving to version 14 we need to migrate to hashed storage.
        if parsed_version.major == 13:
            # NOTE: an earlier iteration of this script switched all read-only
            # projects to read-write (via psql, `repository_read_only`) before
            # this migration and restored them afterwards. That workaround was
            # disabled; the failures it addressed were possibly caused by not
            # letting the migrations run completely.
            rc, output = self._container().exec_run("gitlab-rake gitlab:storage:migrate_to_hashed")
            self._write_log(f"gitlab-storage-migrate_to_hashed-{gitlab_version}.log", output)
            if rc != 0 and "Nothing to do" not in output.decode("utf-8", errors="replace"):
                raise Exception("Error running gitlab-rake gitlab:storage:migrate_to_hashed in gitlab container")
            # Time to wait for the migrate_to_hashed jobs to finish.
            # For our use case, it took around 10s.
            time.sleep(60)

        self._check_health(gitlab_version)
        self._stop_and_remove_gitlab_container()

    def _wait_migrations(self, version):
        """Wait until all the migration tasks are finished.

        The command to check the migration status is different between versions.

        Raises MigrationException when a status command exits with a non-zero
        code or prints something that is not a queue size.
        """
        logger.info(sys._getframe().f_code.co_name)
        parsed_version = semver.VersionInfo.parse(version)

        # Select the status command(s) based on the version. Versions >= 14 and
        # versions <= 12.8 need a second command (cmdbis) checking another queue.
        cmdbis = None
        if parsed_version.major >= 14:
            cmd = "gitlab-rails runner -e production 'puts Gitlab::BackgroundMigration.remaining'"
            cmdbis = "gitlab-rails runner -e production 'puts Gitlab::Database::BackgroundMigration::BatchedMigration.queued.count'"
        elif parsed_version.major > 12 or (parsed_version.major == 12 and parsed_version.minor > 8):
            cmd = "gitlab-rails runner -e production 'puts Gitlab::BackgroundMigration.remaining'"
        else:
            cmd = "gitlab-rails runner -e production 'puts Sidekiq::Queue.new(\"background_migration\").size'"
            # NOTE: filtering the scheduled set by 'BackgroundMigrationWorker'
            # through the console could not be made to work (the quotes were
            # lost), so the unfiltered scheduled set is inspected instead.
            cmdbis = ['/bin/bash', '-c', "echo $'Sidekiq::ScheduledSet.new.select' | gitlab-rails console"]

        # Avoid exiting the loop if the command has never seen a migration in the queue.
        migrations_seen = False
        start_time = time.monotonic()
        while True:
            rc, output = self._container().exec_run(cmd)
            if rc != 0:
                raise MigrationException("Error running migration command in gitlab container")
            logger.debug(f"Migration output: {output.decode('utf-8', errors='replace').strip()}")
            pending = self._parse_queue_size(output)

            # When a second command exists, both queues must be empty to finish.
            if cmdbis:
                rc2, output2 = self._container().exec_run(cmdbis)
                if rc2 != 0:
                    raise MigrationException("Error running migration bis command in gitlab container")
                logger.debug(f"Migration output bis: {output2.decode('utf-8', errors='replace').strip()}")
                # The list form is the gitlab-rails console command, whose size
                # has to be extracted from the "@_size=0>" marker.
                pending += self._parse_queue_size(output2, from_console=isinstance(cmdbis, list))

            if pending == 0 and migrations_seen:
                logger.info("All migrations finished")
                break

            if pending != 0 and not migrations_seen:
                logger.debug(f"Migration queue not empty: {pending}")
                migrations_seen = True

            # Give up waiting when no migration showed up during 5 minutes.
            if time.monotonic() - start_time > 300 and not migrations_seen:
                if version == "13.1.11":
                    # Migrations too fast for this version, the command does not see them.
                    logger.info("No migration seen in 5 minutes. Ignoring, expected for this version")
                else:
                    logger.warning("No migration seen in 5 minutes, skipping migration wait")
                    input("Press Enter to continue...")
                break

            time.sleep(10)

        # precaution
        time.sleep(30)

    def _stop_and_remove_gitlab_container(self):
        """Stop the gitlab container and then remove it."""
        logger.info("Stop gitlab container")
        self._container().stop()
        logger.info("Remove gitlab container")
        self._container().remove()

    def _create_dirs(self):
        """Create the dirs to store the data and config.

        Creates ``gitlab/config`` and ``gitlab/data`` under the current working
        directory (existing directories are left untouched).

        Return the full path to the gitlab dir.
        """
        logger.info(sys._getframe().f_code.co_name)
        gitlab_dir = os.path.join(os.getcwd(), "gitlab")
        # exist_ok avoids the check-then-create race; creating the sub dirs
        # also creates gitlab_dir itself.
        for sub_dir in ("config", "data"):
            os.makedirs(os.path.join(gitlab_dir, sub_dir), exist_ok=True)
        return gitlab_dir

    def _wait_until_ready(self):
        """Poll http://localhost:80/ every 10 seconds until it answers 200 or 302."""
        logger.info("Wait until gitlab is ready")
        while True:
            try:
                # The timeout keeps a half-started server from blocking the poll forever.
                response = requests.get("http://localhost:80/", timeout=10)
                if response.status_code in (200, 302):
                    break
            except (requests.exceptions.ConnectionError, requests.exceptions.Timeout):
                # Server not accepting or not answering yet, keep polling.
                pass
            time.sleep(10)

        # precaution
        time.sleep(30)

    def _start_gitlab_container(self, gitlab_version):
        """Start the image ``gitlab/gitlab-ce:{gitlab_version}-ce.0`` in detached mode.

        The container is named ``CONTAINER_NAME``, publishes port 80 on port 80,
        uses shm-size=256m and mounts the local ``config`` and ``data`` dirs as
        ``/etc/gitlab`` and ``/var/opt/gitlab``.
        """
        logger.info(f"Start gitlab container with version {gitlab_version}")
        config_dir = os.path.join(self.gitlab_dir, "config")
        data_dir = os.path.join(self.gitlab_dir, "data")
        self.docker_client.containers.run(
            f"gitlab/gitlab-ce:{gitlab_version}-ce.0",
            detach=True,
            name=self.CONTAINER_NAME,
            ports={"80": "80"},
            shm_size="256m",
            volumes={
                config_dir: {"bind": "/etc/gitlab", "mode": "rw"},
                data_dir: {"bind": "/var/opt/gitlab", "mode": "rw"},
            },
        )

    def _restore_backup(self, backup_file, backup_gitlab_version):
        """Restore the backup in the same version of Gitlab.

        This is the first step of the upgrade process: start the container,
        copy the backup into it, stop puma and sidekiq, run the restore,
        restart, check health and finally stop and remove the container.

        Raises Exception when any of those steps reports a failure.
        """
        logger.info(sys._getframe().f_code.co_name)
        self._start_gitlab_container(backup_gitlab_version)
        self._wait_until_ready()

        logger.info("Copy the backup file")
        backup_file_path = os.path.join(os.getcwd(), backup_file)
        # Use the docker CLI because docker-py would require wrapping the backup
        # in a new tar file. The argument list (no shell) keeps paths with spaces
        # or shell metacharacters intact.
        copy_result = subprocess.run(
            ["docker", "cp", backup_file_path, f"{self.CONTAINER_NAME}:/var/opt/gitlab/backups/"],
            check=False,
        )
        if copy_result.returncode != 0:
            raise Exception("Error copying backup file")

        logger.info("Stop puma and sidekiq processes")
        rc, _ = self._container().exec_run("gitlab-ctl stop puma")
        if rc != 0:
            raise Exception("Error stopping puma")
        rc, _ = self._container().exec_run("gitlab-ctl stop sidekiq")
        if rc != 0:
            raise Exception("Error stopping sidekiq")

        # Wait 10s as precaution
        time.sleep(10)

        logger.info("Restore the backup")
        rc, output = self._container().exec_run("gitlab-rake gitlab:backup:restore force=yes")
        # Keep the output even on failure, it is needed to diagnose the error.
        self._write_log("restore-backup.log", output)
        if rc != 0:
            raise Exception("Error restoring backup")

        logger.info("Restart gitlab container")
        self._container().restart()
        self._wait_until_ready()
        self._check_health(backup_gitlab_version)
        self._stop_and_remove_gitlab_container()

    def _check_health(self, version):
        """Fail if the health check fails.

        Also store the output of the health check in the file
        ``check-health-{version}.log`` for later use in case of failure.

        Raises Exception when ``gitlab-rake gitlab:check`` exits with a
        non-zero code.
        """
        logger.info("Check health")
        rc, output = self._container().exec_run("gitlab-rake gitlab:check SANITIZE=true")
        self._write_log(f"check-health-{version}.log", output)
        if rc != 0:
            raise Exception("Error checking health")

    def _parse_upgrade_path(self, upgrade_path):
        """Parse the upgrade path.

        Format is: 15.0.5 => 15.4.3 => 15.5.1

        Return a list with the versions.
        """
        logger.info(sys._getframe().f_code.co_name)
        return self._split_upgrade_path(upgrade_path)
def parse_args(argv):
    """Parse the command-line arguments of the upgrade script.

    ``argv`` is the argument list without the program name. Returns the
    argparse namespace with the attributes ``verbose``, ``backup_file``,
    ``backup_gitlab_version``, ``upgrade_path``, ``ignore_start_container``
    and ``ignore_migrations``.
    """
    # Default chain of versions, rendered in the "a => b => c" format.
    default_upgrade_path = " => ".join([
        "10.8.7", "11.11.8", "12.0.12", "12.1.17", "12.10.14",
        "13.0.14", "13.1.11", "13.8.8", "13.12.15",
        "14.0.12", "14.3.6", "14.9.5", "14.10.5",
        "15.0.5", "15.4.3", "15.5.1",
    ])

    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        description="Script to upgrade gitlab.",
    )
    parser.add_argument(
        "-v", "--verbose",
        action="count",
        default=0,
        help="verbose output. specify for debug-level output.",
    )
    parser.add_argument(
        "-b", "--backup-file",
        help="Path to the backup file. If not defined it will assume a gitlab directory with the data and config files",
    )
    parser.add_argument(
        "-bv", "--backup-gitlab-version",
        default="10.3.6",
        help="Gitlab version where the backup was taken",
    )
    parser.add_argument(
        "-up", "--upgrade-path",
        default=default_upgrade_path,
        help="Upgrade path, taken from https://gitlab-com.gitlab.io/support/toolbox/upgrade-path/. Format: 15.0.5 => 15.4.3 => 15.5.1",
    )
    parser.add_argument(
        "--ignore-start-container",
        action="store_true",
        help="Do not start the container. Flag to debug errors",
    )
    parser.add_argument(
        "--ignore-migrations",
        action="store_true",
        help="Do not run migrations. Flag to debug errors",
    )
    return parser.parse_args(argv)
if __name__ == "__main__":
    # Script entry point: parse the CLI, raise the log level to DEBUG when
    # at least one -v was given, then run the whole upgrade.
    cli_args = parse_args(sys.argv[1:])
    if cli_args.verbose > 0:
        logger.setLevel(logging.DEBUG)
    Main(cli_args).run()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment