Forked from fotinakis/Jenkins auto-shutdown-slaves job
Last active
December 19, 2017 01:24
-
-
Save jsoref/3ba9be087e25b6a4b6369bc68b4a8855 to your computer and use it in GitHub Desktop.
Auto-managed Jenkins slaves on Google Compute Engine
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python
from __future__ import print_function

import argparse
import logging
import os
import sys
import time
from os.path import expanduser

import httplib2
from apiclient.discovery import build
from oauth2client import tools
from oauth2client.client import GoogleCredentials
from oauth2client.client import flow_from_clientsecrets
from oauth2client.file import Storage
from oauth2client.tools import run_flow
# New instance properties
DEFAULT_MACHINE_TYPE = 'n1-standard-4'
DEFAULT_NETWORK = 'default'
DEFAULT_SERVICE_EMAIL = 'default'

# OAuth scope requested for this script's own Compute Engine API calls.
GCE_SCOPE = 'https://www.googleapis.com/auth/compute'

# Scopes listed for the service account in the instance-creation request.
DEFAULT_SCOPES = [
    'https://www.googleapis.com/auth/devstorage.full_control',
    GCE_SCOPE,
]

# New root persistent disk properties
DEFAULT_SNAPSHOT = 'your-snapshot-base-image'
DEFAULT_ZONE = 'us-central1-b'

API_VERSION = 'v1'
GCE_URL = 'https://www.googleapis.com/compute/%s/projects/' % API_VERSION
PROJECT_ID = 'your-project-id-here'

# Directory holding the OAuth client secrets and the cached credentials file.
OAUTH_STORE = expanduser("~/.jenkins-agent-manager")
CLIENT_SECRETS = os.path.join(OAUTH_STORE, 'client_secrets.json')
OAUTH2_STORAGE = os.path.join(OAUTH_STORE, 'oauth2.dat')
def main(argv):
    """Start, stop, or list Jenkins build-agent instances on Compute Engine.

    Args:
        argv: Full argument vector including the program name (argv[0] is
            skipped). Accepts an action (``up``, ``down`` or ``list``),
            ``--instance_name`` (required for ``up`` and ``down``),
            ``--debug``, and the flags contributed by oauth2client's
            ``tools.argparser``.

    Exits through ``sys.exit`` when ``--instance_name`` is missing for
    ``up``/``down`` (status 2), when the instance name does not start with
    ``node`` (status -1), and when ``up`` is requested for an instance that is
    already RUNNING.
    """
    logging.basicConfig(level=logging.WARN)

    # Print to stderr because Jenkins agent output is funky.
    print('Starting script...', file=sys.stderr)

    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
        parents=[tools.argparser])
    parser.add_argument('action', choices=['up', 'down', 'list'])
    parser.add_argument('--instance_name', required=False)
    parser.add_argument('--debug', required=False, action='store_true')

    # Parse the command-line flags.
    flags = parser.parse_args(argv[1:])
    instance_name = flags.instance_name
    needs_instance = flags.action in ('up', 'down')

    if needs_instance and not instance_name:
        parser.print_help()
        # A missing required argument is a usage error, so do not report
        # success to the caller.
        sys.exit(2)

    if needs_instance and not instance_name.startswith('node'):
        print("This script is restricted to build nodes, not %s" % instance_name,
              file=sys.stderr)
        sys.exit(-1)

    if flags.debug:
        # logging.basicConfig() does nothing once the root logger has a
        # handler (installed by the call above), so raise the level directly.
        logging.getLogger().setLevel(logging.DEBUG)
        httplib2.debuglevel = 2

    # Manual toggle: True uses the client-secrets flow with credentials cached
    # in OAUTH2_STORAGE; False uses application default credentials. Note that
    # the False branch defines neither `flow` nor `storage`, which the
    # run_flow() call below needs if the credentials turn out to be invalid.
    use_stored_oauth = True
    if use_stored_oauth:
        # Perform OAuth 2.0 authorization.
        flow = flow_from_clientsecrets(CLIENT_SECRETS, scope=GCE_SCOPE)
        storage = Storage(OAUTH2_STORAGE)
        credentials = storage.get()
    else:
        credentials = GoogleCredentials.get_application_default()

    print('Checking for stored OAuth2 credentials...', file=sys.stderr)
    print('If agent startup hangs here, you probably need to manually login and run the agent.up script once to populate oauth2.dat.',
          file=sys.stderr)
    if credentials is None or credentials.invalid:
        credentials = run_flow(flow, storage, flags)
    auth_http = credentials.authorize(httplib2.Http())
    print('Got credentials!', file=sys.stderr)

    # Build the service
    gce_service = build('compute', API_VERSION, cache_discovery=False)

    # Construct URLs
    project_url = '%s%s' % (GCE_URL, PROJECT_ID)
    zone_url = '%s/zones/%s' % (project_url, DEFAULT_ZONE)
    # Disk name matches instance name.
    disk_source_url = '%s/zones/%s/disks/%s' % (
        project_url, DEFAULT_ZONE, instance_name)
    machine_type_url = '%s/zones/%s/machineTypes/%s' % (
        project_url, DEFAULT_ZONE, DEFAULT_MACHINE_TYPE)
    network_url = '%s/global/networks/%s' % (project_url, DEFAULT_NETWORK)

    # Manual toggle shared by up() and down(): True stops instances and later
    # restarts them; False deletes them and re-creates them on the next `up`.
    keep_instances = True

    def list_instances():
        """Return the instances in DEFAULT_ZONE whose name starts with 'node'."""
        request = gce_service.instances().list(
            project=PROJECT_ID, filter=None, zone=DEFAULT_ZONE)
        response = request.execute(http=auth_http)
        if not response or 'items' not in response:
            return []
        return [item for item in response['items']
                if item['name'].startswith('node')]

    def print_instances(output=False):
        """Print the build-node names, to stdout if `output` else to stderr."""
        handle = sys.stdout if output else sys.stderr
        print(file=handle)
        for item in list_instances():
            print(item['name'], file=handle)
        print(file=handle)

    def down():
        """Stop (or, with keep_instances off, delete) the named instance."""
        print_instances()
        instances_api = gce_service.instances()
        operation = instances_api.stop if keep_instances else instances_api.delete
        request = operation(
            project=PROJECT_ID, zone=DEFAULT_ZONE, instance=instance_name)
        response = request.execute(http=auth_http)
        response = _blocking_call(gce_service, auth_http, response)
        print(response, file=sys.stderr)

    def up():
        """Start the named instance, creating it on its existing disk if absent."""
        # Skip startup if agent is already up.
        existing = next(
            (item for item in list_instances() if item['name'] == instance_name),
            None)

        if existing and existing['status'] == 'TERMINATED':
            if keep_instances:
                print('Agent "%s" already exists but is TERMINATED. Starting instance...' % instance_name,
                      file=sys.stderr)
                request = gce_service.instances().start(
                    project=PROJECT_ID, instance=instance_name,
                    zone=_get_zone(existing['zone']))
                response = request.execute(http=auth_http)
                _blocking_call(gce_service, auth_http, response)
                return
            print('Agent "%s" already exists but is TERMINATED. Deleting instance...' % instance_name,
                  file=sys.stderr)
            down()

        if existing and existing['status'] == 'RUNNING':
            sys.exit('Agent "%s" already exists.' % instance_name)

        print_instances()

        # Construct the request body
        body = {
            'name': instance_name,
            'machineType': machine_type_url,
            'disks': [{
                'type': 'PERSISTENT',
                'boot': 'true',
                'mode': 'READ_WRITE',
                'deviceName': instance_name,
                'zone': zone_url,
                'source': disk_source_url,
                'autoDelete': 'false',
            }],
            'networkInterfaces': [{
                'accessConfigs': [{
                    'type': 'ONE_TO_ONE_NAT',
                    'name': 'External NAT'
                }],
                'network': network_url,
            }],
            'serviceAccounts': [{
                'email': DEFAULT_SERVICE_EMAIL,
                'scopes': DEFAULT_SCOPES,
            }]
        }

        # Create the instance.
        request = gce_service.instances().insert(
            project=PROJECT_ID, body=body, zone=DEFAULT_ZONE)
        response = request.execute(http=auth_http)
        response = _blocking_call(gce_service, auth_http, response)
        print(response, file=sys.stderr)

    if flags.action == 'up':
        up()
    elif flags.action == 'down':
        down()
    elif flags.action == 'list':
        print_instances(True)
    else:
        raise Exception('Invalid action: %s' % flags.action)
def _get_zone(url): | |
return url.split('/')[-1] | |
def _blocking_call(gce_service, auth_http, response): | |
"""Blocks until the operation status is done for the given operation.""" | |
status = response['status'] | |
while status != 'DONE' and response: | |
operation_id = response['name'] | |
# Identify if this is a per-zone resource | |
if 'zone' in response: | |
zone_name = _get_zone(response['zone']) | |
request = gce_service.zoneOperations().get( | |
project=PROJECT_ID, | |
operation=operation_id, | |
zone=zone_name) | |
else: | |
request = gce_service.globalOperations().get( | |
project=PROJECT_ID, operation=operation_id) | |
response = request.execute(http=auth_http) | |
if response: | |
status = response['status'] | |
time.sleep(1) | |
return response | |
# Run the command-line entry point only when executed as a script.
if __name__ == '__main__':
    main(sys.argv)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Stop every build node listed by `agent.py list` that answers SSH but is no
# longer running a Jenkins agent.jar / slave.jar process.
. ~/jenkins-google-auth/bin/activate

# `grep '.'` drops the blank lines that `agent.py list` prints around the names.
~/bin/agent.py list 2>/dev/null | grep '.' | while read -r agent; do
  #echo
  #echo "Checking status of $agent..."
  # 1. Check to see if there is such a host in DNS.
  # 2. Check if we can SSH into the host. If we can, then check the process and maybe shut down.
  # This makes sure that we don't consider an SSH failure to be reason to shut down the node.
  # Every ssh reads from /dev/null so it cannot swallow the agent names piped into this loop.
  if ping -c1 "$agent" 2>&1 | grep unknown >/dev/null; then
    echo "node $agent is offline"
  elif ssh "$agent" echo </dev/null >/dev/null; then
    if PID=$(ssh "$agent" "pgrep -f '^java.*agent.jar'" </dev/null || ssh "$agent" "pgrep -f '^java.*slave.jar'" </dev/null); then
      # pgrep prints one PID per line; join them with spaces for a one-line message.
      echo "agent|slave jar is still running on $agent (${PID//$'\n'/ }). Leaving things alone..."
    else
      echo "agent.jar is NOT running on $agent. Shutting down instance..."
      ~/bin/agent.py down --instance_name="$agent"
    fi
  else
    #echo "SSH FAILED TO $agent -- sup with that?"
    #echo 'Maybe the instance is TERMINATED instead of deleted? It should be deleted.'
    echo "Maybe instance $agent is offline?"
  fi
done
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
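# Each Jenkins node is configured to "Launch slave via execution of command on the Master", with one of the following as its "Launch command".
# Note: agent.py as written only accepts instance names that start with "node", so use matching names here (or adjust that check).
/var/lib/jenkins/bin/start-agent-and-connect slave-1
/var/lib/jenkins/bin/start-agent-and-connect slave-2
etc.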
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Bring up the named build-agent instance, copy the master's current agent.jar
# to it, and run the agent there over SSH.
# Usage: start-agent-and-connect <instance-name>
set -x

# Fail early with a usage message when no instance name is given.
agent=${1:?"usage: $0 <instance-name>"}

. ~/jenkins-google-auth/bin/activate

echo "Starting agent and connecting..."
# The exit status is deliberately not checked: agent.py exits non-zero when the
# instance is already running, and we still want to connect in that case.
~/bin/agent.py up --instance_name="$agent"

echo 'Connecting...'
# SSH into the agent, grab the latest agent.jar from the master, and run it.
AGENT_JAR=~/tmp/agent.jar.$$
# Remove the temporary jar even when the download or the copy fails.
trap 'rm -f "$AGENT_JAR"' EXIT

# --fail makes curl return an error on an HTTP error status instead of saving
# the error page as agent.jar.
curl --fail http://localhost:8080/jnlpJars/agent.jar > "$AGENT_JAR" &&
  scp "$AGENT_JAR" "$agent":agent.jar &&
  rm "$AGENT_JAR" &&
  ssh "$agent" "
    exec java -Xms1024m -Djava.awt.headless=true -jar ~/agent.jar
  "
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment