-
-
Save skippy/8c44e721fe7e2858bdd4a3b4d3e6d46b to your computer and use it in GitHub Desktop.
managing vault tls
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
you'll want to setup ENVs, such as: | |
(dockerfile) | |
``` | |
ENV VAULT_ADDR "https://active.vault.service.capsci:8200" | |
ENV VAULT_CACERT "/vault/config/vault_tls.ca" | |
ENV CURL_CA_BUNDLE "/vault/config/vault_tls.ca" | |
``` | |
/vault/config/vault_tls.ca is pre-cached from when vault is initially bootstrapped and the PKI instance is initialized. Once that is done and you have put it into your container, you are good to go.
To unseal, I use this helper script: | |
```shell | |
#!/bin/bash
# Unseal the vault instance listening on 127.0.0.1:8200 over TLS.
set -e
. "$(dirname "$0")/common.sh"

# Use the CA under /tmp/tmp_ca while that file exists on disk; otherwise fall
# back to the CA stored in the vault config directory.
if [ -e /tmp/tmp_ca/ca.cer ]; then
  ca_cert=/tmp/tmp_ca/ca.cer
else
  ca_cert=/vault/config/vault_tls.ca
fi

# Pass the arguments as separate, quoted words instead of word-splitting a
# concatenated command string.
exec vault unseal -address=https://127.0.0.1:8200 -ca-cert="$ca_cert"
``` | |
where I explicitly set the CA to the tmp cert |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
set -e
# Absolute path of the directory holding this script; the sibling helper
# scripts are resolved relative to it.
DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
# Quoted so a script path containing whitespace still resolves.
# NOTE(review): log and quit are presumably defined in common.sh - confirm.
. "$DIR/common.sh"
# This script is run via the docker entrypoint script and only when vault is
# being started in server mode.
#
# It assumes that the PKI was already set up and the CA is in this docker container.
# If not, you'll get stuck at 4, which is fine when initializing the cluster.
#
# It:
# 1) creates a tmp ca before vault server is started
# 2) forks a background job in bash and then returns so vault server can start
# 3) monitors consul and waits for the local instance to become available and unsealed
# 4) once unsealed, it:
# 4-a) gets a vault-based pki cert via: aws-ec2 auth -> appRole auth -> pki request.
#      all tokens leading up to the pki request are very short lived and limited
#      to allowing just the single request at hand.
# 4-b) if the pki request fails, keep trying; sometimes vault doesn't have a master
#      at that moment (e.g. consul is shifting leadership); we want to capture that,
#      log it, and then try again
# 4-c) puts the new certs in the right place and sends a SIGHUP to vault to reload with
#      the new certs
# 4-d) figures out the ttl for the new certs
# 4-e) removes the tmp_ca and all the certs from disk
# 4-f) sleeps a somewhat randomized amount of time, but well before the new cert expires
# 4-g) after the sleep, re-auths, pulls down a new pki cert and repeats the steps
#######################################
# Create a throw-away CA plus a one-day server certificate for 127.0.0.1 so
# the vault API can be served over TLS before any vault-issued cert exists.
# Does nothing when a vault server process is already running.
# Arguments: none
# Outputs:   /tmp/tmp_ca/* (CA cert, CA key, csr, openssl ca state) and
#            /tmp/active.vault.service.capsci.{key,crt}
# Returns:   0 on success or when skipped, 1 when a step fails
#######################################
function setup_tmp_vault_tls() {
  isOnRunningServer && return 0

  local ca_dir=/tmp/tmp_ca
  local cert_base=/tmp/active.vault.service.capsci

  log "START: tmp tls for vault API"

  # Start from a clean directory: a leftover one would make a plain mkdir fail
  # and would carry a stale certindex/serial into "openssl ca".
  rm -rf -- "${ca_dir:?}" || return 1
  mkdir -p -- "$ca_dir" || return 1

  # based upon http://dunne.io/vault-and-self-signed-ssl-certificates
  # Absolute paths are used throughout so the caller's working directory is
  # never changed.
  openssl req -batch -newkey rsa:2048 -days 1 -x509 -nodes \
    -out "$ca_dir/ca.cer" -keyout "$ca_dir/ca_key.pem" || return 1
  openssl req -batch -newkey rsa:2048 -nodes \
    -out "$ca_dir/vault.csr" -keyout "$cert_base.key" || return 1

  echo 000a > "$ca_dir/serialfile" || return 1
  : > "$ca_dir/certindex" || return 1

  # default_md is sha256 rather than sha1: SHA-1 signed certificates are
  # rejected by current Go crypto/x509 verification.
  cat > "$ca_dir/tmp_vault_ca.cnf" <<EOL || return 1
[ ca ]
default_ca = tmp_vault_ca
[ tmp_vault_ca ]
new_certs_dir = ${ca_dir}
unique_subject = no
certificate = ${ca_dir}/ca.cer
database = ${ca_dir}/certindex
private_key = ${ca_dir}/ca_key.pem
serial = ${ca_dir}/serialfile
default_days = 1
default_md = sha256
policy = tmp_vault_ca_policy
x509_extensions = tmp_vault_ca_extensions
copy_extensions = copy
[ tmp_vault_ca_policy ]
commonName = optional
stateOrProvinceName = supplied
countryName = supplied
emailAddress = optional
organizationName = supplied
organizationalUnitName = optional
[ tmp_vault_ca_extensions ]
basicConstraints = CA:false
subjectKeyIdentifier = hash
authorityKeyIdentifier = keyid:always
subjectAltName = IP:127.0.0.1
keyUsage = digitalSignature,keyEncipherment
extendedKeyUsage = serverAuth
EOL

  openssl ca -batch -config "$ca_dir/tmp_vault_ca.cnf" -notext \
    -in "$ca_dir/vault.csr" -out "$cert_base.crt" || return 1
  # append the CA so the served file is the full chain
  cat "$ca_dir/ca.cer" >> "$cert_base.crt" || return 1

  # we generally set the VAULT_CACERT; this makes it easier to use some of the utilities
  # without having to import the host's ca-certificates.cert
  # HOWEVER, it causes problems with reloading when we have
  # unset VAULT_CACERT

  log "FINISH: tmp tls for vault API"
}
#######################################
# Authenticate against vault (aws-ec2 -> appRole) and request a fresh TLS
# certificate for vault itself from 'pki/internal-services/issue/vault'.
#
# Every step is checked explicitly: the callers invoke this function from
# "||" / "if !" contexts, where "set -e" is ignored, so an unchecked failure
# would otherwise fall through, wipe the current certs and report success.
#
# Globals:   DIR (read); VAULT_CACERT, CURL_CA_BUNDLE, VAULT_ADDR and their
#            ORIG_* backups (read/written/exported)
# Arguments: none
# Outputs:   /tmp/active.vault.service.capsci.{crt,key} and
#            /vault/config/vault_tls.ca, written only after a complete
#            response was received
# Returns:   0 on success, 1 when any auth / issue / parse step fails
#######################################
function setup_vault_tls() {
  local role_name='service.vault'
  local cert_base=/tmp/active.vault.service.capsci
  local ca_file=/vault/config/vault_tls.ca
  local result response instance_token vault_token output
  local certificate issuing_ca private_key

  # Always probe with the originally configured settings. If an earlier call
  # switched to the temporary CA, switch back first; otherwise a second
  # failing probe would overwrite the ORIG_* backups with the temporary
  # values and the real configuration would be lost.
  if [[ -n "${ORIG_VAULT_CACERT:-}" ]]; then
    export VAULT_CACERT="$ORIG_VAULT_CACERT"
    export CURL_CA_BUNDLE="$ORIG_CURL_CA_BUNDLE"
    export VAULT_ADDR="$ORIG_VAULT_ADDR"
    unset ORIG_VAULT_CACERT ORIG_CURL_CA_BUNDLE ORIG_VAULT_ADDR
  fi

  # only the error text matters here, so the exit status is deliberately ignored
  result=$(vault status 2>&1 || true)
  if [[ "$result" == *"certificate signed by unknown authority"* ||
        "$result" == *"x509: certificate is valid for"* ]]; then
    # we are using our main CA certificate, but the host requires the local CA...
    # this situation occurs when we are bootstrapping the initial node
    export ORIG_VAULT_CACERT="$VAULT_CACERT"
    export ORIG_CURL_CA_BUNDLE="$CURL_CA_BUNDLE"
    export ORIG_VAULT_ADDR="$VAULT_ADDR"
    export VAULT_CACERT="/tmp/tmp_ca/ca.cer"
    export CURL_CA_BUNDLE="/tmp/tmp_ca/ca.cer"
    export VAULT_ADDR=https://127.0.0.1:8200
  fi

  # start our authentication process
  # - get a short token from ec2 which can only get an appRole
  # - call the app_role to get the role-based token
  # - that token has a policy which allows it to retrieve a pki
  # A failing helper is detected through the missing token below, which also
  # logs whatever the helper printed.
  response=$("$DIR/auth_ec2.sh") || true
  instance_token=$(printf '%s' "$response" | jq -r '.auth.client_token') || instance_token=""
  if [[ -z "$instance_token" || "$instance_token" == "null" ]]; then
    log "failed retrieving auth_ec2-based token"
    log "$response"
    return 1
  fi

  response=$("$DIR/auth_app_role.sh" "$role_name" "$instance_token") || true
  vault_token=$(printf '%s' "$response" | jq -r '.auth.client_token') || vault_token=""
  if [[ -z "$vault_token" || "$vault_token" == "null" ]]; then
    log "failed retrieving auth_app_role-based token"
    log "$response"
    return 1
  fi

  # lets not log to STDOUT all the stuff that vault auth outputs
  if ! vault auth "$vault_token" >/dev/null; then
    log "failed logging in with auth_app_role-based token"
    return 1
  fi
  log "logged in via aws-ec2 and app_role to setup vault TLS"

  log "retrieving certs from vault 'pki/internal-services/issue/vault'"
  if ! output=$(vault write -format json pki/internal-services/issue/vault \
    common_name=active.vault.service.capsci ip_sans=127.0.0.1); then
    log "failed retrieving certs from vault 'pki/internal-services/issue/vault'"
    return 1
  fi

  # Extract everything BEFORE touching the files on disk, so an incomplete
  # response can never leave vault without a usable cert or CA file.
  if ! certificate=$(printf '%s' "$output" | jq -er '.data.certificate') ||
     ! issuing_ca=$(printf '%s' "$output" | jq -er '.data.issuing_ca') ||
     ! private_key=$(printf '%s' "$output" | jq -er '.data.private_key'); then
    log "vault pki response is missing certificate, issuing_ca or private_key"
    return 1
  fi

  rm -f "$cert_base".*
  # cert file = leaf certificate followed by the issuing CA
  printf '%s\n' "$certificate" "$issuing_ca" > "$cert_base.crt" || return 1
  printf '%s\n' "$private_key" > "$cert_base.key" || return 1
  printf '%s\n' "$issuing_ca" > "$ca_file" || return 1
  return 0
}
#######################################
# Send SIGHUP to every running 'vault server' process so it reloads its TLS
# certificates, then remove the temporary CA and the cert/key files from disk.
# Arguments: none
# Returns:   0 on success; 1 when no vault server process is found or the
#            signal cannot be delivered (the cert files are kept in that case)
#######################################
function reloadServer() {
  local -a vault_pids=()

  # pgrep prints PIDs directly; parsing "ps auwx" columns is not portable
  # (column 1 is the USER on procps, the PID on busybox).
  mapfile -t vault_pids < <(pgrep -f 'vault server')
  if (( ${#vault_pids[@]} == 0 )); then
    log "cannot reload vault TLS: no running 'vault server' process found"
    return 1
  fi

  log "reloading vault TLS (pid ${vault_pids[*]})"
  kill -s HUP "${vault_pids[@]}" || return 1
  sleep 5 # wait a bit before deleting certs!

  # no need to keep these around; lets put on our paranoid hat and delete them
  rm -rf /tmp/tmp_ca
  rm -f /tmp/active.vault.service.capsci.*
}
#######################################
# Report whether a 'vault server' process is currently running.
# Arguments: none
# Outputs:   nothing
# Returns:   0 when at least one matching process exists, 1 otherwise
#######################################
function isOnRunningServer() {
  # pgrep matches against the full command line (-f) and never reports
  # itself, so no "grep -v grep" filtering is needed and no global variable
  # is overwritten.
  if pgrep -f 'vault server' >/dev/null 2>&1; then
    return 0
  fi
  return 1
}
#############################################################################
############################### begin script ################################
#############################################################################
# useful for debugging purposes: mode "update" performs a single rotation in
# the foreground; any other value (default "loop") starts the monitor loop.
# NOTE(review): quit is presumably an exit wrapper from common.sh - confirm.
mode=${1:-loop}
if [[ "$mode" == "update" ]]; then
  log "setting up vault TLS"
  setup_vault_tls || quit 1
  reloadServer
  quit 0
fi

isOnRunningServer || setup_tmp_vault_tls

# run the rest of this in a background job
# start with sealed_status being non-passing... it is also very likely that vault server may not have started
# by the time this is called; so lets wait a bit and then try to see if it is unsealed
sealed_status="critical"
while true; do
  if [[ "$sealed_status" != "passing" ]]; then
    sleep 5s
    if isOnRunningServer; then
      # The node name is handed to jq via --arg instead of being spliced into
      # the filter text. A jq parse error (e.g. consul answering with a
      # non-JSON error body) is treated as "not passing" rather than letting
      # "set -e" silently kill this background loop.
      sealed_status=$(
        curl -s 'http://0.0.0.0:8500/v1/health/checks/vault?pretty=1' \
          | jq -r --arg node "$(cat /etc/hostname)" '.[] | select(.Node == $node).Status'
      ) || sealed_status="critical"
      if [[ "$sealed_status" != "passing" ]]; then
        log "waiting for vault to be unsealed on host"
      else
        log "vault unsealed; reloading TLS"
      fi
    else
      log "waiting for server to start before we can check seal status"
    fi
    continue
  fi

  log "rotating vault tls with vault pki certs"
  if ! setup_vault_tls; then
    # this usually occurs if vault isn't ready to start accepting connections...
    log "failed to setup vault tls; try again in 5 seconds"
    sleep 5s
    continue
  fi

  # find the cert time and create a ttl until we reload the cert
  # (must happen before reloadServer, which deletes the cert file)
  endDate=$(openssl x509 -enddate -noout -in /tmp/active.vault.service.capsci.crt | cut -d= -f 2)
  if [[ -z "$endDate" ]]; then
    # the pipeline status is cut's, so an openssl failure only shows up here
    log "failed to read the expiry date of the new cert; try again in 5 seconds"
    sleep 5s
    continue
  fi
  endDateSecs=$(date --date="${endDate}" +%s)
  currTime=$(date +%s)

  # Shell arithmetic instead of expr: expr exits with status 1 whenever the
  # result is 0, which aborts the loop under "set -e".
  secs=$(( endDateSecs - currTime ))
  secs_with_buffer=$(( secs / 2 ))
  # renew somewhere between 50% and 75% of the remaining lifetime
  jitter_max=$(( secs_with_buffer / 2 ))
  if (( jitter_max < 1 )); then
    jitter_max=1 # keep the shuf range valid for very short-lived certs
  fi
  rand_ttl_delta=$(shuf -i "1-${jitter_max}" -n 1)
  vault_cert_ttl=$(( secs_with_buffer + rand_ttl_delta ))
  if (( vault_cert_ttl < 1 )); then
    vault_cert_ttl=1 # cert already expired: rotate again right away
  fi

  reloadServer
  log "reloading tls cert in $vault_cert_ttl secs or ~ $(( vault_cert_ttl / 3600 )) hours (cert ttl: $secs secs or ~ $(( secs / 3600 )) hours)"
  sleep "${vault_cert_ttl}s"
done &
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment