Initial version
Change-Id: I7f5d09024cf75e0872fddfadd9462e114b0de76f
diff --git a/scripts/wmf-update-known-hosts-production b/scripts/wmf-update-known-hosts-production
new file mode 100755
index 0000000..d624e15
--- /dev/null
+++ b/scripts/wmf-update-known-hosts-production
@@ -0,0 +1,176 @@
+#!/bin/bash
+
+##############################################################################
+# WMF Update production known hosts
+#
+# DESCRIPTION:
+# - Populate a known_hosts file with all the production hosts and services
+# in the Wikimedia Foundation production infrastructure for easy
+# autocompletion while keeping StrictHostKeyChecking active:
+# - sync all the known hosts from a bastion
+# - remove the short (non-FQDN) hostname from each entry
+# - optionally generate known hosts for services defined as CNAMEs in the
+# DNS repository, see PARAMS below. This allows for the autocompletion of
+# active/passive services like icinga.wikimedia.org.
+# - Silently ignore all CNAMEs to the main DYNA record (dyna.wikimedia.org.)
+# - Keeps a backup file with the previous known hosts
+# - Show a diff between the new known hosts and the current ones
+#
+# It saves the known hosts into KNOWN_HOST_FILE, adjust this and/or the
+# UserKnownHostsFile parameter in your ~/.ssh/config in order for them to
+# match. A warning will be shown if they don't match.
+#
+# By default only the hosts from the chosen BASTION_HOST known_hosts file
+# will be imported, removing the short hostname (not the FQDN) to ease the
+# auto-completion when ssh-ing.
+#
+# PARAMS:
+# It accepts one positional argument that, if specified, must be the path to
+# a local clone of the Operations DNS repository, (either from Gerrit or from
+# GitHub):
+# https://gerrit.wikimedia.org/r/operations/dns
+# In this case also the services defined as CNAMEs in the wikimedia.org and
+# wmnet zone files will be added with the identity of the target host, if
+# that is found in the known_hosts file, skipping the missing ones.
+#
+# USAGE:
+# wmf-update-known-hosts-production [PATH_TO_DNS_REPOSITORY]
+#
+# Author: Riccardo Coccioli <rcoccioli@wikimedia.org>
+# Date: 2017-06-21
+# Last update: 2019-11-05
+# Dependencies: colordiff
+# Version: 1.2
+# License: GPLv3+
+##############################################################################
+
+set -e
+
+DNS_REPO_PATH="${1}"  # optional: path to a local clone of operations/dns
+KNOWN_HOSTS_PATH="${HOME}/.ssh/known_hosts.d"
+KNOWN_HOST_FILE="${KNOWN_HOSTS_PATH}/wmf-prod"  # file generated by this script
+BASTION_HOST="bast2002.wikimedia.org"  # host the known hosts are copied from
+MAIN_DYNA_RECORD="dyna.wikimedia.org."  # CNAMEs to this target are ignored
+
+if [[ ! -d "${KNOWN_HOSTS_PATH}" ]]; then
+    >&2 echo "ERROR: KNOWN_HOSTS_PATH '${KNOWN_HOSTS_PATH}' is not a directory, you might want to adjust the constant in the script or create it"
+    exit 1
+fi
+
+if [[ -n "${DNS_REPO_PATH}" ]]; then
+    if [[ ! -d "${DNS_REPO_PATH}" ]]; then
+        >&2 echo "ERROR: DNS_REPO_PATH '${DNS_REPO_PATH}' is not a directory"
+        exit 2
+    fi
+    if ! git -C "${DNS_REPO_PATH}" remote -v | grep -E '(gerrit\.wikimedia\.org|github\.com[/:]wikimedia)' | grep -q 'operations[/-]dns'; then  # [/:] accepts both HTTPS and SSH GitHub remotes
+        >&2 echo "ERROR: DNS_REPO_PATH '${DNS_REPO_PATH}' doesn't seems to be a checkout of the operations/dns repository"
+        exit 3
+    fi
+fi
+
+# Print the known hosts entry of a CNAME, reusing the key of its target host.
+# $1 is a "<name> <target>" pair, $2 the domain appended to <name>. A target
+# not matching exactly one line of ${KNOWN_HOST_FILE}.new is skipped.
+function parse_line() {
+    local line="${1}"
+    local domain="${2}"
+    local name target sep pattern found
+
+    name="${line%% *}"      # first space-separated field
+    target="${line#* }"     # second field (the whole line if it has no space)
+    target="${target%% *}"
+
+    if [[ "${target}" == "${MAIN_DYNA_RECORD}" ]]; then
+        return 0  # Silently ignore CNAMEs to the MAIN_DYNA_RECORD
+    fi
+
+    # A target without trailing dot is matched as "<target>." at line start,
+    # one with a trailing dot is matched, without that dot, as "<target>,".
+    sep='\.'
+    if [[ "${target}" == *. ]]; then target="${target%.}"; sep=','; fi
+
+    # Escape the dots of the target: they must match literally, not any char.
+    pattern="^${target//./\\.}${sep}"
+    found="$(grep -c -- "${pattern}" "${KNOWN_HOST_FILE}.new")" || true
+    if [[ "${found}" -ne 1 ]]; then
+        >&2 echo "Skipping '${target}' CNAME target, found ${found}/1 matches"
+        return 0
+    fi
+
+    grep -- "${pattern}" "${KNOWN_HOST_FILE}.new" | awk -v host="${name}.${domain}" '{ $1 = host; print }'
+}
+
+# Append to ${KNOWN_HOST_FILE}.new the entries for the CNAMEs of a zone file.
+# Arguments:
+#   $1 - path of the zone file; if missing a note is printed and it is skipped
+#   $2 - optional origin: only the records that follow a "$ORIGIN <origin>."
+#        line, up to the next $ORIGIN, are used and <origin> is their domain;
+#        without it the whole file is used and the domain is the file name
+# Each "<name> <target>" pair found is handed over to parse_line.
+function extract_cnames_from_zone() {
+    local zone_file="${1}"
+    local origin="${2}"
+    local domain
+    local line
+
+    if [[ ! -f "${zone_file}" ]]; then
+        >&2 echo "Unable to find zone file ${zone_file}, skipping..."
+        return 0
+    fi
+
+    domain="${origin:-$(basename "${zone_file}")}"
+
+    # Follow the $ORIGIN directives inside awk instead of slicing the file by
+    # line numbers with head/tail: the slicing extracted nothing when the wanted
+    # section was the last one of the file and called head with an empty count
+    # when the origin was not present at all.
+    awk -v origin="${origin}" '
+        $1 == "$ORIGIN" { wanted = ($2 == (origin ".")); next }
+        (origin == "" || wanted) && / CNAME / { print $1, $5 }
+    ' "${zone_file}" | while read -r line; do
+        parse_line "${line}" "${domain}" >> "${KNOWN_HOST_FILE}.new"
+    done
+}
+
+# Get new known hosts
+echo "===> SSHing to ${BASTION_HOST} (if a smartcard input is needed, check it now)"
+ssh "${BASTION_HOST}" 'cat /etc/ssh/ssh_known_hosts' > "${KNOWN_HOST_FILE}.new"
+
+# Drop the non-FQDN name (2nd host list entry) to avoid multiple autocompletions
+awk '{ n = split($1, hosts, ","); cleaned = hosts[1]  # only the host list is split, never the key
+       for (i = 3; i <= n; i++) cleaned = cleaned "," hosts[i]
+       if (n > 1) sub(/[^ \t]+/, cleaned); print }' "${KNOWN_HOST_FILE}.new" > "${KNOWN_HOST_FILE}.new.clean"
+mv -f "${KNOWN_HOST_FILE}.new.clean" "${KNOWN_HOST_FILE}.new"
+
+if [[ -n "${DNS_REPO_PATH}" ]]; then
+    extract_cnames_from_zone "${DNS_REPO_PATH}/templates/wikimedia.org"
+    extract_cnames_from_zone "${DNS_REPO_PATH}/templates/wmnet" "eqiad.wmnet"
+fi
+
+PREV_COUNT=0
+PREV_FILE=/dev/null
+if [[ -f "${KNOWN_HOST_FILE}" ]]; then
+    PREV_COUNT="$(( $(wc -l < "${KNOWN_HOST_FILE}") ))"  # via stdin: bare number, no file name
+    PREV_FILE="${KNOWN_HOST_FILE}"
+fi
+
+echo "==== DIFFERENCES ===="
+colordiff --fakeexitcode "${PREV_FILE}" "${KNOWN_HOST_FILE}.new"
+echo "====================="
+echo "Going from ${PREV_COUNT} to $(( $(wc -l < "${KNOWN_HOST_FILE}.new") )) known hosts and services"
+
+if [[ -f "${KNOWN_HOST_FILE}" ]]; then
+    mv -vf "${KNOWN_HOST_FILE}" "${KNOWN_HOST_FILE}.old"
+    echo "Backup file is ${KNOWN_HOST_FILE}.old"
+fi
+mv -v "${KNOWN_HOST_FILE}.new" "${KNOWN_HOST_FILE}"
+echo "New file generated at ${KNOWN_HOST_FILE}"
+
+if ! grep -Eq "UserKnownHostsFile .*/wmf-prod( |$)" "${HOME}/.ssh/config" 2>/dev/null; then  # a missing config only triggers the warning
+    echo "WARNING: You may need to add/update 'UserKnownHostsFile ${KNOWN_HOST_FILE}' to your ~/.ssh/config"
+fi
+if [[ "${SHELL##*/}" == 'zsh' ]]; then  # wherever zsh is installed
+    echo 'Add this line to your .zshrc to tab-complete remote hosts:'
+    echo "zstyle ':completion:*:hosts' known-hosts-files ${HOME}/.ssh/known_hosts ${KNOWN_HOST_FILE}"
+fi
+exit 0
\ No newline at end of file