OpenShift - Troubleshooting: etcd
Commands
Display endpoint health:
# Replace {master} with the master node suffix of the etcd pod.
# The namespace is given explicitly so the command works from any current project.
oc exec -n openshift-etcd -it etcd-{master} -- etcdctl endpoint health -w table
# Ex: oc exec -n openshift-etcd -it etcd-master-1 -- etcdctl endpoint health -w table
Display etcd entries:
Get etcd leader:
Get etcd alarms:
Watch etcd events:
Scripts
Get all requests by type:
# Settings for the etcd request capture.
# Suffix of the etcd pod to exec into (pod name is "etcd-<master>").
master="master-1"
# Length of the capture window, in seconds.
time_seconds=30
# Number of rows kept in each "Top" listing.
top_count=10
# Raw watch output goes here. A unique file is created with mktemp so that
# concurrent runs do not overwrite each other and a pre-planted file or symlink
# in /tmp cannot be followed; fall back to the historical fixed path if mktemp
# is unavailable.
data_file=$(mktemp /tmp/etcd-watch.XXXXXX 2> /dev/null) || data_file=/tmp/etcd-watch.data
# Print one timestamped log line on stdout.
# Arguments:
#   $1 - severity label (for example INFO)
#   $2 - message text
# Outputs:
#   "<YYYY-mm-dd HH:MM:SS> <level> - <message>"
function logger() {
  # Declared local so a call never overwrites same-named variables of the caller.
  local lvl=${1-}
  local msg=${2-}
  printf '%s %s - %s\n' "$(date +'%Y-%m-%d %H:%M:%S')" "$lvl" "$msg"
}
# Capture the etcd watch stream for $time_seconds seconds, then print three
# summaries: requests per type, the most frequent individual requests, and the
# most frequent type/namespace combinations.

# Watch etcd events and redirect to a file
logger INFO "Start etcd watching request (${time_seconds}s) ..."
nohup oc exec -n openshift-etcd -it "etcd-${master}" -- etcdctl watch --prefix / > "$data_file" 2>&1 &
# PID of the background watcher
process_id=$!
# Let the capture run
sleep "$time_seconds"
# If the watcher is already gone, "oc exec" failed or was cut short; say so
# instead of silently printing empty tables.
if ! kill -0 "$process_id" > /dev/null 2>&1; then
  logger WARN "etcd watch process ($process_id) exited before the capture ended, results may be empty"
fi
# Stop the watcher: ask politely first, force only if it is still alive, then
# reap it so the data file is complete before it is read.
logger INFO "Stop etcd watching request process ($process_id) ..."
if kill "$process_id" > /dev/null 2>&1; then
  sleep 1
  if kill -0 "$process_id" > /dev/null 2>&1; then
    kill -9 "$process_id" > /dev/null 2>&1
  fi
fi
wait "$process_id" 2> /dev/null
# Format all events as "<TYPE>:<key>", one per line:
#   - keep each PUT/DELETE line plus the line after it,
#   - drop the "--" group separators grep inserts between matches,
#   - join each pair with ":".
formatted_file="${data_file}.formatted"
grep -a -E '^(PUT|DELETE)' -A1 "$data_file" \
  | grep -v '^-' \
  | sed -r '/^(PUT|DELETE)/{N;s|\n|:|}' > "$formatted_file"
# Events by type
echo "ETCD Requests by Type"
echo "---------------------"
awk -F':' '{print $1}' "$formatted_file" | sort | uniq -c | sort -n
# Display events
echo ""
echo "All ETCD Requests (Top $top_count)"
echo "--------------------------"
sort "$formatted_file" | uniq -c | sort -n | tail -n "$top_count"
# Events by namespace: fields 3 and 4 of the "/"-separated key are printed
# next to the request type.
echo ""
echo "ETCD Requests by Type/NS (Top $top_count)"
echo "---------------------------------"
awk -F'/' '{print $1"/"$3"/"$4}' "$formatted_file" | sort | uniq -c | sort -n | tail -n "$top_count"
echo ""
logger INFO "Clean tmp files ..."
rm -f -- "$data_file" "$formatted_file"