Skip to content

OpenShift - Troubleshooting: etcd

Commands

Display endpoint health:

# Run `etcdctl endpoint health` inside the etcd pod and print the result as a
# table (-w table). {master} is a placeholder: replace it with the master
# name that appears in the etcd pod name, as in the example below.
oc exec -it etcd-{master} -- etcdctl endpoint health -w table
# Ex: oc exec -it etcd-master-1 -- etcdctl endpoint health -w table

Display etcd entries (key names only):

# List every key stored under the "/" prefix. --keys-only suppresses the
# values, so only key names are printed. Replace {master} with the master
# name used in the etcd pod name.
oc exec -it etcd-{master} -- etcdctl get / --keys-only --prefix

Get ETCD Leader (shown in the IS LEADER column of the status table):

# Print the status of the cluster's endpoints (--cluster) as a table
# (-w table). Replace {master} with the master name used in the etcd pod name.
oc exec -it etcd-{master} -- etcdctl endpoint status --cluster -w table

Get ETCD Alarms:

# List the alarms currently reported by etcd. Replace {master} with the
# master name used in the etcd pod name.
oc exec -it etcd-{master} -- etcdctl alarm list

Watch ETCD Events:

# Stream changes for every key under the "/" prefix. The command keeps
# running until it is interrupted. Replace {master} with the master name used
# in the etcd pod name.
oc exec -it etcd-{master} -- etcdctl watch --prefix /

Scripts

Get all requests by type:

# --- Settings -------------------------------------------------------------
# Master name used to build the etcd pod name (etcd-<master>).
master="master-1"
# How long (in seconds) the etcd watch is left running.
time_seconds=30
# Number of entries shown in each "Top" report.
top_count=10
# Raw capture of the watch output. The capture contains whatever etcdctl
# prints for each event, so it is created with mktemp (private, unpredictable
# name) instead of a fixed world-readable path in /tmp.
data_file=$(mktemp "${TMPDIR:-/tmp}/etcd-watch.XXXXXX") || {
  echo "Unable to create a temporary file for the etcd capture" >&2
  exit 1
}

#######################################
# Print a timestamped log line to stdout.
# Note: this function shadows any external `logger` command for the rest of
# the script.
# Arguments:
#   $1 - log level label (e.g. INFO)
#   $2 - message text
# Outputs:
#   "<YYYY-MM-DD HH:MM:SS> <level> - <message>" on stdout
#######################################
function logger() {
  # Keep both values local so a call never overwrites same-named globals.
  local lvl=${1-}
  local msg=${2-}
  printf '%s %s - %s\n' "$(date +'%Y-%m-%d %H:%M:%S')" "$lvl" "$msg"
}

# Capture etcd watch events for a fixed time window, then print summaries.
# Uses $master, $time_seconds, $top_count, $data_file and logger(), which are
# defined before this point in the script.
formatted_file="${data_file}.formatted"
process_id=""

# Stop the background watch (if it is still running) and remove the capture
# files. Registered as an exit handler so an interrupted run does not leave
# the watch process or the captured data behind.
cleanup_etcd_watch() {
  if [[ -n "$process_id" ]]; then
    kill "$process_id" > /dev/null 2>&1
  fi
  rm -f -- "$data_file" "$formatted_file"
}
trap cleanup_etcd_watch EXIT
trap 'exit 130' INT
trap 'exit 143' TERM

# Watch etcd events and redirect to a file.
# `oc exec` is started without -i/-t: the command runs in the background with
# its output redirected, so there is no terminal to attach to.
logger INFO "Start etcd watching request (${time_seconds}s) ..."
nohup oc exec -n openshift-etcd "etcd-${master}" -- etcdctl watch --prefix / > "$data_file" 2>&1 &
# PID of the background watch
process_id=$!
# Let the watch collect events for the configured window
sleep "$time_seconds"

# Stop the watch: ask politely first (TERM), force it (KILL) only if it is
# still around one second later, then reap it so no job notice is printed.
logger INFO "Stop etcd watching request process ($process_id) ..."
kill "$process_id" > /dev/null 2>&1
sleep 1
kill -9 "$process_id" > /dev/null 2>&1
wait "$process_id" 2> /dev/null
process_id=""

# Format all events as "<TYPE>:<key>", one per line:
#   - grep keeps each PUT/DELETE line plus the line that follows it (-A1);
#     -a makes grep treat the capture as text even if it has binary bytes
#   - the second grep drops the "--" separators grep inserts between groups
#   - sed joins each PUT/DELETE line with the following line using ":"
grep -a -E '^(PUT|DELETE)' -A1 "$data_file" \
  | grep -v '^--$' \
  | sed -E '/^(PUT|DELETE)/{N;s|\n|:|}' > "$formatted_file"

# Events by type
echo "ETCD Requests by Type"
echo "---------------------"
awk -F':' '{print $1}' "$formatted_file" | sort | uniq -c | sort -k1n

# Most frequent events (identical TYPE:key lines)
echo ""
echo "All ETCD Requests (Top $top_count)"
echo "--------------------------"
sort "$formatted_file" | uniq -c | sort -k1n | tail -n "$top_count"

# Events grouped by the 1st, 3rd and 4th "/"-separated fields of each line
# (presumably type, resource and namespace - verify against the key layout)
echo ""
echo "ETCD Requests by Type/NS (Top $top_count)"
echo "---------------------------------"
awk -F'/' '{print $1"/"$3"/"$4}' "$formatted_file" | sort | uniq -c | sort -k1n | tail -n "$top_count"

echo ""
logger INFO "Clean tmp files ..."
rm -f -- "$data_file" "$formatted_file"
# Everything is cleaned up; drop the handlers installed above.
trap - EXIT INT TERM
Back to top