Browse Source

CAD-1073 bench: anomaly detection

master
Kosyrev Serge 2 years ago committed by Kosyrev Serge
parent
commit
c8e9a1930a
No known key found for this signature in database
GPG Key ID: 2B3008CDF13E74BF
  1. 72
      bench/bench.sh
  2. 266
      bench/lib-analyses.sh
  3. 95
      bench/lib-analysis.sh
  4. 39
      bench/lib-benchrun.sh
  5. 0
      bench/lib-deploy.sh
  6. 19
      bench/lib-params.sh
  7. 0
      bench/lib-profile.sh
  8. 58
      bench/lib-report.sh
  9. 201
      bench/lib-sanity.sh
  10. 25
      bench/lib-sheets.sh
  11. 61
      bench/lib-tag.sh
  12. 26
      bench/lib.sh
  13. 8
      nix/sources.json
  14. 147
      scripts/lib-analysis.sh

72
scripts/bench.sh → bench/bench.sh

@ -2,12 +2,18 @@
# shellcheck disable=2207,2155,1007,1090
set -euo pipefail
. "$(dirname "$0")"/lib.sh
. "$(dirname "$0")"/lib-deploy.sh
. "$(dirname "$0")"/lib-params.sh
. "$(dirname "$0")"/lib-profile.sh
. "$(dirname "$0")"/lib-benchrun.sh
. "$(dirname "$0")"/lib-analysis.sh
__BENCH_BASEPATH=$(dirname "$(realpath "$0")")
. "$__BENCH_BASEPATH"/lib.sh
. "$__BENCH_BASEPATH"/lib-analyses.sh
. "$__BENCH_BASEPATH"/lib-analysis.sh
. "$__BENCH_BASEPATH"/lib-benchrun.sh
. "$__BENCH_BASEPATH"/lib-deploy.sh
. "$__BENCH_BASEPATH"/lib-params.sh
. "$__BENCH_BASEPATH"/lib-profile.sh
. "$__BENCH_BASEPATH"/lib-sanity.sh
. "$__BENCH_BASEPATH"/lib-sheets.sh
. "$__BENCH_BASEPATH"/lib-report.sh
. "$__BENCH_BASEPATH"/lib-tag.sh
###
### TODO
###
@ -144,7 +150,7 @@ main() {
local op="${1:-${default_op}}"; shift || true
case "${op}" in
init-params | init | reinit-params | reinit ) true;;
init-params | init | reinit-params | reinit | analyse | a | analyse-run | arun | sanity-check | sanity | sanity-check-dir | sane-dir | srun | call | mass-analyse | mass ) true;;
* ) params_check;; esac
case "${op}" in
@ -175,8 +181,21 @@ main() {
analyse | a )
export tagroot=$(realpath ./runs)
analyse_tag "$@";;
mark-run-broken | mark-broken | broken )
mark_run_broken "$@";;
analyse-run | arun )
export tagroot=$(realpath ./runs)
analyse_run "$@";;
mass-analyse | mass )
mass_analyse "$@";;
sanity-check | sanity | sane | check )
export tagroot=$(realpath ./runs)
sanity_check_tag "$@";;
sanity-check-run | sanity-run | sane-run | check-run | srun )
sanity_check_run "$@";;
mark-tag-broken | mark-broken | broken )
local tag dir
tag=${1:-$(cluster_last_meta_tag)}
dir="./runs/${tag}"
mark_run_broken "$dir" "\"user-decision\"";;
package | pkg )
tagroot=$(realpath ./runs)
resultroot=$(realpath ../bench-results)
@ -227,7 +246,7 @@ main() {
blocks ) op_blocks;;
eval ) eval "${@@Q}";;
call ) "$@";;
* ) usage; exit 1;; esac
}
trap atexit EXIT
@ -274,25 +293,25 @@ bench_profile() {
if ! test -f "${deploylog}" -a -n "${no_deploy}"
then profile_deploy "${prof}"; fi
local tag
if ! op_bench_start "${prof}" "${deploylog}"
then tag=$(cluster_last_meta_tag)
process_broken_run "$tag"
return 1; fi
op_bench_start "${prof}" "${deploylog}"
ret=$?
local tag dir
tag=$(cluster_last_meta_tag)
dir=$(realpath "./runs/$tag")
if test $ret != 0
then process_broken_run "$dir"
return 1; fi
oprint "$(date), termination condition satisfied, stopping cluster."
op_stop
op_bench_fetch
fetch_run "$dir"
oprint "concluded run: ${tag}"
tagroot=$(realpath ./runs)
resultroot=$(realpath ../bench-results)
export tagroot resultroot
local tag
tag=$(cluster_last_meta_tag)
analyse_tag "${tag}"
package_tag "${tag}"
if analyse_run "${dir}"
then package_run "${dir}"
fi
}
op_bench_start() {
@ -530,10 +549,9 @@ op_wait_for_empty_blocks() {
return 1; fi
}
op_bench_fetch() {
local tag dir components
tag=${1:-$(cluster_last_meta_tag)}
dir="./runs/${tag}"
fetch_run() {
local dir=${1:-.} tag components
tag=$(run_tag "$dir")
oprint "run directory: ${dir}"
pushd "${dir}" >/dev/null || return 1

266
bench/lib-analyses.sh

@ -0,0 +1,266 @@
#!/usr/bin/env bash
# shellcheck disable=1091,2016
analysis_list=()
analysis_list+=(analysis_cleanup)
## Reset a run directory to its pre-analysis state: drop the summary
## file and the scratch directory, then recreate the latter empty.
##   $1: run directory (default: current directory)
analysis_cleanup() {
        local rundir=${1:-.}
        local summary="$rundir"/analysis.json
        local workdir="$rundir"/analysis

        rm -f "$summary"
        rm -rf "$workdir"
        mkdir -p "$workdir"
}
analysis_list+=(analysis_block_arrivals)
## Record the raw text of logs/block-arrivals.gauge under the
## 'block_arrivals' key of the run's analysis.json.
##   $1: run directory (default: current directory)
analysis_block_arrivals() {
local dir=${1:-.}
## NOTE(review): this also prints the gauge file to stdout -- possibly
## a debugging leftover; confirm it is wanted.
cat "$dir"/logs/block-arrivals.gauge
json_file_append "$dir"/analysis.json '
{ block_arrivals: $arrivals
}' --rawfile arrivals "$dir"/logs/block-arrivals.gauge <<<0
}
analysis_list+=(analysis_unpack)
## Extract the explorer and node log archives of a run into its
## analysis scratch directory.
##   $1: run directory (default: current directory)
analysis_unpack() {
        local rundir=${1:-.} archive
        for archive in logs-explorer logs-nodes
        do tar x -C "$rundir"/analysis -af "$rundir"/logs/"$archive".tar.xz
        done
}
analysis_list+=(analysis_log_inventory)
## Build the per-machine log inventory (analysis/log-inventory.json) and
## append summary timestamps derived from it to analysis.json:
##   final_log_timestamp       latest entry over all inventoried logs
##   first/final_node_log_*    same, but ignoring inventory entries named
##                             "explorer" and "generator"
##   logs                      the inventory itself
##   $1:  run directory (default: current directory)
##   $2+: machine names, forwarded to collect_jsonlog_inventory
analysis_log_inventory()
{
local dir=${1:-.}; shift
local machines=("$@")
collect_jsonlog_inventory "$dir"/analysis "${machines[@]}" \
> "$dir"/analysis/log-inventory.json
## $logs is the slurped inventory: one object per jsonlog_inventory call.
json_file_append "$dir"/analysis.json \
'{ final_log_timestamp: ($logs | max_by(.latest) | .latest)
, first_node_log_timestamp: ($logs
| map (select(.name != "explorer" and
.name != "generator"))
| min_by(.earliest) | .earliest)
, final_node_log_timestamp: ($logs
| map (select(.name != "explorer" and
.name != "generator"))
| max_by(.latest) | .latest)
, logs: $logs
}' --slurpfile logs "$dir"/analysis/log-inventory.json <<<0
}
analysis_list+=(analysis_timetoblock)
## Run tools/analyse.sh over the unpacked explorer logs and put its
## tx statistics at the front of analysis.json as 'tx_stats'.
##   $1: run directory (default: current directory)
analysis_timetoblock() {
local dir=${1:-.}
## Absolute path needed: the tool is invoked after changing directory.
dir=$(realpath "$dir")
pushd "$dir"/analysis >/dev/null || return 1
"$dir"/tools/analyse.sh \
logs-explorer/generator \
logs-explorer/node \
"$dir"/analysis
## Relative to the pushd above, i.e. this copies from
## "$dir"/analysis/analysis up into "$dir"/analysis.
cp -f analysis/*.{csv,json} .
popd >/dev/null || return 1
json_file_prepend "$dir"/analysis.json \
'{ tx_stats: $txstats[0]
}' --slurpfile txstats "$dir"/analysis/tx-stats.json <<<0
}
analysis_list+=(analysis_submission_threads)
## Record the generator's submission thread ids in analysis.json
## ('submission_tids') and write one trace file per thread index to
## analysis/generator.submission-thread-trace.<n>.json.
##   $1: run directory (default: current directory)
analysis_submission_threads() {
local dir=${1:-.} sub_tids tnum
## A failing 'log-tids' is tolerated and yields an empty list.
sub_tids="$("$dir"/tools/generator-logs.sh log-tids \
"$dir"/analysis/logs-explorer/generator.json || true)"
json_file_append "$dir"/analysis.json \
'{ submission_tids: '"$(jq --slurp <<<$sub_tids)"' }' <<<0
## Iterates thread *indices* 0..(word count - 1), not the tids themselves.
for tnum in $(seq 0 $(($(echo "$sub_tids" | wc -w) - 1)))
do "$dir"/tools/generator-logs.sh tid-trace "${tnum}" \
"$dir"/analysis/logs-explorer/generator.json \
> "$dir"/analysis/generator.submission-thread-trace."${tnum}".json
done
}
analysis_list+=(analysis_from_benchmarking)
## Helper: run every "$dir"/tools/<kind>.*.sh over a set of log files.
##   $1:  run directory
##   $2:  tool family / output prefix (node, explorer, generator)
##   $3+: candidate log files; entries that do not exist (for instance an
##        unmatched glob pattern left verbatim by the shell) are dropped
## For each tool <kind>.<name>.sh this writes analysis/<kind>.<name>.json
## and, if an executable tools/tocsv.<name>.sh exists, the matching .csv.
__analysis_run_tool_family() {
        local dir=$1 kind=$2; shift 2
        local f tool aname
        local files=()

        for f in "$@"
        do if test -e "$f"; then files+=("$f"); fi; done
        if test ${#files[@]} -eq 0
        then return 0; fi

        for tool in "$dir"/tools/"$kind".*.sh
        do if test ! -e "$tool"; then continue; fi
           aname=${tool##*/}
           aname=${aname#"$kind".}
           aname=${aname%.sh}
           echo -n " $aname.$kind"
           "$tool" "${files[@]}" \
             > "$dir"/analysis/"$kind"."$aname".json
           ## An explicit 'if' rather than 'test -x .. && ..': a missing
           ## converter must not become the function's exit status, since
           ## that would abort the caller under 'set -e'.
           if test -x "$dir"/tools/tocsv."$aname".sh
           then "$dir"/tools/tocsv."$aname".sh \
                  < "$dir"/analysis/"$kind"."$aname".json \
                  > "$dir"/analysis/"$kind"."$aname".csv; fi
        done
        return 0
}

## Run the per-family analysis tools found in "$dir"/tools over the
## unpacked logs: node.*.sh over the node logs, explorer.*.sh over the
## explorer's node logs, generator.*.sh over the generator logs.
## Progress markers (" <name>.<family>") go to stdout without a newline.
##   $1: run directory (default: current directory)
analysis_from_benchmarking() {
        local dir=${1:-.}

        __analysis_run_tool_family "$dir" node \
          "$dir"/analysis/logs-node-*/node-*.json
        __analysis_run_tool_family "$dir" explorer \
          "$dir"/analysis/logs-explorer/node-*.json
        __analysis_run_tool_family "$dir" generator \
          "$dir"/analysis/logs-explorer/generator*json
}
analysis_list+=(analysis_TraceForgeInvalidBlock)
## Extract every node log line mentioning the message named after this
## function (the part after 'analysis_') and write the entries, sorted
## by timestamp, one compact JSON object per line, to
## analysis/node.<message>.json.
##   $1: run directory (default: current directory)
## Does nothing when no node logs are present.
analysis_TraceForgeInvalidBlock() {
        local dir=${1:-.} msg f
        local files=()
        msg=${FUNCNAME[0]#analysis_}

        for f in "$dir"/analysis/logs-node-*/node-*.json
        do if test -e "$f"; then files+=("$f"); fi; done
        if test ${#files[@]} -eq 0
        then return 0; fi

        ## The grep must be grouped: '|' binds tighter than '||', so an
        ## ungrouped 'grep .. || true | jq ..' never feeds grep's output
        ## to jq.  grep's status 1 (no match) is not an error here, it
        ## simply yields an empty result file; real failures still count.
        { grep --no-filename -F "\"$msg\"" "${files[@]}" ||
          test $? -eq 1
        } |
        jq 'def katip_timestamp_to_iso8601:
              .[:-4] + "Z";
            .
            | map
              ( (.at | katip_timestamp_to_iso8601)
                as $date_iso
              | { date_iso:  $date_iso
                , timestamp: $date_iso | fromdateiso8601
                , reason:    .data.reason
                , slot:      .data.slot
                }
              )
            | sort_by (.timestamp)
            | .[]
           ' --slurp --compact-output > "$dir"/analysis/node."$msg".json
}
analysis_list+=(analysis_message_types)
## Count, per machine, how often each message kind reported by
## tools/msgtypes.sh occurs in that machine's node logs, and append the
## result to analysis.json as:
##   message_types: { <machine>: { <kind>: <count>, ... }, ... }
##   $1:  run directory (default: current directory)
##   $2+: machine names
## Progress (".<machine>") is written to stderr.
## NOTE(review): 'tnum', 'sub_tids' and 'types' are declared but not
## used by the live code below.
analysis_message_types() {
local dir=${1:-.} mach tnum sub_tids; shift
local machines=("$@")
for mach in ${machines[*]}
do echo -n .$mach >&2
local types key
"$dir"/tools/msgtypes.sh \
"$dir/analysis/logs-$mach"/node-*.json |
## The loop condition is the list 'read; test -n': it stops at end of
## input or at the first empty line.
while read -r ty
test -n "$ty"
## 'key' is the kind with everything up to its last dot removed; lines
## are counted by a fixed-string match on that suffix followed by '"'.
do key=$(jq .kind <<<$ty -r | sed 's_.*\.__g')
jq '{ key: .kind, value: $count }' <<<$ty \
--argjson count "$(grep -Fh "$key\"" \
"$dir/analysis/logs-$mach"/node-*.json |
wc -l)"
done |
jq '{ "\($name)": from_entries }
' --slurp --arg name "$mach"
# jq '{ "\($name)": $types }
# ' --arg name "$mach" --null-input \
# --argjson types "$("$dir"/tools/msgtypes.sh \
# "$dir/analysis/logs-$mach"/node-*.json |
# jq . --slurp)"
done | analysis_append "$dir" \
'{ message_types: add
}' --slurp
}
analysis_list+=(analysis_repackage_db)
## Pull the CSV and text members out of the run's db-analysis archive
## into its analysis directory.
##   $1: run directory (default: current directory)
analysis_repackage_db() {
        local rundir=${1:-.}
        local tar_args=(x -C "$rundir"/analysis
                        -af "$rundir"/logs/db-analysis.tar.xz
                        --wildcards '*.csv' '*.txt')

        tar "${tar_args[@]}"
}
# TODO: broken
# analysis_list+=(analysis_tx_losses)
## Currently not registered in analysis_list (see TODO above).
## When analysis.json reports a non-zero .tx_stats.tx_missing, source
## tools/lib-loganalysis.sh and run its op_analyse_losses from within
## the run's analysis directory.
##   $1: run directory (default: current directory)
analysis_tx_losses() {
local dir=${1:-.}
dir=$(realpath "$dir")
pushd "$dir"/analysis >/dev/null || return 1
if jqtest '(.tx_stats.tx_missing != 0)' "$dir"/analysis.json
then echo -n " missing-txs"
. "$dir"/tools/lib-loganalysis.sh
op_analyse_losses; fi
popd >/dev/null || return 1
}
analysis_list+=(analysis_derived)
## Extend .tx_stats in analysis.json with figures derived from
## analysis/node.TraceMempoolRejectedTx.json:
##   tx_rejected       number of lines in that file
##   tx_utxo_invalid   lines containing "(UTxOValidationUTxOError "
##   tx_missing_input  lines containing "(UTxOMissingInput "
##   $1: run directory (default: current directory)
analysis_derived() {
        local dir=${1:-.}
        local f="$dir"/analysis/node.TraceMempoolRejectedTx.json

        ## "$f" is quoted throughout so that run directories containing
        ## whitespace neither split nor produce an ambiguous redirect.
        analysis_append "$dir" \
          '{ tx_stats:
             ($analysis.tx_stats
              + { tx_rejected:      $rejected
                , tx_utxo_invalid:  $utxo_invalid
                , tx_missing_input: $missing_input })}
          ' --argjson rejected      "$(wc -l <"$f")" \
            --argjson utxo_invalid  "$(grep -F "(UTxOValidationUTxOError " "$f" | wc -l)" \
            --argjson missing_input "$(grep -F "(UTxOMissingInput " "$f" | wc -l)" \
            <<<0
}
analysis_list+=(analysis_sanity)
## Final analysis step: run the sanity checks over a run and, unless
## they come back as the empty list "[]", report the findings, mark the
## run as broken and fail.
##   $1: run directory (default: current directory)
## Returns 0 for a clean run, 1 when anomalies were found.
analysis_sanity() {
        local rundir=${1:-.} tag_name findings
        tag_name=$(run_tag "$rundir")
        findings="$(sanity_check_run "$rundir")"

        if test "$findings" = "[]"
        then return 0; fi

        echo
        oprint "sanity check failed for tag: $tag_name"
        echo "$findings" >&2
        mark_run_broken "$rundir" "$findings"
        return 1
}
###
### Aux
###
## Emit one inventory object for a set of JSON log files:
##   { name, earliest, latest, files }
## 'earliest' comes from the .at field of the first line of the first
## file, 'latest' from the .at field of the last line of the last file;
## both are converted to epoch seconds via fromdateiso8601.
##   $1:  name to record for this log set
##   $2+: log files -- presumably in chronological order, since only the
##        first and last are inspected; verify against callers
jsonlog_inventory() {
local name=$1; shift
local args fs=("$@")
args=(--arg name "$name"
--argjson earliest "$(head -n1 ${fs[0]})"
--argjson latest "$(tail -n1 ${fs[-1]})"
--argjson files "$(echo ${fs[*]} | shell_list_to_json)"
)
## katip_timestamp_to_iso8601 drops the last four characters of the
## timestamp and appends "Z" before parsing.
jq 'def katip_timestamp_to_iso8601:
.[:-4] + "Z";
.
| { name: $name
, earliest: ($earliest.at
| katip_timestamp_to_iso8601 | fromdateiso8601)
, latest: ( $latest.at
| katip_timestamp_to_iso8601 | fromdateiso8601)
, files: $files
}' "${args[@]}" <<<0
}

95
bench/lib-analysis.sh

@ -0,0 +1,95 @@
#!/usr/bin/env bash
# shellcheck disable=1091,2016
## Emit a stream of log inventory objects (see jsonlog_inventory): one
## per named machine, built from "$dir"/logs-<machine>/node-*.json, plus
## one for the generator log under "$dir"/logs-explorer.
##   $1:  directory holding the unpacked logs
##   $2+: machine names
## Fails with a diagnostic when a machine has no node logs at all.
collect_jsonlog_inventory() {
        local dir=$1; shift
        local constituents=("$@")
        ## Loop state is kept local so callers' variables are untouched.
        local mach f
        local jsons=()

        for mach in "${constituents[@]}"
        do jsons=()
           ## Glob directly instead of parsing 'ls': keeps paths with
           ## whitespace intact; an unmatched pattern is filtered out.
           for f in "$dir"/logs-"$mach"/node-*.json
           do if test -e "$f"; then jsons+=("$f"); fi; done
           if test ${#jsons[@]} -eq 0
           then echo "ERROR: no node logs for '$mach' in: $dir" >&2
                return 1; fi
           jsonlog_inventory "$mach" "${jsons[@]}"; done
        jsonlog_inventory "generator" "$dir"/logs-explorer/generator.json
}
## Merge the result of a jq expression into a run's analysis.json via
## json_file_append.  Inside the expression, $meta is bound to the
## first value of the run's meta.json and $analysis to the first value
## of its analysis.json.
##   $1:  run directory
##   $2:  jq expression to merge
##   $3+: extra jq arguments
## Standard input is passed through to jq.
analysis_append() {
local dir=$1 expr=$2; shift 2
json_file_append "$dir"/analysis.json '
$meta[0] as $meta
| $analysis[0] as $analysis
| '"$expr
" --slurpfile meta "$dir/meta.json" \
--slurpfile analysis "$dir/analysis.json" \
"$@"
}
## Like analysis_append, but goes through json_file_prepend, which puts
## the expression's result on the left-hand side of the merge.
## $meta and $analysis are bound to the first values of the run's
## meta.json and analysis.json respectively.
##   $1:  run directory
##   $2:  jq expression to merge
##   $3+: extra jq arguments
analysis_prepend() {
local dir=$1 expr=$2; shift 2
json_file_prepend "$dir"/analysis.json '
$meta[0] as $meta
| $analysis[0] as $analysis
| '"$expr
" --slurpfile meta "$dir/meta.json" \
--slurpfile analysis "$dir/analysis.json" \
"$@"
}
###
###
## Run every analysis registered in analysis_list over one run.
##   --list  print the registered analyses and return
##   $1:     run directory (default: current directory); must exist and
##           contain meta.json
## Each analysis is called as: <analysis> <rundir> <machine>...
## with the machine names taken from the keys of .machine_info in the
## run's deployment-explorer.json.  Afterwards the run is patched via
## patch_run and the unpacked scratch data is removed.
analyse_run() {
        while test $# -ge 1
        do case "$1" in
           --list ) echo "${analysis_list[*]}"; return;;
           * ) break;; esac; shift; done

        ## 'machines' and 'a' are declared local so that they do not
        ## leak into (or clobber) the caller's scope.
        local dir=${1:-.} tag meta a
        local machines=()
        dir=$(realpath "$dir")

        if test ! -d "$dir"
        then fail "run directory doesn't exist: $dir"; fi
        if test ! -f "$dir/meta.json"
        then fail "run directory has no metafile: $dir"; fi

        run_fetch_benchmarking "$dir/tools"

        machines=($(jq '.machine_info | keys | join(" ")
                       ' --raw-output <"$dir/deployment-explorer.json"))
        meta=$(jq .meta "$dir/meta.json")
        tag=$(jq .tag <<<"$meta" --raw-output)

        echo "--( processing logs in:  $(basename "$dir")"

        for a in "${analysis_list[@]}"
        do echo -n " $a" | sed 's/analysis_//'
           $a "$dir" "${machines[@]}"; done

        patch_run "$dir"

        rm -rf "$dir"/analysis/{analysis,logs-node-*,logs-explorer,startup}

        oprint "analysed tag:  ${tag}"
}
## List the names of the run directories directly below a directory,
## one per line.  A run directory is any immediate subdirectory that
## contains a regular file named meta.json.
##   $1: directory to scan (default: current directory)
runs_in() {
        local dir=${1:-.} meta run
        dir=$(realpath "$dir")
        ## The run name is derived from each hit's own path rather than
        ## by counting slashes, so it is independent of the depth of $dir.
        find "$dir" -maxdepth 2 -mindepth 2 -name meta.json -type f |
        while IFS= read -r meta
        do run=${meta%/meta.json}
           echo "${run##*/}"; done
}
## Analyse every run found (via runs_in) below a directory.
##   --parallel  analyse all runs concurrently instead of one by one
##   $1:         directory holding the runs (default: current directory)
## In parallel mode the function waits for all analyses to finish and
## returns non-zero if any of them failed.
mass_analyse() {
        local parallel=
        while test $# -ge 1
        do case "$1" in
           --parallel ) parallel=t;;
           * ) break;; esac; shift; done

        local dir=${1:-.} run pid rc=0
        local runs=() pids=()
        mapfile -t runs < <(runs_in "$dir")

        oprint "analysing runs:  ${runs[*]}"

        for run in ${runs[@]+"${runs[@]}"}
        do if test -n "$parallel"
           then analyse_run "$dir/$run" &
                pids+=($!)
           else analyse_run "$dir/$run"; fi; done

        ## Without this barrier the background analyses would still be
        ## running (or get orphaned) when the caller carries on or exits.
        for pid in ${pids[@]+"${pids[@]}"}
        do wait "$pid" || rc=1; done
        return $rc
}

39
scripts/lib-benchrun.sh → bench/lib-benchrun.sh

@ -15,41 +15,38 @@ run_fetch_benchmarking() {
local targetdir=$1
oprint "fetching tools from 'cardano-benchmarking' $(nix-instantiate --eval -E "(import $(dirname "${self}")/../nix/sources.nix).cardano-benchmarking.rev" | tr -d '"' | cut -c-8) .."
export nix_store_benchmarking=$(nix-instantiate --eval -E "(import $(dirname "${self}")/../nix/sources.nix).cardano-benchmarking.outPath" | tr -d '"' )
test -n "${nix_store_benchmarking}" ||
test -d "$nix_store_benchmarking" ||
fail "couldn't fetch 'cardano-benchmarking'"
mkdir -p 'tools'
cp -fa "${nix_store_benchmarking}"/scripts/*.{sh,sql} "$targetdir"
mkdir -p "$targetdir"
cp -fa "$nix_store_benchmarking"/{analyses/*.sh,scripts/*.{sh,sql}} "$targetdir"
}
is_run_broken() {
local tag=$1
dir="./runs/${tag}"
local dir=${1:-}
jqtest .broken "$dir"/meta.json
test -f "$dir"/analysis.json &&
jqtest .anomalies "$dir"/analysis.json ||
jqtest .broken "$dir"/meta.json
}
mark_run_broken() {
local tag metatmp
tag=${1:-$(cluster_last_meta_tag)}
dir="./runs/${tag}"
local dir=$1 errors=$2 tag
tag=$(run_tag "$dir")
test -n "$2" ||
fail "asked to mark $tag as anomalous, but no anomalies passed"
oprint "marking run as broken (results will be stored separately): $tag"
metatmp=$(mktemp --tmpdir)
jq '{ broken: true } + .
' >"$metatmp" <"$dir/meta.json"
mv "$metatmp" "$dir/meta.json"
json_file_prepend "$dir/analysis.json" '{ anomalies: $anomalies }' \
--argjson anomalies "$errors" <<<0
}
process_broken_run() {
local tag=$1 metatmp
tagroot=$(realpath ./runs)
resultroot=$(realpath ../bench-results-bad)
export tagroot resultroot
local dir=${1:-.}
op_stop
op_bench_fetch "$tag"
mark_run_broken "$tag"
package_tag "$tag"
op_bench_fetch "$dir"
analyse_run "$dir"
package_run "$dir" "$(realpath ../bench-results-bad)"
}

0
scripts/lib-deploy.sh → bench/lib-deploy.sh

19
scripts/lib-params.sh → bench/lib-params.sh

@ -245,22 +245,3 @@ params() {
* ) fail "unknown query: $1";;
esac
}
cluster_last_meta_tag() {
local meta=./last-meta.json tag dir meta2
jq . "${meta}" >/dev/null || fail "malformed run metadata: ${meta}"
tag=$(jq --raw-output .meta.tag "${meta}")
test -n "${tag}" || fail "bad tag in run metadata: ${meta}"
dir="./runs/${tag}"
test -d "${dir}" ||
fail "bad tag in run metadata: ${meta} -- ${dir} is not a directory"
meta2=${dir}/meta.json
jq --exit-status . "${meta2}" >/dev/null ||
fail "bad tag in run metadata: ${meta} -- ${meta2} is not valid JSON"
test "$(realpath ./last-meta.json)" = "$(realpath "${meta2}")" ||
fail "bad tag in run metadata: ${meta} -- ${meta2} is different from ${meta}"
echo "${tag}"
}

0
scripts/lib-profile.sh → bench/lib-profile.sh

58
bench/lib-report.sh

@ -0,0 +1,58 @@
#!/usr/bin/env bash
# shellcheck disable=1091,2016
tag_format_timetoblock_header="tx id,tx time,block time,block no,delta t"

## Normalise two analysis outputs of a run in place:
##  - timetoblock.csv gets the standard header line prepended and its
##    semicolons turned into commas, unless the header is already there;
##  - 00-results-table.sql.csv loses its first two lines when it starts
##    with "DROP TABLE".
##   $1: run directory (default: current directory)
patch_run() {
        local rundir=${1:-.}
        rundir=$(realpath "$rundir")
        local ttb="$rundir"/analysis/timetoblock.csv
        local sqlcsv="$rundir"/analysis/00-results-table.sql.csv

        if [[ "$(head -n1 "$ttb")" != "${tag_format_timetoblock_header}" ]]
        then echo "---| patching $ttb"
             sed -i "1 s_^_${tag_format_timetoblock_header}\n_; s_;_,_g" \
                 "$ttb"
        fi

        if [[ "$(head -n1 "$sqlcsv")" == "DROP TABLE" ]]
        then echo "---| patching $sqlcsv"
             tail -n+3 "$sqlcsv" \
                  > "$sqlcsv".fixed
             mv "$sqlcsv".fixed \
                "$sqlcsv";
        fi
}
## Print the report name of a run:
##   <YYYY-MM-DD-HH.MM>.<profile>[.broken]
## with the date taken from .meta.timestamp and the profile from
## .meta.profile of the run's meta.json; the '.broken' suffix is added
## when is_run_broken succeeds for the run.
##   $1: run directory (default: current directory)
## NOTE(review): 'dir' and 'date' are not declared local, so they are
## set in the caller's scope when this is not run in a subshell.
run_report_name() {
local metafile meta prof suffix=
dir=${1:-.}
metafile="$dir"/meta.json
meta=$(jq .meta "$metafile" --raw-output)
prof=$(jq .profile <<<$meta --raw-output)
date=$(date +'%Y'-'%m'-'%d'-'%H.%M' --date=@"$(jq .timestamp <<<$meta)")
## NOTE(review): this check runs only after both values were already
## used above.
test -n "$meta" -a -n "$prof" ||
fail "Bad run meta.json format: $metafile"
if is_run_broken "$dir"
then suffix='broken'; fi
echo "$date.$prof${suffix:+.$suffix}"
}
## Pack a run directory into <resultroot>/<report name>.tar.xz, with the
## report name (see run_report_name) as the archive's top-level
## directory.
##   $1: run directory (default: current directory)
##   $2: optional result directory; when omitted, broken runs go to
##       ../bench-results-bad and all others to ../bench-results
## Sets the global 'resultroot' to the directory used.
package_run() {
        local dir tag report_name package rc=0
        dir=${1:-.}
        ## The tag was previously declared but never looked up, which is
        ## an unbound-variable error under 'set -u'.
        tag=$(run_tag "$dir")
        report_name=$(run_report_name "$dir")

        if test -n "${2:-}"
        then resultroot=$2
        elif is_run_broken "$dir"
        then resultroot=$(realpath ../bench-results-bad)
        else resultroot=$(realpath ../bench-results); fi

        package=${resultroot}/$report_name.tar.xz

        oprint "Packaging $tag as:  $package"
        ## Temporary alias so that the archive's top-level directory is
        ## the report name.  It points at the run directory actually
        ## given, which need not live under ./runs.
        ln -sfn "$(realpath "$dir")" "$report_name"
        tar cf "$package" "$report_name" --xz --dereference || rc=$?
        ## Always remove the alias, even when packing failed.
        rm -f "$report_name"
        return $rc
}

201
bench/lib-sanity.sh

@ -0,0 +1,201 @@
#!/usr/bin/env bash
# shellcheck disable=1091,2016
sanity_check_list=()
sanity_check_list+=(sanity_check_start_log_spread)
## Flag every inventoried log whose earliest timestamp differs from the
## run's $meta.timestamp by more than allowed.start_log_spread_s.
## Emits one object of kind "start-log-spread" per offending log.
##   $1: run directory
##   $2: tolerances JSON (default: $default_tolerances)
sanity_check_start_log_spread() {
local dir=$1 t=${2:-${default_tolerances}}
sanity_check "$t" "$dir" '
$analysis.logs
| map
( (.earliest - $meta.timestamp | fabs)
as $delta
| select ($delta > $allowed.start_log_spread_s)
| . +
{ delta: $delta
, start: $meta.timestamp })
' '.
| map
({ kind: "start-log-spread"
} + .)
| .[]'
}
sanity_check_list+=(sanity_check_last_log_spread)
## Flag every inventoried log, the generator's excepted, whose latest
## timestamp differs from $analysis.final_log_timestamp by more than
## allowed.last_log_spread_s.
## Emits one object of kind "latest-log-spread" per offending log.
##   $1: run directory
##   $2: tolerances JSON (default: $default_tolerances)
sanity_check_last_log_spread() {
local dir=$1 t=${2:-${default_tolerances}}
sanity_check "$t" "$dir" '
$analysis.logs
| map ## Generator always finishes a bit early, and
## we have it analysed to death by other means..
(select (.name != "generator"))
| map
( (.latest - $analysis.final_log_timestamp | fabs)
as $delta
| select ($delta > $allowed.last_log_spread_s)
| . +
{ delta: $delta
, final_log_timestamp: $analysis.final_log_timestamp })
' '.
| map
({ kind: "latest-log-spread"
} + .)
| .[]'
}
sanity_check_list+=(sanity_check_not_even_started)
## Flag a run whose analysis/explorer.MsgBlock.json holds no entries at
## all.  Emits an object of kind "blockchain-not-even-started".
##   $1: run directory
##   $2: tolerances JSON (default: $default_tolerances)
sanity_check_not_even_started() {
local dir=$1 t=${2:-${default_tolerances}}
sanity_check "$t" "$dir" '
$blocks
| length == 0
' '.
| { kind: "blockchain-not-even-started"
}' --slurpfile blocks "$dir"/analysis/explorer.MsgBlock.json
}
sanity_check_list+=(sanity_check_silence_since_last_block)
## Flag a run where the time between the last entry of
## analysis/explorer.MsgBlock.json and the final node log timestamp is
## at least allowed.silence_since_last_block_s.  With no block entries
## at all, the first node log timestamp stands in for the last block.
## Emits an object of kind "blockchain-stopped".
##   $1: run directory
##   $2: tolerances JSON (default: $default_tolerances)
sanity_check_silence_since_last_block() {
local dir=$1 t=${2:-${default_tolerances}}
sanity_check "$t" "$dir" '
$blocks[-1] // { timestamp: $analysis.first_node_log_timestamp }
| ($analysis.final_node_log_timestamp - .timestamp)
as $delta
| if $delta >= $allowed.silence_since_last_block_s
then $delta else empty end
' '.
| { kind: "blockchain-stopped"
, silence_since_last_block_s: .
, allowance: $allowed.silence_since_last_block_s
}' --slurpfile blocks "$dir"/analysis/explorer.MsgBlock.json
}
sanity_check_list+=(sanity_check_no_txs_in_blocks)
## Flag a run whose tx statistics report tx_seen_in_blocks == 0.
## Emits an object of kind "no-txs-in-blocks".
##   $1: run directory
##   $2: tolerances JSON (default: $default_tolerances)
sanity_check_no_txs_in_blocks() {
local dir=$1 t=${2:-${default_tolerances}}
sanity_check "$t" "$dir" '
$txstats.tx_seen_in_blocks == 0' '
{ kind: "no-txs-in-blocks"
}'
}
sanity_check_list+=(sanity_check_announced_less_txs_than_specified)
## Flag a run where fewer transactions were announced (tx_annced) than
## the profile's generator.tx_count asks for.  The check only applies
## when tx_annced >= tx_sent (see the guard in the expression).
## Emits an object of kind "announced-less-txs-than-specified".
## NOTE(review): the emitted object reports 'sent' (tx_sent), not the
## announced count the condition is about -- confirm this is intended.
##   $1: run directory
##   $2: tolerances JSON (default: $default_tolerances)
sanity_check_announced_less_txs_than_specified() {
local dir=$1 t=${2:-${default_tolerances}}
sanity_check "$t" "$dir" '
## Guard against old logs, where tx_annced is 0:
$txstats.tx_annced >= $txstats.tx_sent and
$prof.generator.tx_count > $txstats.tx_annced' '
{ kind: "announced-less-txs-than-specified"
, required: $prof.generator.tx_count
, sent: $txstats.tx_sent
}'
}
sanity_check_list+=(sanity_check_sent_less_txs_than_specified)
## Flag a run where fewer transactions were sent (tx_sent) than the
## profile's generator.tx_count asks for.
## Emits an object of kind "sent-less-txs-than-specified".
##   $1: run directory
##   $2: tolerances JSON (default: $default_tolerances)
sanity_check_sent_less_txs_than_specified() {
local dir=$1 t=${2:-${default_tolerances}}
sanity_check "$t" "$dir" '
$prof.generator.tx_count > $txstats.tx_sent' '
{ kind: "sent-less-txs-than-specified"
, required: $prof.generator.tx_count
, sent: $txstats.tx_sent
}'
}
sanity_check_list+=(sanity_check_tx_loss_over_threshold)
## Flag a run where tx_seen_in_blocks is below
## tx_sent * (1 - allowed.tx_loss_ratio).
## Emits an object of kind "txs-loss-over-threshold".
##   $1: run directory
##   $2: tolerances JSON (default: $default_tolerances)
sanity_check_tx_loss_over_threshold() {
local dir=$1 t=${2:-${default_tolerances}}
sanity_check "$t" "$dir" '
$txstats.tx_sent * (1.0 - $allowed.tx_loss_ratio)
> $txstats.tx_seen_in_blocks' '
{ kind: "txs-loss-over-threshold"
, sent: $txstats.tx_sent
, threshold: ($txstats.tx_sent * (1.0 - $allowed.tx_loss_ratio))
, received: $txstats.tx_seen_in_blocks
}'
}
sanity_check_list+=(sanity_check_chain_density)
## Flag a run whose overall chain density is insufficient.  From the
## entries of analysis/explorer.MsgBlock.json and the node log time span
## it derives:
##   lifetime_s      final minus first node log timestamp
##   lifetime_slots  floor(lifetime_s * 1000 / genesis.slot_duration)
##   chain_density   block count / lifetime_slots
##   missed_slots    lifetime_slots - block count
## and fails when chain_density < allowed.minimum_chain_density or
## missed_slots > allowed.maximum_missed_slots.
## Emits an object of kind "insufficient_overall_chain_density".
##   $1: run directory
##   $2: tolerances JSON (default: $default_tolerances)
sanity_check_chain_density() {
local dir=$1 t=${2:-${default_tolerances}}
sanity_check "$t" "$dir" '
($blocks | length)
as $block_count
| ($analysis.final_node_log_timestamp
- $analysis.first_node_log_timestamp)
as $cluster_lifetime_s
| ($cluster_lifetime_s * 1000 / $genesis.slot_duration | floor)
as $cluster_lifetime_slots
| ($block_count / ($cluster_lifetime_slots))
as $chain_density
| ($cluster_lifetime_slots - $block_count)
as $missed_slots
| if $chain_density < $allowed.minimum_chain_density or
$missed_slots > $allowed.maximum_missed_slots
then { lifetime_s: $cluster_lifetime_s
, lifetime_slots: $cluster_lifetime_slots
, block_count: $block_count
, missed_slots: $missed_slots
, chain_density: $chain_density
} else empty end' '
{ kind: "insufficient_overall_chain_density"
, lifetime_s: .lifetime_s
, lifetime_slots: .lifetime_slots
, block_count: .block_count
, missed_slots: .missed_slots
, chain_density: .chain_density
}' --slurpfile blocks "$dir"/analysis/explorer.MsgBlock.json
}
# sanity_check_list+=(sanity_check_)
# sanity_check_() {
# local t=$1 dir=$2
# }
## Default allowances for the sanity checks, as a JSON object; it is
## handed to the checks' jq programs as $allowed.
## NOTE(review): 'cluster_startup_overhead_s' is not referenced by any
## of the checks defined in this file.
default_tolerances='
{ "tx_loss_ratio": 0.0
, "start_log_spread_s": 60
, "last_log_spread_s": 60
, "silence_since_last_block_s": 40
, "cluster_startup_overhead_s": 60
, "minimum_chain_density": 0.9
, "maximum_missed_slots": 5
}'
## Run every check registered in sanity_check_list over a run, using
## the default tolerances, and print the findings as one JSON array.
## A run without findings yields an empty array; otherwise an extra
## entry of kind "tolerances", carrying the tolerances used, is
## appended.
##   $1: run directory (default: current directory)
## NOTE(review): 'metafile', 'meta', 'prof', 'tolerances' and 't' are
## declared but not used in this body.
sanity_check_run() {
local dir=${1:-.} metafile meta prof tolerances t
for check in ${sanity_check_list[*]}
do $check "$dir" "${default_tolerances}"
done | jq --slurp '
if length != 0
then . +
[{ kind: "tolerances" }
+ $tolerances] else . end
' --argjson tolerances "$default_tolerances"
}
## Evaluate one sanity check through sanity_checker.
##   $1:  tolerances JSON
##   $2:  run directory
##   $3:  jq test expression; the check counts as failed when its result
##        is truthy and is none of {}, [] and ""
##   $4:  jq expression that turns the test's result (its input) into
##        the error object(s) to emit on failure
##   $5+: extra jq arguments
## Emits nothing when the check passes.
sanity_check() {
local tolerances=$1 dir=$2 test=$3 err=$4; shift 4
sanity_checker "$tolerances" "$dir" \
" ($test)"' as $test
| if $test != {} and $test != [] and $test != "" and $test
then ($test | '"$err"') else empty end
' "$@"
}
## Run a jq expression with the standard sanity-check bindings:
##   $meta      .meta of the run's meta.json
##   $analysis  first value of the run's analysis.json
##   $txstats   first value of analysis/tx-stats.json
##   $prof      $meta.profile_content, else $meta.generator_params
##   $genesis   $prof.genesis, else $prof.genesis_params
##   $generator $prof.generator
##   $allowed   the tolerances
##   $1:  tolerances JSON
##   $2:  run directory
##   $3:  jq expression
##   $4+: extra jq arguments
## jq's input is the literal 0, supplied via the here-string.
sanity_checker() {
local tolerances=$1 dir=$2 expr=$3; shift 3
jq ' $meta[0].meta as $meta
| $analysis[0] as $analysis
| $txstats[0] as $txstats
| ($meta.profile_content
## TODO: backward compat
// $meta.generator_params)
as $prof
| ($prof.genesis
## TODO: backward compat
// $prof.genesis_params)
as $genesis
| $prof.generator as $generator
| '"$expr"'
' --slurpfile meta "$dir/meta.json" \
--slurpfile analysis "$dir/analysis.json" \
--slurpfile txstats "$dir/analysis/tx-stats.json" \
--argjson allowed "$tolerances" \
"$@" <<<0
}

25
bench/lib-sheets.sh

@ -0,0 +1,25 @@
#!/usr/bin/env bash
# shellcheck disable=1091,2016
sheet_list=()
sheet_list+=(sheet_message_types_summary)
## Render .message_types of a run's analysis.json as a CSV sheet,
## report/<name>.csv, with one "machine,message,count" row per entry.
## <name> is this function's name without its 'sheet_' prefix.
##   $1: run directory (default: current directory)
sheet_message_types_summary() {
        local dir=${1:-.} name
        name=${FUNCNAME[0]#sheet_}

        mkdir -p "$dir"/report
        ## The header is written first instead of being inserted with
        ## 'sed 1i' afterwards: sed inserts nothing into an empty file,
        ## which left a sheet without rows also without its header.
        ## (The header text itself is kept as is.)
        { echo 'node, message, occurences'
          ## 'add // []': with no machines at all 'add' yields null,
          ## which '.[]' cannot iterate.
          jq ' .message_types
             | to_entries
             | map ( .key as $mach
                   | .value
                   | to_entries
                   | map([ $mach, .key, .value | tostring]))
             | add // []
             | .[]
             | join(",")' < "$dir"/analysis.json --raw-output
        } > "$dir"/report/"$name".csv
}

61
bench/lib-tag.sh

@ -0,0 +1,61 @@
#!/usr/bin/env bash
# shellcheck disable=1091,2016
## Print the tag (.meta.tag of meta.json) of a run.
##   $1: run directory (default: current directory)
run_tag() {
        local rundir=${1:-.}
        jq --raw-output .meta.tag "$(realpath "$rundir")/meta.json"
}
## Print the tag of the most recent run, as recorded in
## ./last-meta.json, after verifying that:
##  - ./last-meta.json parses as JSON and carries a non-empty tag,
##  - ./runs/<tag> is a directory whose meta.json parses as JSON,
##  - ./last-meta.json and that meta.json resolve to the same file.
## Any violation is reported through 'fail'.
cluster_last_meta_tag() {
        local last=./last-meta.json last_tag rundir run_meta

        if ! jq . "${last}" >/dev/null
        then fail "malformed run metadata: ${last}"; fi

        last_tag=$(jq --raw-output .meta.tag "${last}")
        if ! test -n "${last_tag}"
        then fail "bad tag in run metadata: ${last}"; fi

        rundir="./runs/${last_tag}"
        if ! test -d "${rundir}"
        then fail "bad tag in run metadata: ${last} -- ${rundir} is not a directory"; fi

        run_meta=${rundir}/meta.json
        if ! jq --exit-status . "${run_meta}" >/dev/null
        then fail "bad tag in run metadata: ${last} -- ${run_meta} is not valid JSON"; fi

        if ! test "$(realpath "${last}")" = "$(realpath "${run_meta}")"
        then fail "bad tag in run metadata: ${last} -- ${run_meta} is different from ${last}"; fi

        echo "${last_tag}"
}
## Fetch the run with the given tag from under ./runs.
##   $1: tag (default: tag of the last run, see cluster_last_meta_tag)
fetch_tag() {
        local tag
        if test -n "${1:-}"
        then tag=$1
        else tag=$(cluster_last_meta_tag); fi
        fetch_run "./runs/${tag}"
}
## Analyse the run with the given tag under $tagroot.  A failing
## analysis is deliberately not propagated: this always returns 0.
##   $1: tag (default: tag of the last run, see cluster_last_meta_tag)
analyse_tag() {
        local tag
        if test -n "${1:-}"
        then tag=$1
        else tag=$(cluster_last_meta_tag); fi
        analyse_run "${tagroot}/${tag}" || :
}
## Run the sanity checks over the run with the given tag under $tagroot.
##   $1: tag (default: tag of the last run, see cluster_last_meta_tag)
sanity_check_tag() {
        local tag
        if test -n "${1:-}"
        then tag=$1
        else tag=$(cluster_last_meta_tag); fi
        sanity_check_run "${tagroot}/${tag}"
}
## Print the report name of the run with the given tag under $tagroot.
##   $1: tag (default: tag of the last run, see cluster_last_meta_tag)
tag_report_name() {
        local tag
        if test -n "${1:-}"
        then tag=$1
        else tag=$(cluster_last_meta_tag); fi
        run_report_name "${tagroot}/${tag}"
}
## Package the run with the given tag under $tagroot.
##   $1: tag (default: tag of the last run, see cluster_last_meta_tag)
package_tag() {
        local tag
        if test -n "${1:-}"
        then tag=$1
        else tag=$(cluster_last_meta_tag); fi
        package_run "${tagroot}/${tag}"
}

26
scripts/lib.sh → bench/lib.sh

@ -80,6 +80,32 @@ words_to_lines() {
sed 's_ _\n_g'
}
## Merge the result of a jq expression into a JSON file, in place:
##   new content = <old content> + (<expression>)
## so that, on conflicting keys, the expression's values win.
## A missing file is created as "{}" first.
##   $1:  JSON file to update
##   $2:  jq expression
##   $3+: extra jq arguments
## Standard input is passed through to jq.
## Returns non-zero, leaving the file untouched, when jq fails.
json_file_append() {
        local f=$1 extra=$2 tmp; shift 2
        tmp=$(mktemp --tmpdir) || return 1

        test -f "$f" || echo "{}" > "$f"
        ## Only replace the file once jq has succeeded.  Callers may run
        ## in contexts where 'set -e' is ignored (e.g. inside an 'if'),
        ## and moving an empty temp file over $f would destroy it.
        if jq ' $origf[0] as $orig
              | $orig + ('"$extra"')
              ' --slurpfile origf "$f" "$@" > "$tmp"
        then mv "$tmp" "$f"
        else rm -f "$tmp"
             return 1; fi
}
## Merge the result of a jq expression into a JSON file, in place:
##   new content = (<expression>) + <old content>
## so that, on conflicting keys, the file's existing values win.
## A missing file is created as "{}" first.
##   $1:  JSON file to update
##   $2:  jq expression
##   $3+: extra jq arguments
## Standard input is passed through to jq.
## Returns non-zero, leaving the file untouched, when jq fails.
json_file_prepend() {
        local f=$1 extra=$2 tmp; shift 2
        tmp=$(mktemp --tmpdir) || return 1

        test -f "$f" || echo "{}" > "$f"
        ## Only replace the file once jq has succeeded.  Callers may run
        ## in contexts where 'set -e' is ignored (e.g. inside an 'if'),
        ## and moving an empty temp file over $f would destroy it.
        if jq ' $origf[0] as $orig
              | ('"$extra"') + $orig
              ' --slurpfile origf "$f" "$@" > "$tmp"
        then mv "$tmp" "$f"
        else rm -f "$tmp"
             return 1; fi
}
## Turn space-separated words on stdin into a compact JSON array of
## strings on stdout: words_to_lines puts each word on its own line,
## the first jq reads every line as a JSON string, the second slurps
## those strings into one array.
shell_list_to_json() {
words_to_lines | jq --raw-input | jq --slurp --compact-output
}
generate_mnemonic()
{
local mnemonic timestamp commit status

8
nix/sources.json

@ -12,15 +12,15 @@
"url_template": "https://github.com/<owner>/<repo>/archive/<rev>.tar.gz"
},
"cardano-benchmarking": {
"branch": "cad-918-debugging-tx-generator",
"branch": "cad-1073-deploy",
"description": "Epic win",
"homepage": null,
"owner": "input-output-hk",
"repo": "cardano-benchmarking",
"rev": "8926632a845a9e4e5d1af6e1f28c4d5a1c6ef45a",
"sha256": "02mpazqafscr9air6ny0bb0c0if3gdwjvf01jk0hgf4ygf3r7skd",
"rev": "3f82ccfc899f75db675a59362ac6a6f891b60dec",
"sha256": "18kb2ih0g8j1pn9rsrpd6k29blab6s3zfxd82daijds029xr7700",
"type": "tarball",
"url": "https://github.com/input-output-hk/cardano-benchmarking/archive/8926632a845a9e4e5d1af6e1f28c4d5a1c6ef45a.tar.gz",
"url": "https://github.com/input-output-hk/cardano-benchmarking/archive/3f82ccfc899f75db675a59362ac6a6f891b60dec.tar.gz",
"url_template": "https://github.com/<owner>/<repo>/archive/<rev>.tar.gz"
},
"cardano-byron-proxy": {

147
scripts/lib-analysis.sh

@ -1,147 +0,0 @@
#!/usr/bin/env bash
# shellcheck disable=1091
tmjq() {
jq .meta "${tagroot}/$1/"meta.json --raw-output
}
tag_report_name() {
local tag metafile meta prof suffix=
tag=${1:-$(cluster_last_meta_tag)}
metafile=${tagroot}/$tag/meta.json
meta=$(jq .meta "$metafile" --raw-output)
prof=$(jq .profile --raw-output <<<$meta)
date=$(date +'%Y'-'%m'-'%d'-'%H.%M' --date=@"$(jq .timestamp <<<$meta)")
test -n "$meta" -a -n "$prof" ||
fail "Bad tag meta.json format: $metafile"
if is_run_broken "$tag"
then suffix='broken'; fi
echo "$date.$prof${suffix:+.$suffix}"
}
package_tag() {
local tag package report_name
tag=${1:-$(cluster_last_meta_tag)}
report_name=$(tag_report_name "$tag")
if is_run_broken "$tag"
then resultroot=$(realpath ../bench-results-bad)
else resultroot=$(realpath ../bench-results); fi
package=${resultroot}/$report_name.tar.xz
oprint "Packaging $tag as: $package"
ln -sf "./runs/$tag" "$report_name"
tar cf "$package" "$report_name" --xz --dereference
rm -f "$report_name"
}
analyse_tag() {
local tag dir meta
tag=${1:-$(cluster_last_meta_tag)}
dir="${tagroot}/${tag}"
pushd "${dir}" >/dev/null || return 1
rm -rf 'analysis'
mkdir 'analysis'
cd 'analysis'
meta=$(tmjq "$tag" .)
oprint "running log analyses: "
tar xaf '../logs/logs-explorer.tar.xz'
tar xaf '../logs/logs-nodes.tar.xz'
echo " timetoblock.csv"
../tools/analyse.sh \
'logs-explorer/generator' \
'logs-explorer/node' \
'last-run/analysis/'
cp analysis/timetoblock.csv .
local blocks
echo -n "--( running log analyses: blocksizes"
blocks="$(../tools/blocksizes.sh logs-explorer/node-*.json |
jq . --slurp)"
declare -A msgtys
local mach msgtys=() producers tnum sub_tids
producers=($(jq '.machine_info | keys | join(" ")
' --raw-output <'../deployment-explorer.json'))
for mach in explorer ${producers[*]}
do echo -n " msgtys:${mach}"
msgtys[${mach}]="$(../tools/msgtypes.sh logs-explorer/node-*.json |
jq . --slurp)"; done
## NOTE: This is a bit too costly, and we know the generator pretty well.
# echo -n " msgtys:generator"
# msgtys_generator="$(../tools/msgtypes.sh logs-explorer/generator.json |
# jq . --slurp)"
msgtys_generator='[]'
echo -n " node-to-node-submission-tids"
sub_tids="$(../tools/generator-logs.sh log-tids \
logs-explorer/generator.json || true)"
for tnum in $(seq 0 $(($(echo "$sub_tids" | wc -w) - 1)))
do echo -n " node-to-node-submission:${tnum}"
../tools/generator-logs.sh tid-trace "${tnum}" \
logs-explorer/generator.json \
> generator.submission-thread-trace."${tnum}".json; done
for p in ${producers[*]}
do echo -n " added-to-current-chain:$p"
../tools/added-to-current-chain.sh logs-node-*/node-*.json \
> $p.added-to-current-chain.csv; done
jq '{ tx_stats: $txstats[0]
, submission_tids: '"$(jq --slurp <<<$sub_tids)"'
, MsgBlock: '"${blocks}"'
, message_kinds:
({ generator: '"${msgtys_generator}"'
}'"$(for mach in ${!msgtys[*]}
do echo " + { \"$mach\": $(jq --slurp <<<${msgtys[$mach]}) }"
done)"')
}' --null-input \
--slurpfile txstats 'analysis/tx-stats.json' \
> ../analysis.json
echo -n " adding db-analysis"
tar xaf '../logs/db-analysis.tar.xz' --wildcards '*.csv' '*.txt'
if jqtest '(.tx_stats.tx_missing != 0)' ../analysis.json
then echo " missing-txs"
. ../tools/lib-loganalysis.sh
op_analyse_losses
else echo
fi
patch_local_tag "$tag"
rm -rf analysis/ logs-node-*/ logs-explorer/ startup/
popd >/dev/null
oprint "analysed tag: ${tag}"
}
tag_format_timetoblock_header="tx id,tx time,block time,block no,delta t"
patch_local_tag() {
local tag=${1?missing tag} target
target=${tagroot}/${tag}
cd "${target}" >/dev/null || return 1
if test "$(head -n1 analysis/timetoblock.csv)" != "${tag_format_timetoblock_header}"
then echo "---| patching ${tag}/analysis/timetoblock.csv"
sed -i "1 s_^_${tag_format_timetoblock_header}\n_; s_;_,_g" \
'analysis/timetoblock.csv'
fi
if test "$(head -n1 analysis/00-results-table.sql.csv)" == "DROP TABLE"
then echo "---| patching ${tag}/analysis/00-results-table.sql.csv"
tail -n+3 analysis/00-results-table.sql.csv > analysis/00-results-table.sql.csv.fixed
mv analysis/00-results-table.sql.csv.fixed analysis/00-results-table.sql.csv;
fi
cd - >/dev/null || return 1
}
Loading…
Cancel
Save