diff options
author | Adam M. Stück <adam@adast.xyz> | 2022-12-17 15:41:29 +0100 |
---|---|---|
committer | Adam M. Stück <adam@adast.xyz> | 2022-12-17 19:26:07 +0100 |
commit | d1828d9658cb3e17984be757f57224a9e98bce50 (patch) | |
tree | fdb15fe1d8abf2742e8382792381b48dc1564f9e | |
parent | 748bf8ca8e76d7dd48ba744c8a9e405b8ee9de0a (diff) |
Optimized 'archiver' script with clever use of awk [main]
Seems to run about 4 times faster now, which is a decent time saving when
running the script on the cluster.
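Uses a magic awk command (`awk FNR-1`) to concatenate all raw .csv files
(excluding their headers). FNR is the line number within the current input
file, so the pattern `FNR-1` is zero (false) only for the first line of each
file and every other line is printed. This replaces the old way, which
consisted of reading each file, trimming the header, and appending the lines
to a HUGE variable.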
-rwxr-xr-x | archiver | 37 |
1 files changed, 13 insertions, 24 deletions
@@ -17,38 +17,29 @@ main() { exit 1 fi - aggregate_files + if [[ -f "$RESULTS_PATH/data.csv" ]]; then + rm "$RESULTS_PATH/data.csv" + fi + + aggregate_files > "$RESULTS_PATH/data.csv" } aggregate_files() { - OUTPUT="" - HEADER_INSERTED=false - - while IFS= read -r -d '' ENTRY - do - if [ $HEADER_INSERTED == true ]; then - FILE=$(tail -n +2 "$ENTRY") - else - FILE=$(cat "$ENTRY") - fi - - OUTPUT+="$FILE\n" - HEADER_INSERTED=true - done < <(find "$RESULTS_PATH" -maxdepth 1 -name '*.csv' ! -name 'data.csv' -type f -print0) - - HEADER=$(echo -e "$OUTPUT" | head -n 1) - ALL_ROWS=$(echo -e "$OUTPUT" | tail -n +2 | sort -t$'\t' -k6,6 -n) - STRATS=$(echo -e "$ALL_ROWS" | awk -F '\t' '{print $5}' | sort | uniq) + ALL_ROWS=$(awk FNR-1 "$RESULTS_PATH"/*.csv 2>/dev/null | sort -t$'\t' -k6,6 -n) + STRATS=$(echo "$ALL_ROWS" | awk -F '\t' '{print $5}' | sort | uniq) OUTPUT="" while read -r STRAT; do [ -z "$STRAT" ] && continue - ROWS=$(echo -e "$ALL_ROWS" | grep -P "\t$STRAT\t") + ROWS=$(echo "$ALL_ROWS" | grep -P "\t$STRAT\t") OUTPUT+="$ROWS\n" done <<< "$STRATS" - OUTPUT=$(echo -e "$HEADER\n$OUTPUT" | head -n -1) - echo -e "$OUTPUT" > "$RESULTS_PATH/data.csv" + echo -e "$(header)\n$OUTPUT" | head -n-1 +} + +header() { + echo -e "model\tquery\tsolved\tresult\tstrategy\ttime\tdate\ttime-limit\tmemory\texit-code\tformula\ttimed-out\terror-msg\tdiscoveredStates\texploredStates\texpandedStates\tmaxTokens\tsearchTime" } help() { @@ -57,8 +48,6 @@ usage: $0 RESULTS-DIR Aggregate data from search stragey benchmark -Options: - -h, --help Show this message EOF } |