From d1828d9658cb3e17984be757f57224a9e98bce50 Mon Sep 17 00:00:00 2001 From: "Adam M. Stück" Date: Sat, 17 Dec 2022 15:41:29 +0100 Subject: Optimized 'archiver' script with clever use of awk Seems to run about 4 times faster now, which is a decent time saving when running the script on the cluster. Uses a magic awk command to concatenate all raw .csv files (excluding their headers). This replaces the old way, which consisted of reading each file, trimming the header, and appending the lines to a HUGE variable. --- archiver | 37 +++++++++++++------------------------ 1 file changed, 13 insertions(+), 24 deletions(-) (limited to 'archiver') diff --git a/archiver b/archiver index 58b5bf5..8f78f2b 100755 --- a/archiver +++ b/archiver @@ -17,38 +17,29 @@ main() { exit 1 fi - aggregate_files + if [[ -f "$RESULTS_PATH/data.csv" ]]; then + rm "$RESULTS_PATH/data.csv" + fi + + aggregate_files > "$RESULTS_PATH/data.csv" } aggregate_files() { - OUTPUT="" - HEADER_INSERTED=false - - while IFS= read -r -d '' ENTRY - do - if [ $HEADER_INSERTED == true ]; then - FILE=$(tail -n +2 "$ENTRY") - else - FILE=$(cat "$ENTRY") - fi - - OUTPUT+="$FILE\n" - HEADER_INSERTED=true - done < <(find "$RESULTS_PATH" -maxdepth 1 -name '*.csv' ! 
-name 'data.csv' -type f -print0) - - HEADER=$(echo -e "$OUTPUT" | head -n 1) - ALL_ROWS=$(echo -e "$OUTPUT" | tail -n +2 | sort -t$'\t' -k6,6 -n) - STRATS=$(echo -e "$ALL_ROWS" | awk -F '\t' '{print $5}' | sort | uniq) + ALL_ROWS=$(awk FNR-1 "$RESULTS_PATH"/*.csv 2>/dev/null | sort -t$'\t' -k6,6 -n) + STRATS=$(echo "$ALL_ROWS" | awk -F '\t' '{print $5}' | sort | uniq) OUTPUT="" while read -r STRAT; do [ -z "$STRAT" ] && continue - ROWS=$(echo -e "$ALL_ROWS" | grep -P "\t$STRAT\t") + ROWS=$(echo "$ALL_ROWS" | grep -P "\t$STRAT\t") OUTPUT+="$ROWS\n" done <<< "$STRATS" - OUTPUT=$(echo -e "$HEADER\n$OUTPUT" | head -n -1) - echo -e "$OUTPUT" > "$RESULTS_PATH/data.csv" + echo -e "$(header)\n$OUTPUT" | head -n-1 +} + +header() { + echo -e "model\tquery\tsolved\tresult\tstrategy\ttime\tdate\ttime-limit\tmemory\texit-code\tformula\ttimed-out\terror-msg\tdiscoveredStates\texploredStates\texpandedStates\tmaxTokens\tsearchTime" } help() { @@ -57,8 +48,6 @@ usage: $0 RESULTS-DIR Aggregate data from search stragey benchmark -Options: - -h, --help Show this message EOF } -- cgit v1.2.3-70-g09d2