summaryrefslogtreecommitdiff
path: root/archiver
diff options
context:
space:
mode:
author: Adam M. Stück <adam@adast.xyz>, 2022-12-17 15:41:29 +0100
committer: Adam M. Stück <adam@adast.xyz>, 2022-12-17 19:26:07 +0100
commit: d1828d9658cb3e17984be757f57224a9e98bce50 (patch)
tree: fdb15fe1d8abf2742e8382792381b48dc1564f9e /archiver
parent: 748bf8ca8e76d7dd48ba744c8a9e405b8ee9de0a (diff)
Optimized 'archiver' script with clever use of awk (branch: main)
The script seems to run about 4 times faster now, which is a decent time saving when running it on the cluster. It uses an awk one-liner (`awk FNR-1`, which prints every line of each input file except the first) to concatenate all raw .csv files while excluding their headers. This replaces the old approach, which consisted of reading each file, trimming the header, and appending the lines to a HUGE variable.
Diffstat (limited to 'archiver')
-rwxr-xr-x archiver 37
1 file changed, 13 insertions, 24 deletions
diff --git a/archiver b/archiver
index 58b5bf5..8f78f2b 100755
--- a/archiver
+++ b/archiver
@@ -17,38 +17,29 @@ main() {
exit 1
fi
- aggregate_files
+ if [[ -f "$RESULTS_PATH/data.csv" ]]; then
+ rm "$RESULTS_PATH/data.csv"
+ fi
+
+ aggregate_files > "$RESULTS_PATH/data.csv"
}
aggregate_files() {
- OUTPUT=""
- HEADER_INSERTED=false
-
- while IFS= read -r -d '' ENTRY
- do
- if [ $HEADER_INSERTED == true ]; then
- FILE=$(tail -n +2 "$ENTRY")
- else
- FILE=$(cat "$ENTRY")
- fi
-
- OUTPUT+="$FILE\n"
- HEADER_INSERTED=true
- done < <(find "$RESULTS_PATH" -maxdepth 1 -name '*.csv' ! -name 'data.csv' -type f -print0)
-
- HEADER=$(echo -e "$OUTPUT" | head -n 1)
- ALL_ROWS=$(echo -e "$OUTPUT" | tail -n +2 | sort -t$'\t' -k6,6 -n)
- STRATS=$(echo -e "$ALL_ROWS" | awk -F '\t' '{print $5}' | sort | uniq)
+ ALL_ROWS=$(awk FNR-1 "$RESULTS_PATH"/*.csv 2>/dev/null | sort -t$'\t' -k6,6 -n)
+ STRATS=$(echo "$ALL_ROWS" | awk -F '\t' '{print $5}' | sort | uniq)
OUTPUT=""
while read -r STRAT; do
[ -z "$STRAT" ] && continue
- ROWS=$(echo -e "$ALL_ROWS" | grep -P "\t$STRAT\t")
+ ROWS=$(echo "$ALL_ROWS" | grep -P "\t$STRAT\t")
OUTPUT+="$ROWS\n"
done <<< "$STRATS"
- OUTPUT=$(echo -e "$HEADER\n$OUTPUT" | head -n -1)
- echo -e "$OUTPUT" > "$RESULTS_PATH/data.csv"
+ echo -e "$(header)\n$OUTPUT" | head -n-1
+}
+
+header() {
+ echo -e "model\tquery\tsolved\tresult\tstrategy\ttime\tdate\ttime-limit\tmemory\texit-code\tformula\ttimed-out\terror-msg\tdiscoveredStates\texploredStates\texpandedStates\tmaxTokens\tsearchTime"
}
help() {
@@ -57,8 +48,6 @@ usage: $0 RESULTS-DIR
Aggregate data from search stragey benchmark
-Options:
- -h, --help Show this message
EOF
}