Last active
June 7, 2025 20:55
-
-
Save daxaxelrod/1c5d8d7dd448a99ba155f58bb3012b29 to your computer and use it in GitHub Desktop.
Faster eval script for 8090 challenge
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
#
# Black Box Challenge Evaluation Script
#
# Tests a reimbursement calculation implementation (./run.sh) against the
# 1,000 historical cases in public_cases.json. Run it from the directory
# that contains both files. Requires jq and bc on the PATH.

# Abort on the first unhandled command failure.
set -e

echo "🧾 Black Box Challenge - Reimbursement System Evaluation"
echo "======================================================="
echo
# Check if jq is available (it is used to extract the cases from
# public_cases.json).
if ! command -v jq &> /dev/null; then
  echo "❌ Error: jq is required but not installed!"
  echo "Please install jq to parse JSON files:"
  echo " macOS: brew install jq"
  echo " Ubuntu/Debian: sudo apt-get install jq"
  echo " CentOS/RHEL: sudo yum install jq"
  exit 1
fi
# Check if bc is available for floating point arithmetic (bash arithmetic
# is integer-only, so all error and score math is delegated to bc).
if ! command -v bc &> /dev/null; then
  echo "❌ Error: bc (basic calculator) is required but not installed!"
  echo "Please install bc for floating point calculations:"
  echo " macOS: brew install bc"
  echo " Ubuntu/Debian: sudo apt-get install bc"
  echo " CentOS/RHEL: sudo yum install bc"
  exit 1
fi
# Check if run.sh exists
if [ ! -f "run.sh" ]; then
  echo "❌ Error: run.sh not found!"
  echo "Please create a run.sh script that takes three parameters:"
  echo " ./run.sh <trip_duration_days> <miles_traveled> <total_receipts_amount>"
  echo " and outputs the reimbursement amount"
  exit 1
fi

# Make run.sh executable. Skip the chmod when it already is: under `set -e`
# a failing chmod (read-only checkout, file owned by another user) would
# otherwise abort the evaluation even though the script could be run.
if [ ! -x "run.sh" ]; then
  chmod +x run.sh
fi
# Check if public cases exist
if [ ! -f "public_cases.json" ]; then
  echo "❌ Error: public_cases.json not found!"
  echo "Please ensure the public cases file is in the current directory."
  exit 1
fi
echo "📊 Running evaluation against 1,000 test cases..."
echo

# Extract all test data upfront in a single jq call for better performance.
# Each case is flattened to one "days:miles:receipts:expected" record.
echo "Extracting test data..."
test_data=$(jq -r '.[] | "\(.input.trip_duration_days):\(.input.miles_traveled):\(.input.total_receipts_amount):\(.expected_output)"' public_cases.json)

# Convert to an array for faster access (compatible with bash 3.2+).
test_cases=()
while IFS= read -r line; do
  # A here-string always delivers at least one line, even for empty input;
  # skip blanks so an empty case list yields zero cases, not one bogus case.
  if [ -z "$line" ]; then
    continue
  fi
  test_cases+=("$line")
done <<< "$test_data"
num_cases=${#test_cases[@]}
# Initialize counters and arrays
successful_runs=0   # cases where run.sh produced a numeric result
exact_matches=0     # error below $0.01
close_matches=0     # error below $1.00
total_error="0"     # running sum of absolute errors (decimal string for bc)
max_error="0"       # largest absolute error seen so far
max_error_case=""   # description of the case that produced max_error
results_array=()    # "case:expected:actual:error:days:miles:receipts"
errors_array=()     # human-readable failure messages

# Set the number of parallel jobs. Defaults to 16; may be overridden through
# the MAX_JOBS environment variable. Anything that is not a positive integer
# falls back to the default so the throttle comparison stays well-defined.
MAX_JOBS=${MAX_JOBS:-16}
if ! [[ $MAX_JOBS =~ ^[1-9][0-9]*$ ]]; then
  MAX_JOBS=16
fi

# Clean up temp files left over from a previous (possibly interrupted) run
rm -f results.tmp errors.tmp
# Launch jobs in parallel and collect results

#######################################
# Runs ./run.sh for a single test case and records the outcome.
# Arguments:
#   $1 - zero-based index of the case
#   $2 - case record "days:miles:receipts:expected"
# Outputs:
#   Appends "case:expected:actual:error:days:miles:receipts" to results.tmp
#   when run.sh succeeds and prints a number; otherwise appends a
#   "Case N: ..." message to errors.tmp.
#######################################
evaluate_case() {
  local index=$1 record=$2
  local case_num=$((index + 1))
  local trip_duration miles_traveled receipts_amount expected
  local script_output output diff error err_file error_msg
  local number_re='^-?[0-9]+\.?[0-9]*$'

  IFS=':' read -r trip_duration miles_traveled receipts_amount expected <<< "$record"

  # stderr is captured on the one and only run, so a failing run.sh does not
  # have to be executed a second time just to obtain its error message.
  if ! err_file=$(mktemp "${TMPDIR:-/tmp}/eval_stderr.XXXXXX"); then
    echo "Case $case_num: Script failed with error: could not create temporary file" >> errors.tmp
    return 0
  fi

  if script_output=$(./run.sh "$trip_duration" "$miles_traveled" "$receipts_amount" 2> "$err_file"); then
    # Strip all whitespace without forking echo/tr.
    output=${script_output//[[:space:]]/}
    if [[ $output =~ $number_re ]]; then
      # Absolute error: subtract with bc, then drop a leading minus sign.
      diff=$(echo "scale=10; $output - $expected" | bc)
      error=${diff#-}
      echo "$case_num:$expected:$output:$error:$trip_duration:$miles_traveled:$receipts_amount" >> results.tmp
    else
      echo "Case $case_num: Invalid output format: $output" >> errors.tmp
    fi
  else
    error_msg=$(tr -d '\n' < "$err_file")
    echo "Case $case_num: Script failed with error: $error_msg" >> errors.tmp
  fi

  rm -f "$err_file"
}

joblist=()
for ((i = 0; i < num_cases; i++)); do
  # Progress is reported when a case is launched, not when it finishes.
  if (( (i + 1) % 100 == 0 )); then
    echo "$((i + 1))/$num_cases cases processed..."
  fi

  evaluate_case "$i" "${test_cases[i]}" &
  joblist+=("$!")

  # Throttle: once MAX_JOBS are in flight, wait for the oldest one.
  if [ "${#joblist[@]}" -ge "$MAX_JOBS" ]; then
    # `|| true`: a job that exited non-zero must not abort the whole
    # evaluation under `set -e`.
    wait "${joblist[0]}" || true
    joblist=("${joblist[@]:1}")
  fi
done
wait
# Read results into arrays and update counters

#######################################
# Folds the per-case temp files into the summary state, then deletes them.
# Inputs:
#   results.tmp - lines "case:expected:actual:error:days:miles:receipts"
#   errors.tmp  - one failure message per line
# Globals (updated):
#   results_array errors_array successful_runs exact_matches close_matches
#   total_error max_error max_error_case
#######################################
collect_results() {
  local case_num expected actual error trip_duration miles_traveled receipts_amount
  local bc_out is_exact is_close new_total is_new_max errline
  local error_re='^[0-9]*\.?[0-9]+$'

  if [ -f results.tmp ]; then
    while IFS=: read -r case_num expected actual error trip_duration miles_traveled receipts_amount; do
      # A row whose error is not a plain non-negative number (for example
      # because the subtraction failed on a malformed expected value) would
      # turn total_error into an empty string and break every later
      # calculation; report it as an error instead of counting it.
      if ! [[ $error =~ $error_re ]]; then
        errors_array+=("Case $case_num: Could not compute error (expected: $expected, got: $actual)")
        continue
      fi

      results_array+=("$case_num:$expected:$actual:$error:$trip_duration:$miles_traveled:$receipts_amount")
      successful_runs=$((successful_runs + 1))

      # One bc process per row instead of four. bc prints one line per
      # expression: exact flag, close flag, new running total, new-max flag.
      bc_out=$(echo "$error < 0.01; $error < 1.0; ${total_error:-0} + $error; $error > ${max_error:-0}" | bc)
      {
        read -r is_exact
        read -r is_close
        read -r new_total
        read -r is_new_max
      } <<< "$bc_out" || true

      # Check for exact match (within $0.01)
      if [[ $is_exact == 1 ]]; then
        exact_matches=$((exact_matches + 1))
      fi

      # Check for close match (within $1.00)
      if [[ $is_close == 1 ]]; then
        close_matches=$((close_matches + 1))
      fi

      # Update total error
      if [[ -n $new_total ]]; then
        total_error=$new_total
      fi

      # Track maximum error
      if [[ $is_new_max == 1 ]]; then
        max_error="$error"
        max_error_case="Case $case_num: $trip_duration days, $miles_traveled miles, \$$receipts_amount receipts"
      fi
    done < results.tmp
  fi

  if [ -f errors.tmp ]; then
    while IFS= read -r errline; do
      errors_array+=("$errline")
    done < errors.tmp
  fi

  rm -f results.tmp errors.tmp
}

collect_results
# Calculate and display results

#######################################
# Prints the successful cases with the largest error, worst first.
# Globals:
#   results_array (read) - "case:expected:actual:error:days:miles:receipts"
# Arguments:
#   $1 - maximum number of cases to print (default: 15)
# Outputs:
#   Two lines per case on stdout.
#######################################
print_high_error_cases() {
  local limit=${1:-15}
  local case_num expected actual error trip_duration miles_traveled receipts_amount

  if [ "${#results_array[@]}" -eq 0 ]; then
    return 0
  fi

  # The sorted records are streamed into the loop instead of being
  # word-split into an array, so IFS is never changed globally and no
  # globbing is applied. LC_ALL=C keeps "." as the decimal point for both
  # sort -n and printf %f regardless of the user's locale.
  while IFS=: read -r case_num expected actual error trip_duration miles_traveled receipts_amount; do
    printf " Case %s: %s days, %s miles, \$%s receipts\n" "$case_num" "$trip_duration" "$miles_traveled" "$receipts_amount"
    LC_ALL=C printf " Expected: \$%.2f, Got: \$%.2f, Error: \$%.2f\n" "$expected" "$actual" "$error"
  done < <(printf '%s\n' "${results_array[@]}" | LC_ALL=C sort -t: -k4,4 -n -r | head -n "$limit")
}

if [ "${successful_runs:-0}" -eq 0 ]; then
  echo "❌ No successful test cases!"
  echo ""
  echo "Your script either:"
  echo " - Failed to run properly"
  echo " - Produced invalid output format"
  echo " - Timed out on all cases"
  echo ""
  echo "Check the errors below for details."
else
  # Calculate average error
  avg_error=$(echo "scale=2; $total_error / $successful_runs" | bc)

  # Calculate percentages (relative to the cases that ran successfully)
  exact_pct=$(echo "scale=1; $exact_matches * 100 / $successful_runs" | bc)
  close_pct=$(echo "scale=1; $close_matches * 100 / $successful_runs" | bc)

  echo "✅ Evaluation Complete!"
  echo ""
  echo "📈 Results Summary:"
  echo " Total test cases: $num_cases"
  echo " Successful runs: $successful_runs"
  echo " Exact matches (±\$0.01): $exact_matches (${exact_pct}%)"
  echo " Close matches (±\$1.00): $close_matches (${close_pct}%)"
  echo " Average error: \$${avg_error}"
  echo " Maximum error: \$${max_error}"
  echo ""

  # Calculate score (lower is better): average error in cents plus a 0.1
  # penalty for every case that is not an exact match.
  score=$(echo "scale=2; $avg_error * 100 + ($num_cases - $exact_matches) * 0.1" | bc)
  echo "🎯 Your Score: $score (lower is better)"
  echo ""

  # Provide feedback based on exact matches
  if [ "$exact_matches" -eq "$num_cases" ]; then
    echo "🏆 PERFECT SCORE! You have reverse-engineered the system completely!"
  elif [ "$exact_matches" -gt 950 ]; then
    echo "🥇 Excellent! You are very close to the perfect solution."
  elif [ "$exact_matches" -gt 800 ]; then
    echo "🥈 Great work! You have captured most of the system behavior."
  elif [ "$exact_matches" -gt 500 ]; then
    echo "🥉 Good progress! You understand some key patterns."
  else
    echo "📚 Keep analyzing the patterns in the interviews and test cases."
  fi

  echo ""
  echo "💡 Tips for improvement:"
  if [ "$exact_matches" -lt "$num_cases" ]; then
    echo " Check these high-error cases:"
    # Sort results by error (descending) and show the 15 worst
    print_high_error_cases 15
  fi
fi
# Show errors if any

#######################################
# Prints the first entries of errors_array followed by a count of the rest.
# Globals:
#   errors_array (read)
# Arguments:
#   $1 - maximum number of messages to print (default: 10)
# Outputs:
#   Nothing when errors_array is empty; otherwise a blank line, a heading,
#   the messages, and an "... and N more errors" line if any were omitted.
#######################################
print_errors() {
  local limit=${1:-10}
  local total=${#errors_array[@]}
  local errline

  if [ "$total" -eq 0 ]; then
    return 0
  fi

  echo
  echo "⚠️ Errors encountered:"
  for errline in "${errors_array[@]:0:$limit}"; do
    echo " $errline"
  done
  if [ "$total" -gt "$limit" ]; then
    echo " ... and $((total - limit)) more errors"
  fi
}

print_errors 10

echo
echo "📝 Next steps:"
echo " 1. Fix any script errors shown above"
echo " 2. Ensure your run.sh outputs only a number"
echo " 3. Analyze the patterns in the interviews and public cases"
echo " 4. Test edge cases around trip length and receipt amounts"
echo " 5. Submit your solution via the Google Form when ready!"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Thanks man!