@flodolo
Created January 2, 2025 06:43
Pontoon: analyze search options in Heroku logs
#! /usr/bin/env python
import glob
import json

# Find all .json files in logs/ and process them in date order
json_files = glob.glob("logs/*.json")
json_files.sort()

results = {
    "events": 0,
    "searches": 0,
    "search_ids": 0,
    "exclude_source": 0,
    "include_rejected": 0,
    "match_case": 0,
    "whole_words": 0,
}
results_daily = {}

for json_file in json_files:
    print(f"Processing file: {json_file}")
    # Extract the date (YYYY-MM-DD) from file names like logs/2024-12-09.json
    day = json_file[5:15]
    with open(json_file, "r") as f:
        data = f.readlines()

    if day not in results_daily:
        results_daily[day] = {
            "events": 0,
            "searches": 0,
            "search_ids": 0,
            "exclude_source": 0,
            "include_rejected": 0,
            "match_case": 0,
            "whole_words": 0,
        }

    for line in data:
        results["events"] += 1
        results_daily[day]["events"] += 1

        # Each line is a JSON object; the request path is nested under "heroku"
        json_data = json.loads(line)
        path = json_data.get("heroku", {}).get("path", "")
        if "search=" in path:
            results["searches"] += 1
            results_daily[day]["searches"] += 1
            # Count which search options were enabled for this search request
            if "search_identifiers=true" in path:
                results["search_ids"] += 1
                results_daily[day]["search_ids"] += 1
            if "search_exclude_source_strings=true" in path:
                results["exclude_source"] += 1
                results_daily[day]["exclude_source"] += 1
            if "search_rejected_translations=true" in path:
                results["include_rejected"] += 1
                results_daily[day]["include_rejected"] += 1
            if "search_match_case=true" in path:
                results["match_case"] += 1
                results_daily[day]["match_case"] += 1
            if "search_match_whole_word=true" in path:
                results["whole_words"] += 1
                results_daily[day]["whole_words"] += 1

print(
    f"""
Logs between {json_files[0][5:15]} and {json_files[-1][5:15]}
Total events: {results["events"]:,}
Total searches: {results["searches"]:,}
Include search IDs: {results["search_ids"]:,} ({round((results["search_ids"] / results["searches"]) * 100, 2)}%)
Exclude source string: {results["exclude_source"]:,} ({round((results["exclude_source"] / results["searches"]) * 100, 2)}%)
Include rejected translations: {results["include_rejected"]:,} ({round((results["include_rejected"] / results["searches"]) * 100, 2)}%)
Match case: {results["match_case"]:,} ({round((results["match_case"] / results["searches"]) * 100, 2)}%)
Whole words: {results["whole_words"]:,} ({round((results["whole_words"] / results["searches"]) * 100, 2)}%)
"""
)

# Generate CSV output: one row per day, 8 columns matching the header
output = [
    "Day,Events,Searches,Include IDs,Exclude Source,Include Rejected,Match Case,Whole Words"
]
for day, day_data in results_daily.items():
    output.append(
        f"{day},{day_data['events']},{day_data['searches']},{day_data['search_ids']},{day_data['exclude_source']},{day_data['include_rejected']},{day_data['match_case']},{day_data['whole_words']}"
    )

with open("output.csv", "w") as f:
    f.write("\n".join(output))
Day,Events,Searches,Include IDs,Exclude Source,Include Rejected,Match Case,Whole Words
2024-12-09,601678,26975,14151,10228,0,0,0
2024-12-10,628486,14053,3898,2213,0,0,0
2024-12-11,548421,14312,6584,3192,0,0,0
2024-12-12,526714,13853,5926,2622,0,0,0
2024-12-13,537543,9286,4993,2288,0,0,0
2024-12-14,591097,8854,2707,663,0,0,0
2024-12-15,565133,8293,2996,539,0,0,0
2024-12-16,597035,21817,14523,5171,0,0,0
2024-12-17,1147842,14604,10588,3054,0,0,0
2024-12-18,642743,4493,1711,489,0,0,0
2024-12-19,515669,4214,2116,653,0,0,0
2024-12-20,752621,9716,6671,2661,0,0,0
2024-12-21,1463417,42649,4848,1895,0,0,0
2024-12-22,1243973,10878,8055,4868,0,0,0
2024-12-23,742328,15712,10992,1023,0,0,0
2024-12-24,725259,27390,22369,13591,0,0,0
2024-12-25,784661,31655,18841,13466,0,0,2
2024-12-26,809288,30271,23391,9404,1,1,1
2024-12-27,728427,26820,22519,3523,0,0,0
2024-12-28,618093,19434,14002,11245,0,0,0
2024-12-29,676242,25207,21665,19589,0,0,0