@flodolo
Created January 2, 2025 06:43
Pontoon: analyze search options in Heroku logs
#! /usr/bin/env python
import glob
import json

# Find all .json files in logs/ and process them in date order
json_files = glob.glob("logs/*.json")
json_files.sort()

results = {
    "events": 0,
    "searches": 0,
    "search_ids": 0,
    "exclude_source": 0,
    "include_rejected": 0,
    "match_case": 0,
    "whole_words": 0,
}
results_daily = {}

for json_file in json_files:
    print(f"Processing file: {json_file}")
    # Extract the date (YYYY-MM-DD) from file names like logs/2024-12-09.json
    day = json_file[5:15]
    with open(json_file, "r") as f:
        data = f.readlines()

    if day not in results_daily:
        results_daily[day] = {
            "events": 0,
            "searches": 0,
            "search_ids": 0,
            "exclude_source": 0,
            "include_rejected": 0,
            "match_case": 0,
            "whole_words": 0,
        }

    for line in data:
        results["events"] += 1
        results_daily[day]["events"] += 1

        # Each line is a JSON object; the request path is nested under "heroku"
        json_data = json.loads(line)
        path = json_data.get("heroku", {}).get("path", "")
        if "search=" in path:
            results["searches"] += 1
            results_daily[day]["searches"] += 1
            # Count which search options were enabled for this search request
            if "search_identifiers=true" in path:
                results["search_ids"] += 1
                results_daily[day]["search_ids"] += 1
            if "search_exclude_source_strings=true" in path:
                results["exclude_source"] += 1
                results_daily[day]["exclude_source"] += 1
            if "search_rejected_translations=true" in path:
                results["include_rejected"] += 1
                results_daily[day]["include_rejected"] += 1
            if "search_match_case=true" in path:
                results["match_case"] += 1
                results_daily[day]["match_case"] += 1
            if "search_match_whole_word=true" in path:
                results["whole_words"] += 1
                results_daily[day]["whole_words"] += 1

print(
    f"""
Logs between {json_files[0][5:15]} and {json_files[-1][5:15]}
Total events: {results["events"]:,}
Total searches: {results["searches"]:,}
Include search IDs: {results["search_ids"]:,} ({round((results["search_ids"] / results["searches"]) * 100, 2)}%)
Exclude source string: {results["exclude_source"]:,} ({round((results["exclude_source"] / results["searches"]) * 100, 2)}%)
Include rejected translations: {results["include_rejected"]:,} ({round((results["include_rejected"] / results["searches"]) * 100, 2)}%)
Match case: {results["match_case"]:,} ({round((results["match_case"] / results["searches"]) * 100, 2)}%)
Whole words: {results["whole_words"]:,} ({round((results["whole_words"] / results["searches"]) * 100, 2)}%)
"""
)

# Generate CSV output: one row per day, 8 columns matching the header
output = [
    "Day,Events,Searches,Include IDs,Exclude Source,Include Rejected,Match Case,Whole Words"
]
for day, day_data in results_daily.items():
    output.append(
        f"{day},{day_data['events']},{day_data['searches']},{day_data['search_ids']},{day_data['exclude_source']},{day_data['include_rejected']},{day_data['match_case']},{day_data['whole_words']}"
    )

with open("output.csv", "w") as f:
    f.write("\n".join(output))
Day,Events,Searches,Include IDs,Exclude Source,Include Rejected,Match Case,Whole Words
2024-12-09,601678,26975,14151,10228,0,0,0
2024-12-10,628486,14053,3898,2213,0,0,0
2024-12-11,548421,14312,6584,3192,0,0,0
2024-12-12,526714,13853,5926,2622,0,0,0
2024-12-13,537543,9286,4993,2288,0,0,0
2024-12-14,591097,8854,2707,663,0,0,0
2024-12-15,565133,8293,2996,539,0,0,0
2024-12-16,597035,21817,14523,5171,0,0,0
2024-12-17,1147842,14604,10588,3054,0,0,0
2024-12-18,642743,4493,1711,489,0,0,0
2024-12-19,515669,4214,2116,653,0,0,0
2024-12-20,752621,9716,6671,2661,0,0,0
2024-12-21,1463417,42649,4848,1895,0,0,0
2024-12-22,1243973,10878,8055,4868,0,0,0
2024-12-23,742328,15712,10992,1023,0,0,0
2024-12-24,725259,27390,22369,13591,0,0,0
2024-12-25,784661,31655,18841,13466,0,0,2
2024-12-26,809288,30271,23391,9404,1,1,1
2024-12-27,728427,26820,22519,3523,0,0,0
2024-12-28,618093,19434,14002,11245,0,0,0
2024-12-29,676242,25207,21665,19589,0,0,0