Created
July 8, 2024 09:14
-
-
Save rvalyi/ba7670c8f341092aa14550f46459b51c to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import math | |
import git | |
from pathlib import Path | |
from datetime import datetime | |
from git.objects import base | |
from slugify import slugify | |
from typing import List | |
import subprocess | |
# Root directory for the generated files; one sub-directory per addon is
# created below it.
OUTPUT_DIR = "/home/rvalyi/DEV/odoo_module_diff"
# Path of the local git checkout that is scanned.
REPO_PATH = "odoo/src"
# ADDON = "purchase"
SINCE_VERSION = 7  # Starting version
# Line-count threshold used when deciding whether a commit with few
# structural matches is still reported.
LINE_CHANGE_THRESHOLD = 20
# Substrings looked for in commit diffs to detect structural changes.
DB_STRUCTURE_STRINGS = ["= fields.", "_inherit = ", "_inherits = "]
# Initialize local repo object
repo = git.Repo(REPO_PATH)
def find_commit_by_message(repo: "git.Repo", message: str):
    """
    Find the first commit yielded by ``repo.iter_commits()`` whose message
    contains ``message``.

    :param repo: repository object exposing ``iter_commits()``.
    :param message: substring to look for in the commit messages.
    :return: ``(commit, True)`` for the first commit whose message contains
        ``message``; otherwise ``(first_yielded_commit, False)``, where the
        commit is ``None`` when the repository yields no commit at all.
    """
    first_yielded = None
    for commit in repo.iter_commits():
        if first_yielded is None:
            # Remember the first commit so it can serve as the fallback.
            first_yielded = commit
        raw_message = commit.message
        if isinstance(raw_message, bytes):
            # str(bytes) would give the "b'...'" repr, in which non-ASCII
            # characters are escaped and can never match ``message``.
            text = raw_message.decode("utf-8", errors="replace")
        else:
            text = str(raw_message)
        if message in text:
            return commit, True
    return first_yielded, False
def _count_changed_occurrences(diff_string: str, search_string: str) -> int:
    """Count the changed lines of a patch that relate to ``search_string``.

    A ``-``/``+`` line is counted when ``search_string`` occurs in that line
    or in one of the two lines preceding it. After a counted line the
    two-line context is cleared, so the same occurrence is not counted again
    for the following changed lines.
    """
    count = 0
    previous = ""
    before_previous = ""
    for line in diff_string.splitlines():
        if line.startswith(("-", "+")) and any(
            search_string in candidate
            for candidate in (line, previous, before_previous)
        ):
            count += 1
            previous = before_previous = ""
            continue
        # Shift the window: the older line must be saved before it is
        # overwritten, otherwise both slots end up holding the same line.
        before_previous, previous = previous, line
    return count


def commit_contains_string(path: str, commit: "git.Commit", search_strings: List[str]):
    """
    Check if the commit diff contains the specified strings.
    If a search_string is found in a diff_item, then we count
    it only if it's inside a -/+ line or in the 2 lines before.

    :param path: path filter passed to ``commit.diff``.
    :param commit: commit whose diff against each of its parents is scanned.
    :param search_strings: substrings to look for.
    :return: ``(diffs, matches)``. ``diffs`` holds each per-parent diff that
        contains at least one search string, once each; ``matches`` is the
        number of counted changed lines over all parents, files and search
        strings.
    """
    matches = 0
    diffs = []
    for parent in commit.parents:
        # NOTE(review): GitPython's ``a.diff(b)`` presumably describes the
        # change from ``a`` to ``b``, so this patch may read reversed
        # (additions shown as "-"). Counts are unaffected since both signs
        # are counted - confirm before relying on the patch direction.
        diff = commit.diff(parent, paths=path, create_patch=True)
        diff_is_relevant = False
        for diff_item in diff:
            # Decode once per file instead of once per search string.
            diff_string = diff_item.diff.decode("utf-8", errors="ignore")
            for search_string in search_strings:
                if search_string not in diff_string:
                    continue
                diff_is_relevant = True
                matches += _count_changed_occurrences(diff_string, search_string)
        if diff_is_relevant:
            # Append once per parent, so callers do not see the same patch
            # several times.
            diffs.append(diff)
    return diffs, matches
def _commit_message_text(commit) -> str:
    """Return the commit message as ``str``, decoding it when it is bytes."""
    raw_message = commit.message
    if isinstance(raw_message, bytes):
        return raw_message.decode("utf-8", errors="replace")
    return str(raw_message)


def _extract_pr_reference(message: str) -> str:
    """Return the text following `` odoo/odoo#`` on the last line containing it.

    Returns an empty string when no line of ``message`` has that marker.
    """
    pr = ""
    for line in message.splitlines():
        if " odoo/odoo#" in line:
            pr = line.split(" odoo/odoo#")[1].strip()
    return pr


def _heat_marker(total_changes: int, matches: int) -> str:
    """Build the 9-character (or longer) ``_``-padded marker used in file names.

    One ``+`` per ``int(log2(matches / 2))`` and one ``#`` per size step
    (more than 100 / 200 / 400 changed lines), each group followed by ``_``.
    ``matches`` must be positive.
    """
    if total_changes > 400:
        heat_diff = 3
    elif total_changes > 200:
        heat_diff = 2
    elif total_changes > 100:
        heat_diff = 1
    else:
        heat_diff = 0
    heat_struct = int(math.log2(matches / 2))
    struct_part = "+" * heat_struct + "_" if heat_struct > 0 else ""
    diff_part = "#" * heat_diff + "_" if heat_diff > 0 else ""
    return (struct_part + diff_part).rjust(9, "_")


def scan_module_commits(
    addon: str,
    start_commit: "git.Commit",
    end_commit: "git.Commit",
    output_module_dir: str,
):
    """Write one ``.patch`` file per commit with structural changes in ``addon``.

    Commits in ``start_commit..end_commit`` touching the addon path are
    scanned with ``commit_contains_string`` and ``DB_STRUCTURE_STRINGS``.
    A commit is kept when it has more than 2 matches, or more than 1 match
    and more than ``LINE_CHANGE_THRESHOLD`` changed lines under that path.
    Commits whose summary contains "forwardport" (ignoring case, spaces and
    dashes) are skipped.

    :param addon: addon name; ``"base"`` maps to ``odoo/addons/base/``, any
        other name to ``addons/<addon>/models/``.
    :param start_commit: exclusive lower bound of the commit range.
    :param end_commit: inclusive upper bound of the commit range.
    :param output_module_dir: existing directory receiving the patch files.
    """
    if addon == "base":
        module_path = "odoo/addons/base/"
    else:
        module_path = f"addons/{addon}/models/"

    # Get the commits between the two found commits
    commits = list(
        repo.iter_commits(
            f"{start_commit.hexsha}..{end_commit.hexsha}", paths=module_path
        )
    )

    result = []
    for commit in commits:
        message = _commit_message_text(commit).strip()
        message_lines = message.splitlines()
        # An empty commit message has no first line; use an empty summary.
        summary = message_lines[0] if message_lines else ""
        if "forwardport" in summary.lower().replace(" ", "").replace("-", ""):
            # such ports may present structural changes in the diff
            # but we assume they aren't introducing new changes
            # since previous serie.
            # such false positives were common before version 13.
            continue

        # Read the stats once: each access to ``commit.stats`` may recompute
        # them, which is wasteful inside a per-file loop.
        file_stats = commit.stats.files
        total_changes = sum(
            stats["lines"]
            for file_name, stats in file_stats.items()
            if str(file_name).startswith(module_path)
        )

        migration_diffs, matches = commit_contains_string(
            module_path, commit, DB_STRUCTURE_STRINGS
        )
        if matches > 2 or (matches > 1 and total_changes > LINE_CHANGE_THRESHOLD):
            pr = _extract_pr_reference(message)
            result.append(
                {
                    "commit_sha": commit.hexsha,
                    "total_changes": int(total_changes),
                    "author": commit.author.name,
                    "date": datetime.fromtimestamp(commit.committed_date).strftime(
                        "%Y-%m-%d %H:%M:%S"
                    ),
                    "summary": summary,
                    "message": message,
                    "pr": f"https://github.com/odoo/odoo/pull/{pr}",
                    "matches": matches,
                    "diffs": migration_diffs,
                }
            )

    # Output the result, in the reverse of the order the commits were listed
    result.reverse()
    for idx, item in enumerate(result):
        # print(f"Commit SHA: {item['commit_sha']}")
        print(f"\nTotal Changes: {item['total_changes']}")
        print(f"Structural Changes: {item['matches']}")
        print(f"Date: {item['date']}")
        print(f"Summary: {item['summary']}")
        print(f"PR: {item['pr']}")

        heat = _heat_marker(item["total_changes"], item["matches"])
        filename = f"{output_module_dir}/pr_{str(idx).zfill(3)}{heat}{item['pr'].split('/')[-1]}_{slugify(item['summary'])[:64]}.patch"
        print(filename)

        # Explicit UTF-8: the diffs were decoded as UTF-8 and may hold
        # characters the locale's default encoding cannot represent.
        with open(filename, "w", encoding="utf-8") as f:
            f.write(f"PR: {item['pr']}")
            f.write(f"\nCommit SHA: {item['commit_sha']}")
            f.write(f"\nStructural Changes: {item['matches']}")
            f.write(f"\nTotal Changes: {item['total_changes']}")
            f.write(f"\nAuthor: {item['author']}")
            f.write(f"\nDate: {item['date']}")
            f.write("\n\n" + item["message"])
            f.write("\n\n" + "=" * 33 + " pseudo patch: " + "=" * 33 + "\n\n")
            for diffs in item["diffs"]:
                for diff_item in diffs:
                    f.write(diff_item.diff.decode("utf-8", errors="ignore"))
def _count_python_lines(directory: Path) -> int:
    """Return the total number of lines of all ``.py`` files below ``directory``."""
    total_lines = 0
    for root, _, files in os.walk(directory):
        for file_name in files:
            if not file_name.endswith(".py"):
                continue
            file_path = os.path.join(root, file_name)
            with open(file_path, "r", encoding="utf-8", errors="ignore") as f:
                total_lines += sum(1 for _ in f)
    return total_lines


def list_addons(repo_path: str, excludes: List[str], min_lines=500, max_deps=40):
    """List the addon directories of ``<repo_path>/addons`` worth scanning.

    :param repo_path: path of the repository checkout.
    :param excludes: name prefixes; a directory whose name starts with any of
        them is skipped.
    :param min_lines: minimum total number of lines over the addon's ``.py``
        files; a falsy value disables the check.
    :param max_deps: currently unused; kept so existing callers keep working.
    :return: list of ``Path`` objects, sorted so that the result does not
        depend on the directory listing order of the file system.
    """
    excluded_prefixes = tuple(excludes)
    subdirectories = []
    for d in sorted(Path(f"{repo_path}/addons").iterdir()):
        if not d.is_dir():
            continue
        if d.name.startswith(excluded_prefixes):
            continue
        if min_lines and _count_python_lines(d) < min_lines:
            continue
        subdirectories.append(d)
    return subdirectories
# Addon directories to process: names starting with "l10n_", "website_" or
# "test" are left out, as are addons with fewer than 500 lines of Python.
# NOTE(review): this runs at import time, before the `git checkout` of the
# end commit performed further down, so the line counts reflect whatever
# revision is checked out when the script starts - confirm this is intended.
subdirectories = list_addons(
    REPO_PATH, excludes=["l10n_", "website_", "test"], min_lines=500, max_deps=40
)
# for d in subdirectories:
#     print(d.name)
# Find the start commit | |
def release_message(serie: int):
    """Return the commit-message marker searched for to locate series ``serie``.

    Series 13 and above use a templated message; series 7 to 12 each have a
    fixed one. Any other value raises ``RuntimeError``.
    """
    # Templated markers for the most recent series.
    if serie >= 15:
        return f"bump master release to {serie}"
    if serie >= 13:
        return f"bump master release version to {serie}"

    # One-off markers for the older series.
    fixed_markers = {
        12: "master is back as 12",
        11: "[REL] 11.0",
        10: "[REL] master is version 10",
        9: "[REL] Odoo 9",
        8: "[REL] 8.0 RC1",
        7: "[REL] Release 7.0",
    }
    if serie not in fixed_markers:
        raise RuntimeError("What is wrong with you??")
    return fixed_markers[serie]
start_commit, start_found = find_commit_by_message(
    repo, release_message(SINCE_VERSION)
)
print(f"Start commit {start_commit}")

# Find the end commit
end_commit, end_found = find_commit_by_message(repo, release_message(SINCE_VERSION + 1))
print(f"End commit {end_commit}")

# Ensure both commits are found.
# find_commit_by_message falls back to the first commit it iterates over when
# nothing matches, so the returned commit alone cannot tell whether the start
# marker was found: the "found" flag has to be checked. A missing end marker
# is tolerated (the fallback commit is used and the series is adjusted below).
if not start_found or end_commit is None:
    print(
        f"Could not find the required commits for versions {SINCE_VERSION} and {SINCE_VERSION + 1}"
    )
    raise SystemExit(1)

if end_found:
    serie = f"{SINCE_VERSION + 1}.0"
else:
    serie = f"{SINCE_VERSION}.0"

# 1st we checkout the end_commit, so we can read dependencies and lines of code
result = subprocess.run(
    [
        "git",
        "checkout",
        end_commit.hexsha,
        "-f",
    ],
    cwd=REPO_PATH,
    capture_output=True,
    text=True,
)
if result.returncode != 0:
    # Raise explicitly: an `assert` would be skipped when running with -O.
    raise RuntimeError(result.stderr + "\n" + result.stdout)

for d in subdirectories:
    print(f"\n***** {d.name} ".ljust(40, "*"))
    output_module_dir = (
        f"{OUTPUT_DIR}/{d.name}"  # TODO we might add a version dir for OpenUpgrade
    )
    os.makedirs(output_module_dir, exist_ok=True)

    manifestoo_result = subprocess.run(
        [
            "manifestoo",
            "--addons-path",
            f"{REPO_PATH}/addons",
            f"--odoo-series={serie}",
            "--select",
            d.name,
            "tree",
        ],
        capture_output=True,
        text=True,
    )
    if manifestoo_result.returncode != 0:
        # Best effort: report the failure but keep scanning the addon.
        print(f"manifestoo failed for {d.name}: {manifestoo_result.stderr.strip()}")
    manifestoo_output = manifestoo_result.stdout
    with open(f"{output_module_dir}/dependencies.txt", "w", encoding="utf-8") as f:
        f.write(manifestoo_output)

    scan_module_commits(d.name, start_commit, end_commit, output_module_dir)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
The list of modules in the README file was generated using:
git checkout less-noise README.md; head -n 6 README.md > temp_file && mv temp_file README.md; du -sh -- */ | sort -rh | awk '{sub(/\/$/, "", $2); print NR ". " $2 " - " $1}' >> README.md