Created
February 10, 2024 13:38
-
-
Save Urgau/eea71b671e95bc7348cc58fdbb7a24bf to your computer and use it in GitHub Desktop.
Analysis of the crater run of -Zcheck-cfg
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
from collections import Counter | |
import math | |
import sys | |
import os | |
class UnexpectedCfg:
    """Record describing one `unexpected cfg` diagnostic found in a log file.

    The four attributes are plain, mutable fields:

    * ``filename`` - the log file the diagnostic was read from
    * ``type_``    - the condition type taken from the diagnostic header
    * ``name``     - the cfg name, or None when not known
    * ``value``    - the cfg value, or None when not known
    """

    def __init__(self, filename, type_=None, name=None, value=None):
        # Everything except the filename is optional and defaults to None.
        self.filename, self.type_ = filename, type_
        self.name, self.value = name, value
def _cfg_value_prefix(text):
    """Return the cfg value found at the start of *text* (the text after `=`)."""
    # A quoted literal runs up to (and including) its closing quote, even if
    # it contains `,` or `)` or is the last thing on the line.
    if text.startswith("\""):
        closing = text.find("\"", 1)
        if closing != -1:
            return text[:closing + 1]
    # Otherwise cut at the first delimiter that is present, tried in this
    # priority order; with no delimiter at all the whole remainder is kept.
    for delim in (",", ")", "\""):
        pos = text.find(delim)
        if pos != -1:
            return text[:pos]
    return text


def process_file(filename):
    """Extract the `unexpected cfg` diagnostics from one build log.

    Only lines starting with ``[INFO] [stdout]`` are examined.  A diagnostic
    begins at a line starting with ``error: unexpected `cfg` condition``,
    whose fifth word gives the type (``name`` or ``value``) and whose
    trailing back-quoted text gives that name or value.  The fourth stdout
    line after the header is expected to be the ``^^^`` underline; together
    with the source line just above it, it is used to fill in whichever of
    name/value the header did not provide.

    Reading stops at a ``[WARN] too many lines`` line, and the diagnostic
    still being assembled at that point is dropped.

    Args:
        filename: path of the log file to read.

    Returns:
        A list of UnexpectedCfg objects; each diagnostic appears exactly once.
        Names are cut at the first ``=`` and values found through the
        underline are wrapped in double quotes when not already quoted.

    Raises:
        ValueError: if a header's condition type is neither ``name`` nor
            ``value``.
    """
    prefix = "[INFO] [stdout] "
    errors = []
    # errors="replace": a stray undecodable byte in one log must not abort
    # the whole analysis.
    with open(filename, "r", encoding="utf-8", errors="replace") as f:
        error = UnexpectedCfg(filename)
        prev_l = None
        # Number of stdout lines left before the underline row is expected;
        # only meaningful while a diagnostic is pending (error.type_ set).
        counter = 3
        for l in f:
            if l.startswith("[WARN] too many lines"):
                # The log was truncated: discard the half-read diagnostic.
                error.type_ = None
                break
            if not l.startswith("[INFO] [stdout]"):
                continue
            l = l[len(prefix):].strip()
            if l.startswith("error: unexpected `cfg` condition"):
                # Flush a previous diagnostic whose underline was never found.
                if error.type_ is not None:
                    errors.append(error)
                words = l.rsplit(' ')
                # Fifth word is "name:" / "value:"; drop the colon.
                type_ = words[4][0:-1] if len(words) > 4 else ""
                if type_ not in ("name", "value"):
                    raise ValueError(
                        "unknown `cfg` condition type {!r} in {}: {}".format(type_, filename, l))
                error = UnexpectedCfg(filename, type_)
                counter = 4
                # Text between ": `" and the final "`".
                v = l[l.rindex(':') + 3:-1]
                if type_ == "name":
                    error.name = v
                else:
                    error.value = v
            elif error.type_ is not None and counter == 0 and "^^^" in l and "|" in prev_l:
                # `l` is the underline row, `prev_l` the source row; both are
                # aligned on their `|` gutter, so caret columns in `l` map to
                # columns in `prev_l` once shifted by the gutter offset.
                offset = prev_l.index("|")
                start = l.index("^") + offset
                end = len(l) + offset
                after = prev_l[end:]
                if after.startswith((" = ", "=")):
                    # The underline covers only the name; the value follows.
                    tmp = after[3 if after[0] == " " else 1:]
                    if error.value is None:
                        error.value = _cfg_value_prefix(tmp)
                    if error.name is None:
                        error.name = prev_l[start:end]
                else:
                    # The underline covers `name` or `name = value` as a whole.
                    maybe_ss = prev_l[start:end].split(" = ", 1)
                    if error.name is None:
                        error.name = maybe_ss[0]
                    if error.value is None and len(maybe_ss) == 2:
                        error.value = maybe_ss[1]
                if error.name is not None and "=" in error.name:
                    error.name = error.name[:error.name.index("=")]
                if error.value is not None and not error.value.startswith("\""):
                    error.value = "\"" + error.value + "\""
                errors.append(error)
                # Start from a blank record so this diagnostic is not appended
                # a second time by the flush at the next header / end of file.
                error = UnexpectedCfg(filename)
            counter -= 1
            prev_l = l
        if error.type_ is not None:
            errors.append(error)
    return errors
def main():
    """Parse every log under ``regressed/`` and print a Markdown report.

    Each file found by walking ``regressed/`` is handed to ``process_file``;
    the resulting diagnostics are tallied per (name, value) pair, per
    ``feature`` value, per ``target_*`` cfg and per log file, and the report
    is written to stdout.

    Raises:
        Exception: any error is reported on stderr and then re-raised.
    """
    # Number of projects the percentages are computed against.
    total_tested = 414692
    # `process_file` usually stores values wrapped in double quotes, but a
    # value taken only from the diagnostic header stays unquoted, so both
    # spellings are treated as the same clippy feature.
    clippy_values = {"cargo-clippy", "clippy", "\"cargo-clippy\"", "\"clippy\""}
    # cfgs described as outliers in the report; left out of the per-project tally.
    outliers = {("docsrs", None), ("docs_rs", None), ("rustfmt", None)}
    outliers.update(("feature", value) for value in clippy_values)
    try:
        errors = []
        for root, _dirs, files in os.walk("regressed/", topdown=False):
            for name in files:
                errors.extend(process_file(os.path.join(root, name)))
        print("## Crater report analysis")
        print()
        print("*Context: The crater run enable-by-default cargo `-Zcheck-cfg` and temporarily put the `unexpected_cfgs` lint to deny-by-default, and here are the results.*")
        print()
        # Errors per log file, leaving out the docs.rs cfgs and anything whose
        # cfg name mentions clippy.
        filenames = Counter()
        for error in errors:
            if error.name in ("docsrs", "docs_rs"):
                continue
            if error.name is not None and "clippy" in error.name:
                continue
            filenames.update([error.filename])
        total_projects = len(filenames)
        total_errors = sum(filenames.values())
        print(f"Affected projects: {total_projects} / {total_tested} ({total_projects / total_tested * 100.0:.2f}%)")
        print("Total (raw) errors: {}".format(total_errors))
        print()
        cfgs = Counter()
        cfgs_filenames = {}
        features = Counter()
        features_filenames = {}
        target_s = Counter()
        target_s_filenames = {}
        by_projects = {}
        for error in errors:
            cfg = (error.name, error.value)
            cfgs.update([cfg])
            # The *_filenames maps are only used for "how many distinct log
            # files", so a set per key is enough.
            cfgs_filenames.setdefault(cfg, set()).add(error.filename)
            if error.name == "feature":
                features.update([error.value])
                features_filenames.setdefault(error.value, set()).add(error.filename)
            if error.name is not None and error.name.startswith("target_"):
                target_s.update([cfg])
                target_s_filenames.setdefault(cfg, set()).add(error.filename)
            if cfg not in outliers:
                by_projects.setdefault(error.filename, Counter()).update([cfg])
        print("### 50 most seen unexpected cfg errors")
        print()
        print("Summary: no false-positives[^false_positives], see the others section for more details.")
        print()
        print("<details>")
        print()
        print("well, ignoring `docsrs` (already fixed), `rustfmt` (deprecated/no longer in use) as well as `feature=\"cargo-clippy\"` (and `feature=\"clippy\"`)\" which are deprecated and should be in the `[features]` table.")
        print()
        print("| name | value | comment |")
        print("|------|-------|---------|")
        for cfg, count in cfgs.most_common(50):
            print(f"| `{cfg[0]}` | `{cfg[1]}` | {count} errors in {len(cfgs_filenames[cfg])} projects |")
        print()
        print("</details>")
        print()
        print("### 50 most seen unexpected `feature` cfg")
        print()
        print("Summary: no false-positives, all of them are missing from the `[features]` table.")
        print()
        print("<details>")
        print()
        print("| name | value | comment |")
        print("|------|-------|---------|")
        for value, count in features.most_common(50):
            print(f"| `feature` | `{value}` | {count} errors in {len(features_filenames[value])} projects |")
        print()
        print("</details>")
        print()
        print("### 50 most seen unexpected `target_*` cfg")
        print()
        print("Summary: Many typos (`target_os=\"unix\"`, `target_arch=\"i686\"`, ...), some reference to removed targets (`target_os=\"cloudabi\"`, `target_arch=\"asmjs\"`, ...), as well as some custom cfg (`target_os=\"solana\"`, ...).")
        print()
        print("<details>")
        print()
        print("| name | value | comment |")
        print("|------|-------|---------|")
        for cfg, count in target_s.most_common(50):
            print(f"| `{cfg[0]}` | `{cfg[1]}` | {count} errors in {len(target_s_filenames[cfg])} projects |")
        print()
        print("</details>")
        print()
        print("------")
        print()
        total_features = sum(count for value, count in features.items() if value not in clippy_values)
        total_target_s = sum(target_s.values())
        # Raw error count, distinct (file, cfg) pairs and file count once the
        # outliers are left out.
        total_without = sum(sum(per_cfg.values()) for per_cfg in by_projects.values())
        total_actions = sum(len(per_cfg) for per_cfg in by_projects.values())
        total_projects = len(by_projects)
        print("### Summary")
        print()
        print("There are four big outliers: `docsrs` (alread fixed)[^docsrs], `rustfmt` (deprecated/no longer in use) as well as `feature=\"cargo-clippy\"` (and `feature=\"clippy\"`) both of which are de-facto deprecated and no longer used. There is nothing do to for those.")
        print()
        print(f"Therefore (excluding the outliers) there are {total_without} unexpected cfgs, of which {total_features} of the are unexpected `feature` cfgs and {total_target_s} errors are of `target_*`.")
        print()
        print(f"After manually checking the 3 categories above I wasn't able to find any false positive[^false_positives]; additionaly, {total_actions} actions would need to be taken accross {total_projects} projects ({total_projects / total_tested * 100:.1f}% of all the projects tested), by either: fixing the typo, removing the staled condition, marking as expected the custom cfg, adding the feature to the `features` table...")
        print()
        print("All and all, while the impact is certainly not minimal, I think it is heavily mitigated by the fact that it's warn-by-default, only affects maintainers (never the downstream users) and by it's ability to warn users on unexpected cfgs, a defitiancy of the Rust toolchain, reported many times, that will final be fixed.")
        print()
        print("[^docsrs]: `docsrs` shouldn't have pop-up in this crater run but I was trick by the off-by-one date of rustup and `rustc -V` :-|")
        print("[^false_positives]: by \"false positive\" I mean: missing well known names/values")
    except Exception as ex:
        # Leave a short message on stderr, then let the traceback propagate.
        print("unexpected error: {}".format(ex), file=sys.stderr)
        raise
# Run the analysis only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment