Created
February 3, 2024 15:30
-
-
Save Urgau/edc4f767025d608776ed5fa8446f138b to your computer and use it in GitHub Desktop.
Analysing the crater run results from #120393 rust-lang/rust
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
from collections import Counter | |
import math | |
import sys | |
import os | |
# Derive macros whose non-local `impl` has already been fixed upstream.
# Maps the derive name as it appears in the compiler note (bare or
# path-qualified) to a `(crate, note)` tuple. Only the crate name is read by
# the report; the second element looks like a version bound or free-form note.
UPSTREAM_FIXED = {
    # serde
    "Deserialize": ("serde_derive", ">1.0.113"),
    "Serialize": ("serde_derive", ">1.0.113"),
    "serde::Deserialize": ("serde_derive", ">1.0.113"),
    "serde::Serialize": ("serde_derive", ">1.0.113"),
    "serde_derive::Deserialize": ("serde_derive", ">1.0.113"),
    "serde_derive::Serialize": ("serde_derive", ">1.0.113"),
    # diesel
    "QueryId": ("diesel_derives", ">2.0.0-rc"),
    "Queryable": ("diesel_derives", ">2.0.0-rc"),
    "Insertable": ("diesel_derives", ">2.0.0-rc"),
    "Identifiable": ("diesel_derives", ">2.0.0-rc"),
    "AsChangeset": ("diesel_derives", ">2.0.0-rc"),
    "Associations": ("diesel_derives", ">2.0.0-rc"),
    "QueryableByName": ("diesel_derives", ">2.0.0-rc"),
    "DieselNumericOps": ("diesel_derives", ">2.0.0-rc"),
    "FromRawSql": ("diesel_derives", ">2.0.0-rc"),
    # synstructure-based derives
    "Trace": ("gc", "fixed in synstructure"),
    "Finalize": ("gc", "fixed in synstructure"),
    "Collect": ("gc-arena-derive", "fixed in synstructure"),
    "Abobination": ("gc-arena-derive", "fixed in synstructure"),
    # others (the duplicated "StructOpt" entry was removed; a repeated key in
    # a dict literal is silently overwritten, so the table is unchanged)
    "StructOpt": ("structopt", ""),
    "Command": ("structopt", ""),
    "FromPest": ("from-pest", ""),
}
# Derive macros coming from crates that are deprecated upstream.
# Both spellings of the derive share the same `(crate, note)` tuple.
UPSTREAM_DEPRECATED = dict.fromkeys(
    ("Fail", "failure::Fail"),
    ("failure", "deprecated for >4yrs"),
)
# Links to the upstream source lines, shared by several derive names below.
_NUM_DERIVE_SRC = "https://github.com/rust-num/num-derive/blob/50ecdb10ac0934eccd971c20bde9bee5fe99ed98/src/lib.rs#L102"
_DISPLAYDOC_SRC = "https://github.com/yaahc/displaydoc/blob/f0b62a55ec2495b1a60b18f1d93f8b27e53123a7/src/expand.rs#L25"

# Derive macros that are not fixed upstream yet.
# Maps the derive name to `(crate, link-or-None)`; the second element is a URL
# into the crate's source when one was recorded.
UPSTREAM_NOT_FIXED = {
    "YaSerialize": ("yaserde", None),
    "YaDeserialize": ("yaserde", None),
    "FromPrimitive": ("num_derive", _NUM_DERIVE_SRC),
    "num_derive::FromPrimitive": ("num_derive", _NUM_DERIVE_SRC),
    "ToPrimitive": ("num_derive", None),
    "num_derive::ToPrimitive": ("num_derive", None),
    "PeekPoke": (
        "peek-poke-derive",
        "https://github.com/servo/webrender/blob/8ce388eb12df8b3a33c7a792a85e66e52acc3ca8/peek-poke/peek-poke-derive/src/lib.rs#L248C16-L248C27",
    ),
    "Display": ("displaydoc", _DISPLAYDOC_SRC),
    "Error": ("displaydoc", _DISPLAYDOC_SRC),
    "Endpoint": (
        "rustify_derive",
        "https://github.com/jmgilman/rustify/blob/68fdbdb848b012f1116b972900dca23cde260e0e/rustify_derive/src/lib.rs#L308C16-L308C27",
    ),
    "Savefile": (
        "savefile-derive",
        "https://docs.rs/savefile-derive/0.16.4/src/savefile_derive/lib.rs.html#1136",
    ),
}
# `macro_rules!` macros whose non-local definition is already fixed upstream.
# Keyed by the macro name as it appears in the compiler note; the value is a
# `(crate, note)` tuple.
UPSTREAM_MACRO_FIXED = dict(
    [
        ("$crate::py_class_impl", ("pyo3", "since 2 yrs ago")),
    ]
)
class NonLocalError:
    """A single `non-local` definition error collected from one log file.

    Every field except `filename` starts out as `None` and is filled in as
    the matching lines of the diagnostic are parsed.
    """

    def __init__(
        self,
        filename,
        type_=None,
        body_type=None,
        body_name=None,
        derive_name=None,
        macro_name=None,
    ):
        # Path of the log file the error was found in.
        self.filename = filename
        # Which macro produced the error, if any (from the `= note:` line).
        self.derive_name = derive_name
        self.macro_name = macro_name
        # Kind and name of the enclosing body (from the `= help:` line).
        self.body_type = body_type
        self.body_name = body_name
        # Kind of non-local definition; `None` means "no error recorded yet".
        self.type_ = type_
def process_file(filename):
    """Collect the `non-local` definition errors reported in one log file.

    Only lines starting with `[INFO] [stdout]` are inspected. Each
    `error: non-local ...` line starts a new `NonLocalError`; the `= help:`
    and `= note:` lines that follow fill in its body and macro information.
    A `[WARN] too many lines` marker stops the scan and discards the error
    that was being built at that point (errors already completed are kept).

    Args:
        filename: Path of the log file to read.

    Returns:
        A list of `NonLocalError`, in the order they appear in the file.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    errors = []
    # Decode leniently: a stray undecodable byte in one file should not abort
    # the whole analysis, and the lines of interest are plain ASCII prefixes.
    with open(filename, "r", encoding="utf-8", errors="replace") as f:
        error = NonLocalError(filename)
        for l in f:
            if l.startswith("[WARN] too many lines"):
                # Truncated log: the pending error may be incomplete, drop it.
                error.type_ = None
                break
            if not l.startswith("[INFO] [stdout]"):
                continue
            l = l[len("[INFO] [stdout] "):].strip()
            if l.startswith("error: non-local"):
                # A new diagnostic begins; flush the previous one, if any.
                if error.type_ is not None:
                    errors.append(error)
                    error = NonLocalError(filename)
                error.type_ = "impl" if "impl" in l else "macro_rules!"
            elif l.startswith("= help: move this") or (
                # The `macro_rules!` help starts with "remove ... or move
                # this ..."; the previous "reove" prefix could never match.
                # Requiring `macro_rules!` avoids picking up unrelated
                # "= help: remove ..." lines from other diagnostics.
                l.startswith("= help: remove") and "macro_rules!" in l
            ):
                words = l.split(" ")
                # "... `name` and up N bodies" has four extra trailing words.
                offset = 4 if l.endswith("bodies") else 0
                if len(words) < 3 + offset:
                    continue  # truncated line, nothing reliable to parse
                error.body_type = words[-2 - offset]
                error.body_name = words[-1 - offset][1:-1]  # strip backticks
                # Two-word body kinds: keep the word before the kind unless
                # it is the "current" of "the current <kind>".
                if words[-3 - offset] != "current":
                    error.body_type = words[-3 - offset] + " " + error.body_type
            elif l.startswith("= note: this error originates in the derive macro"):
                words = l.split(" ")
                if len(words) > 9:
                    error.derive_name = words[9][1:-1]  # strip backticks
            elif l.startswith("= note: this error originates in the macro"):
                words = l.split(" ")
                if len(words) > 8:
                    error.macro_name = words[8][1:-1]  # strip backticks
        if error.type_ is not None:
            errors.append(error)
    return errors
def _percent(part, whole):
    """Return `part` as a percentage of `whole`; 0.0 when `whole` is zero."""
    if not whole:
        return 0.0
    return float(part) / float(whole) * 100.0


def _derive_status(name):
    """Return the upstream status label printed next to a derive macro."""
    if name in UPSTREAM_FIXED:
        return "fixed in {}".format(UPSTREAM_FIXED[name][0])
    if name in UPSTREAM_NOT_FIXED:
        return "not-fixed"
    if name in UPSTREAM_DEPRECATED:
        return "deprecated"
    return "n/a"


def main():
    """Analyse every file under the current directory and print the report.

    Each file found by walking `.` is passed to `process_file`; the collected
    errors are then grouped by derive macro, by `macro_rules!` macro, and by
    enclosing body kind, and a Markdown report is written to stdout.

    Any exception is reported on stderr and the process exits with status 84.

    NOTE: the "7057 / 851203 (0.82%)" figures, the "74%" / "11.4%" shares and
    the "6 different derive crates" counts in the text are hard-coded, not
    computed from the files that were read.
    """
    try:
        errors = []
        for root, _dirs, files in os.walk(".", topdown=False):
            for name in files:
                errors.extend(process_file(os.path.join(root, name)))

        print("## Crater report analysis")
        print()
        print("*Context: The crater run temporarily put the lint to deny-by-default, and here are the results.*")
        print()

        total_errors = len(errors)
        print("Affected projects: 7057 / 851203 (0.82%)")
        print("Total errors: {}".format(total_errors))
        print()

        # One pass over the errors. An error carrying both a derive and a
        # macro name is counted in both tables, as before.
        derives = Counter()
        derives_crates = Counter()
        macros = Counter()
        rest = Counter()
        rest_crates = Counter()
        for error in errors:
            if error.derive_name is not None:
                derives[error.derive_name] += 1
                derives_crates[error.filename] += 1
            if error.macro_name is not None:
                macros[error.macro_name] += 1
            if error.macro_name is None and error.derive_name is None:
                rest[error.body_type] += 1
                rest_crates[error.filename] += 1

        total_derives = sum(derives.values())
        total_macros = sum(macros.values())
        total_manual = total_errors - total_derives - total_macros

        print("Errors from derive macros: {} (across {} different derive macros)".format(total_derives, len(derives)))
        print("Errors from `macro_rules!`: {} (across {} different `macro_rules!`)".format(total_macros, len(macros)))
        print("Errors (manual, not above): {}".format(total_manual))
        print()

        print("### Derive macros")
        print()
        print("<details>")
        print()

        # A Counter yields 0 for names that never occurred.
        total_derives_fixed = sum(derives[k] for k in UPSTREAM_FIXED)
        total_derives_deprecated = sum(derives[k] for k in UPSTREAM_DEPRECATED)
        total_derives_not_fixed = sum(derives[k] for k in UPSTREAM_NOT_FIXED)
        total_derives_uncategorised = (
            total_derives
            - total_derives_fixed
            - total_derives_deprecated
            - total_derives_not_fixed
        )

        print("Total errors from derive macros **fixed upstream**: {} ({:.1f}%)".format(total_derives_fixed, _percent(total_derives_fixed, total_derives)))
        print("Total errors from derive macros **NOT fixed upstream**: {} ({:.1f}%)".format(total_derives_not_fixed, _percent(total_derives_not_fixed, total_derives)))
        print("Total errors from derive macros **deprecated**: {} ({:.1f}%)".format(total_derives_deprecated, _percent(total_derives_deprecated, total_derives)))
        print("Total errors from derive macros **uncategorised**: {} ({:.1f}%)".format(total_derives_uncategorised, _percent(total_derives_uncategorised, total_derives)))
        print()
        for (d, count) in derives.most_common():
            print(" - `{}`: {} (status: {})".format(d, count, _derive_status(d)))
        print()
        print("</details>")
        print()

        print("### `macro_rules!`")
        print()
        total_macros_fixed = sum(macros[k] for k in UPSTREAM_MACRO_FIXED)
        print("<details>")
        print()
        for (d, count) in macros.most_common():
            print(" - `{}`: {}".format(d, count))
        print()
        print("</details>")
        print()

        print("### Manual (not derive or macro_rules!)")
        print()
        print("<details>")
        print()
        print("Inside:")
        for (d, count) in rest.most_common():
            print(" - `{}`: {}".format(d, count))
        print()
        print("</details>")
        print()
        print("------")
        print()

        print("### Summary")
        print()
        total_errors_excluding_deprecated = total_errors - total_derives_deprecated
        total_errors_fixed_by_cargo_update = total_derives_fixed + total_macros_fixed
        total_rest_crates = len(rest_crates)
        # Subtract the whole `cargo update` bucket (derives AND macros fixed
        # upstream) so the four buckets below add up to the total; previously
        # the fixed macros were counted both there and as uncategorised.
        total_errors_uncat = (
            total_errors_excluding_deprecated
            - total_errors_fixed_by_cargo_update
            - total_derives_not_fixed
            - total_manual
        )

        print(f"In conclusion, this crater run revealed that making the lint deny-by-default (which is NOT default that would would be used, it would be warn-by-default) would break 7057 crates / 851203 crates tested (0.82%), there was a total of {total_errors} errors, of which {total_derives} ({_percent(total_derives, total_errors):.1f}%) errors were coming from derive macros, {total_macros} were coming from `macro_rules!` and {total_manual} errors are not coming from either of them.")
        print()
        print("Analysing the root of those derives and macros revealed many outdated versions of `serde_derive` and `diesel_derives`, representing nearly 74% of the total errors alone, as well 6 different derive crates that represent 11.4% that would need to be updated.")
        print()
        print("To put it simply:")
        print()
        print(" - {} ({:.1f}%) errors would be fixed by `cargo update` (representing {} different crates)".format(total_errors_fixed_by_cargo_update, _percent(total_errors_fixed_by_cargo_update, total_errors_excluding_deprecated), len(derives_crates)))
        print(" - {} ({:.1f}%) errors that could fixed by upstream change (at least 6 different derive crates)".format(total_derives_not_fixed, _percent(total_derives_not_fixed, total_errors_excluding_deprecated)))
        print(" - {} ({:.1f}%) errors requires manual intervention ({} different crates)".format(total_manual, _percent(total_manual, total_errors_excluding_deprecated), total_rest_crates))
        print(" - {} ({:.1f}%) uncategorised errors (probably manual intervention and deps change): ".format(total_errors_uncat, _percent(total_errors_uncat, total_errors_excluding_deprecated)))
        print()
        print("I think those are more than acceptable numbers, in more that 3/4 of cases a `cargo update` would fix the warnings and by fixing 6 different derives we could get to 90%, the rest of cases would need manual interventions, but some intervention are to be expected.")
        print()
        print("*Reminder that the lint is warn-by-default, so it wouldn't actually break any crate!*")
    except Exception as ex:
        # Top-level boundary: report the failure and exit with a distinct code.
        print("unexpected error: {}".format(ex), file=sys.stderr)
        sys.exit(84)
# Script entry point: run the analysis only when executed directly, so that
# importing this module has no side effects.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment