Skip to content

Instantly share code, notes, and snippets.

@Urgau
Created February 3, 2024 15:30
Show Gist options
  • Save Urgau/edc4f767025d608776ed5fa8446f138b to your computer and use it in GitHub Desktop.
Save Urgau/edc4f767025d608776ed5fa8446f138b to your computer and use it in GitHub Desktop.
Analysing the crater run results from #120393 rust-lang/rust
#!/usr/bin/env python3
from collections import Counter
import math
import sys
import os
# Derive macros whose non-local `impl` has already been fixed upstream.
#
# Maps the derive name as it appears in the compiler note (bare or
# path-qualified) to a ``(crate, note)`` tuple.  Only the crate name is used
# by the report ("fixed in <crate>"); the second element is a free-form note
# (a version bound, a remark, or an empty string).
#
# Names sharing the same value are grouped so that each name is listed exactly
# once; a repeated key in a dict literal silently overrides the earlier entry.
UPSTREAM_FIXED = {
    **dict.fromkeys(
        (
            "Deserialize",
            "Serialize",
            "serde::Deserialize",
            "serde::Serialize",
            "serde_derive::Deserialize",
            "serde_derive::Serialize",
        ),
        ("serde_derive", ">1.0.113"),
    ),
    **dict.fromkeys(
        (
            "QueryId",
            "Queryable",
            "Insertable",
            "Identifiable",
            "AsChangeset",
            "Associations",
            "QueryableByName",
            "DieselNumericOps",
            "FromRawSql",
        ),
        ("diesel_derives", ">2.0.0-rc"),
    ),
    **dict.fromkeys(("Trace", "Finalize"), ("gc", "fixed in synstructure")),
    # NOTE(review): "Abobination" is kept exactly as spelled; confirm it
    # matches the derive name that shows up in the logs.
    **dict.fromkeys(
        ("Collect", "Abobination"),
        ("gc-arena-derive", "fixed in synstructure"),
    ),
    **dict.fromkeys(("StructOpt", "Command"), ("structopt", "")),
    "FromPest": ("from-pest", ""),
}
# Derive macros that come from a deprecated crate.
# Maps derive name -> (crate, note).
UPSTREAM_DEPRECATED = dict.fromkeys(
    ("Fail", "failure::Fail"),
    ("failure", "deprecated for >4yrs"),
)

# Derive macros that still generate the non-local `impl` upstream.
# Maps derive name -> (crate, link to the offending source or None).
UPSTREAM_NOT_FIXED = {
    **dict.fromkeys(("YaSerialize", "YaDeserialize"), ("yaserde", None)),
    **dict.fromkeys(
        ("FromPrimitive", "num_derive::FromPrimitive"),
        ("num_derive", "https://github.com/rust-num/num-derive/blob/50ecdb10ac0934eccd971c20bde9bee5fe99ed98/src/lib.rs#L102"),
    ),
    **dict.fromkeys(
        ("ToPrimitive", "num_derive::ToPrimitive"),
        ("num_derive", None),
    ),
    "PeekPoke": ("peek-poke-derive", "https://github.com/servo/webrender/blob/8ce388eb12df8b3a33c7a792a85e66e52acc3ca8/peek-poke/peek-poke-derive/src/lib.rs#L248C16-L248C27"),
    **dict.fromkeys(
        ("Display", "Error"),
        ("displaydoc", "https://github.com/yaahc/displaydoc/blob/f0b62a55ec2495b1a60b18f1d93f8b27e53123a7/src/expand.rs#L25"),
    ),
    "Endpoint": ("rustify_derive", "https://github.com/jmgilman/rustify/blob/68fdbdb848b012f1116b972900dca23cde260e0e/rustify_derive/src/lib.rs#L308C16-L308C27"),
    "Savefile": ("savefile-derive", "https://docs.rs/savefile-derive/0.16.4/src/savefile_derive/lib.rs.html#1136"),
}

# `macro_rules!` macros already fixed upstream.
# Maps macro name -> (crate, note).
UPSTREAM_MACRO_FIXED = {
    "$crate::py_class_impl": ("pyo3", "since 2 yrs ago"),
}
class NonLocalError:
    """Plain record describing one non-local definition error from a log.

    Only ``filename`` is required.  The remaining fields default to ``None``
    and are plain attributes that callers may assign after construction.

    Args:
        filename: Path of the log file the error was found in.
        type_: Kind of definition that was reported.
        body_type: Kind of the enclosing body named in the help line.
        body_name: Name of the enclosing body named in the help line.
        derive_name: Derive macro the error originates in, if any.
        macro_name: Macro the error originates in, if any.
    """

    def __init__(
        self,
        filename,
        type_=None,
        body_type=None,
        body_name=None,
        derive_name=None,
        macro_name=None,
    ):
        self.filename = filename
        # What was reported and where it lives.
        self.type_, self.body_type, self.body_name = type_, body_type, body_name
        # Which macro expansion (if any) produced it.
        self.derive_name, self.macro_name = derive_name, macro_name
def process_file(filename):
    """Extract the non-local definition errors reported in one log file.

    Only lines starting with ``[INFO] [stdout]`` are inspected.  A line whose
    payload starts with ``error: non-local`` opens a new record; the help and
    "originates in" note lines that follow fill in that record's details.
    Reading stops at a ``[WARN] too many lines`` marker, and the record that
    was being built at that point is dropped because it may be incomplete.

    Args:
        filename: Path of the file to read.

    Returns:
        A list of ``NonLocalError`` records in the order they were found;
        empty when the file contains none.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    stdout_prefix = "[INFO] [stdout]"
    # The payload begins after the prefix and its separating space.
    payload_start = len("[INFO] [stdout] ")
    # The `impl` help starts with "move this", the `macro_rules!` one with
    # "remove the `#[macro_export]` or move this".
    help_prefixes = ("= help: move this", "= help: remove")

    errors = []
    current = None
    # Every file under the scanned tree is fed to this function, so decode
    # leniently: the lines of interest are ASCII and undecodable bytes
    # elsewhere must not abort the run.
    with open(filename, "r", encoding="utf-8", errors="replace") as f:
        for raw in f:
            if raw.startswith("[WARN] too many lines"):
                # Output was cut off: the record in progress cannot be trusted.
                current = None
                break
            if not raw.startswith(stdout_prefix):
                continue
            line = raw[payload_start:].strip()

            if line.startswith("error: non-local"):
                if current is not None:
                    errors.append(current)
                # Always start from a clean record so nothing seen earlier
                # leaks into this error.
                current = NonLocalError(filename)
                current.type_ = "impl" if "impl" in line else "macro_rules!"
                continue

            if current is None:
                # Detail lines before the first error header belong to some
                # other diagnostic.
                continue

            words = line.split()
            if line.startswith(help_prefixes):
                # "... the current <kind> `<name>`", optionally followed by
                # "and up <n> bodies" (four extra trailing words).
                offset = 4 if line.endswith("bodies") else 0
                if len(words) < 3 + offset:
                    continue  # malformed or truncated line
                current.body_type = words[-2 - offset]
                current.body_name = words[-1 - offset][1:-1]  # strip backticks
                if words[-3 - offset] != "current":
                    # Two-word kind, e.g. "<qualifier> <kind>".
                    current.body_type = words[-3 - offset] + " " + current.body_type
            elif line.startswith("= note: this error originates in the derive macro"):
                if len(words) > 9:
                    current.derive_name = words[9][1:-1]
            elif line.startswith("= note: this error originates in the macro"):
                if len(words) > 8:
                    current.macro_name = words[8][1:-1]

    if current is not None:
        errors.append(current)
    return errors
def _percent(part, whole):
    """Return ``part`` as a percentage of ``whole``; 0.0 when ``whole`` is zero."""
    if not whole:
        return 0.0
    return float(part) / float(whole) * 100.0


def _derive_status(name):
    """Return the status label printed next to a derive macro in the report."""
    if name in UPSTREAM_FIXED:
        return "fixed in {}".format(UPSTREAM_FIXED[name][0])
    if name in UPSTREAM_NOT_FIXED:
        return "not-fixed"
    if name in UPSTREAM_DEPRECATED:
        return "deprecated"
    return "n/a"


def main():
    """Parse every file below the current directory and print the report.

    Each file found by walking ``.`` is passed to ``process_file``.  The
    collected records are split into errors originating in derive macros,
    errors originating in other macros, and the rest ("manual"), and a
    Markdown report with per-category counts and a summary is written to
    stdout.

    Any unexpected exception is reported on stderr and the process exits
    with status 84.
    """
    try:
        errors = []
        for root, _dirs, files in os.walk(".", topdown=False):
            for name in files:
                errors.extend(process_file(os.path.join(root, name)))

        print("## Crater report analysis")
        print()
        print("*Context: The crater run temporarily put the lint to deny-by-default, and here are the results.*")
        print()

        total_errors = len(errors)
        # The project figures below are fixed text, not computed from the
        # parsed files.
        print("Affected projects: 7057 / 851203 (0.82%)")
        print("Total errors: {}".format(total_errors))
        print()

        derives = Counter()
        macros = Counter()
        rest = Counter()
        rest_crates = set()
        # Files with at least one error whose derive/macro is already fixed
        # upstream, i.e. the ones a dependency update actually helps.
        cargo_update_crates = set()
        for error in errors:
            if error.derive_name is not None:
                derives[error.derive_name] += 1
            if error.macro_name is not None:
                macros[error.macro_name] += 1
            if error.derive_name is None and error.macro_name is None:
                rest[error.body_type] += 1
                rest_crates.add(error.filename)
            if error.derive_name in UPSTREAM_FIXED or error.macro_name in UPSTREAM_MACRO_FIXED:
                cargo_update_crates.add(error.filename)

        total_derives = sum(derives.values())
        total_macros = sum(macros.values())
        total_manual = total_errors - total_derives - total_macros

        print("Errors from derive macros: {} (across {} different derive macros)".format(total_derives, len(derives)))
        print("Errors from `macro_rules!`: {} (across {} different `macro_rules!`)".format(total_macros, len(macros)))
        print("Errors (manual, not above): {}".format(total_manual))
        print()

        print("### Derive macros")
        print()
        print("<details>")
        print()

        # Looking up a missing key in a Counter yields 0 without inserting it.
        total_derives_fixed = sum(derives[k] for k in UPSTREAM_FIXED)
        total_derives_deprecated = sum(derives[k] for k in UPSTREAM_DEPRECATED)
        total_derives_not_fixed = sum(derives[k] for k in UPSTREAM_NOT_FIXED)
        total_derives_uncategorised = (
            total_derives
            - total_derives_fixed
            - total_derives_deprecated
            - total_derives_not_fixed
        )

        print("Total errors from derive macros **fixed upstream**: {} ({:.1f}%)".format(total_derives_fixed, _percent(total_derives_fixed, total_derives)))
        print("Total errors from derive macros **NOT fixed upstream**: {} ({:.1f}%)".format(total_derives_not_fixed, _percent(total_derives_not_fixed, total_derives)))
        print("Total errors from derive macros **deprecated**: {} ({:.1f}%)".format(total_derives_deprecated, _percent(total_derives_deprecated, total_derives)))
        print("Total errors from derive macros **uncategorised**: {} ({:.1f}%)".format(total_derives_uncategorised, _percent(total_derives_uncategorised, total_derives)))
        print()
        for derive, count in derives.most_common():
            print(" - `{}`: {} (status: {})".format(derive, count, _derive_status(derive)))
        print()
        print("</details>")
        print()

        print("### `macro_rules!`")
        print()
        total_macros_fixed = sum(macros[k] for k in UPSTREAM_MACRO_FIXED)
        print("<details>")
        print()
        for macro, count in macros.most_common():
            print(" - `{}`: {}".format(macro, count))
        print()
        print("</details>")
        print()

        print("### Manual (not derive or macro_rules!)")
        print()
        print("<details>")
        print()
        print("Inside:")
        for body_type, count in rest.most_common():
            print(" - `{}`: {}".format(body_type, count))
        print()
        print("</details>")
        print()
        print("------")
        print()

        print("### Summary")
        print()

        total_errors_excluding_deprecated = total_errors - total_derives_deprecated
        total_errors_fixed_by_cargo_update = total_derives_fixed + total_macros_fixed
        total_rest_crates = len(rest_crates)
        # Whatever is left once the three categories printed below are
        # removed; subtracting the full "cargo update" figure keeps fixed
        # macro errors from being counted twice.
        total_errors_uncat = (
            total_errors_excluding_deprecated
            - total_errors_fixed_by_cargo_update
            - total_derives_not_fixed
            - total_manual
        )

        print(f"In conclusion, this crater run revealed that making the lint deny-by-default (which is NOT default that would would be used, it would be warn-by-default) would break 7057 crates / 851203 crates tested (0.82%), there was a total of {total_errors} errors, of which {total_derives} ({_percent(total_derives, total_errors):.1f}%) errors were coming from derive macros, {total_macros} were coming from `macro_rules!` and {total_manual} errors are not coming from either of them.")
        print()
        print("Analysing the root of those derives and macros revealed many outdated versions of `serde_derive` and `diesel_derives`, representing nearly 74% of the total errors alone, as well 6 different derive crates that represent 11.4% that would need to be updated.")
        print()
        print("To put it simply:")
        print()
        print(" - {} ({:.1f}%) errors would be fixed by `cargo update` (representing {} different crates)".format(total_errors_fixed_by_cargo_update, _percent(total_errors_fixed_by_cargo_update, total_errors_excluding_deprecated), len(cargo_update_crates)))
        print(" - {} ({:.1f}%) errors that could fixed by upstream change (at least 6 different derive crates)".format(total_derives_not_fixed, _percent(total_derives_not_fixed, total_errors_excluding_deprecated)))
        print(" - {} ({:.1f}%) errors requires manual intervention ({} different crates)".format(total_manual, _percent(total_manual, total_errors_excluding_deprecated), total_rest_crates))
        print(" - {} ({:.1f}%) uncategorised errors (probably manual intervention and deps change): ".format(total_errors_uncat, _percent(total_errors_uncat, total_errors_excluding_deprecated)))
        print()
        print("I think those are more than acceptable numbers, in more that 3/4 of cases a `cargo update` would fix the warnings and by fixing 6 different derives we could get to 90%, the rest of cases would need manual interventions, but some intervention are to be expected.")
        print()
        print("*Reminder that the lint is warn-by-default, so it wouldn't actually break any crate!*")
    except Exception as ex:
        # Top-level boundary of the script: report the failure and exit with
        # a distinctive status instead of a traceback.
        print("unexpected error: {}".format(ex), file=sys.stderr)
        sys.exit(84)
# Run the analysis only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment