Urgau · February 3, 2024 15:30
diff --git a/report.py b/report.py
 #!/usr/bin/env python3

 from collections import Counter
 import math
 import sys
 import os

 # Derive macros whose offending expansion is recorded as fixed upstream.
 # Keys are the derive names as they appear in the logs (bare and
 # path-qualified spellings are listed separately).  Each value is a
 # (crate, note) tuple; the report only reads the crate name, to print
 # "fixed in <crate>".
 UPSTREAM_FIXED = {
     # serde_derive
     "Deserialize": ("serde_derive", ">1.0.113"),
     "Serialize": ("serde_derive", ">1.0.113"),
     "serde::Deserialize": ("serde_derive", ">1.0.113"),
     "serde::Serialize": ("serde_derive", ">1.0.113"),
     "serde_derive::Deserialize": ("serde_derive", ">1.0.113"),
     "serde_derive::Serialize": ("serde_derive", ">1.0.113"),
     # diesel_derives
     "QueryId": ("diesel_derives", ">2.0.0-rc"),
     "Queryable": ("diesel_derives", ">2.0.0-rc"),
     "Insertable": ("diesel_derives", ">2.0.0-rc"),
     "Identifiable": ("diesel_derives", ">2.0.0-rc"),
     "AsChangeset": ("diesel_derives", ">2.0.0-rc"),
     "Associations": ("diesel_derives", ">2.0.0-rc"),
     "QueryableByName": ("diesel_derives", ">2.0.0-rc"),
     "DieselNumericOps": ("diesel_derives", ">2.0.0-rc"),
     "FromRawSql": ("diesel_derives", ">2.0.0-rc"),
     # gc / gc-arena-derive
     "Trace": ("gc", "fixed in synstructure"),
     "Finalize": ("gc", "fixed in synstructure"),
     "Collect": ("gc-arena-derive", "fixed in synstructure"),
     # NOTE(review): "Abobination" looks like a misspelling; confirm it
     # matches the derive name that actually shows up in the logs.
     "Abobination": ("gc-arena-derive", "fixed in synstructure"),
     # structopt ("StructOpt" used to be listed twice; the duplicate key
     # silently overwrote the first entry and has been dropped)
     "StructOpt": ("structopt", ""),
     "Command": ("structopt", ""),
     # from-pest
     "FromPest": ("from-pest", ""),
 }

 # Derive macros coming from crates recorded as deprecated; the summary
 # excludes their errors from its percentage base.
 UPSTREAM_DEPRECATED = {
     "Fail": ("failure", "deprecated for >4yrs"),
     "failure::Fail": ("failure", "deprecated for >4yrs"),
 }

 # Derive macros recorded as still unfixed upstream.  The second tuple
 # element is a link to the offending source location, or None when no
 # link was recorded.
 UPSTREAM_NOT_FIXED = {
     "YaSerialize": ("yaserde", None),
     "YaDeserialize": ("yaserde", None),
     "FromPrimitive": ("num_derive", "https://github.com/rust-num/num-derive/blob/50ecdb10ac0934eccd971c20bde9bee5fe99ed98/src/lib.rs#L102"),
     "num_derive::FromPrimitive": ("num_derive", "https://github.com/rust-num/num-derive/blob/50ecdb10ac0934eccd971c20bde9bee5fe99ed98/src/lib.rs#L102"),
     "ToPrimitive": ("num_derive", None),
     "num_derive::ToPrimitive": ("num_derive", None),
     "PeekPoke": ("peek-poke-derive", "https://github.com/servo/webrender/blob/8ce388eb12df8b3a33c7a792a85e66e52acc3ca8/peek-poke/peek-poke-derive/src/lib.rs#L248C16-L248C27"),
     "Display": ("displaydoc", "https://github.com/yaahc/displaydoc/blob/f0b62a55ec2495b1a60b18f1d93f8b27e53123a7/src/expand.rs#L25"),
     "Error": ("displaydoc", "https://github.com/yaahc/displaydoc/blob/f0b62a55ec2495b1a60b18f1d93f8b27e53123a7/src/expand.rs#L25"),
     "Endpoint": ("rustify_derive", "https://github.com/jmgilman/rustify/blob/68fdbdb848b012f1116b972900dca23cde260e0e/rustify_derive/src/lib.rs#L308C16-L308C27"),
     "Savefile": ("savefile-derive", "https://docs.rs/savefile-derive/0.16.4/src/savefile_derive/lib.rs.html#1136"),
 }

 # Non-derive macros recorded as fixed upstream, keyed by the macro path
 # as it appears in the logs.
 UPSTREAM_MACRO_FIXED = {
     "$crate::py_class_impl": ("pyo3", "since 2 yrs ago"),
 }

 class NonLocalError:
     """One ``error: non-local ...`` diagnostic collected from a log file.

     Only ``filename`` is required.  Every other field defaults to ``None``
     and is meant to be filled in afterwards, as the log parser meets the
     lines that describe the error.
     """

     def __init__(
         self,
         filename,
         type_=None,
         body_type=None,
         body_name=None,
         derive_name=None,
         macro_name=None,
     ):
         # Path of the log file the diagnostic was read from.
         self.filename = filename
         # Kind of item reported ("impl" or "macro_rules!") and the
         # description/name of the body it was found in.
         self.type_, self.body_type, self.body_name = type_, body_type, body_name
         # Name of the derive macro / other macro the error originates in.
         self.derive_name, self.macro_name = derive_name, macro_name

 def process_file(filename):
     """Collect the ``error: non-local ...`` diagnostics found in one log file.

     Only lines starting with ``[INFO] [stdout]`` are inspected.  An
     ``error: non-local`` line opens a new record; the ``= help:`` and
     ``= note:`` lines that follow fill in the enclosing body and the
     originating macro.  Reading stops at the first
     ``[WARN] too many lines`` line, and the record being built at that
     point is dropped (presumably because the truncated log may have cut
     it short).

     Args:
         filename: Path of the log file to read.

     Returns:
         A list of ``NonLocalError`` objects, in order of appearance.
     """
     stdout_tag = "[INFO] [stdout]"
     errors = []
     current = NonLocalError(filename)

     # The caller feeds every file found under the working directory, so
     # decode leniently: a stray non-UTF-8 byte must not abort the whole run.
     with open(filename, "r", encoding="utf-8", errors="replace") as log:
         for raw in log:
             if raw.startswith("[WARN] too many lines"):
                 current.type_ = None
                 break
             if not raw.startswith(stdout_tag):
                 continue
             # Drop the tag plus the single space that follows it.
             line = raw[len(stdout_tag) + 1:].strip()

             if line.startswith("error: non-local"):
                 if current.type_ is not None:
                     errors.append(current)
                 current = NonLocalError(filename)
                 current.type_ = "impl" if "impl" in line else "macro_rules!"
             elif line.startswith(("= help: move this", "= help: remove")):
                 # The second prefix used to be misspelled "reove" and thus
                 # never matched any line.
                 words = line.split(" ")
                 # A trailing "... and up N bodies" adds four extra words
                 # after the body name.
                 offset = 4 if line.endswith("bodies") else 0
                 if len(words) >= 3 + offset:
                     current.body_type = words[-2 - offset]
                     # Strip the surrounding backticks from the body name.
                     current.body_name = words[-1 - offset][1:-1]
                     # Two-word body descriptions keep their first word too.
                     if words[-3 - offset] != "current":
                         current.body_type = words[-3 - offset] + " " + current.body_type
             elif line.startswith("= note: this error originates in the derive macro"):
                 words = line.split(" ")
                 if len(words) > 9:
                     current.derive_name = words[9][1:-1]
             elif line.startswith("= note: this error originates in the macro"):
                 words = line.split(" ")
                 if len(words) > 8:
                     current.macro_name = words[8][1:-1]

     if current.type_ is not None:
         errors.append(current)

     return errors

 def _percent(part, whole):
     """Return ``part`` as a percentage of ``whole`` (0.0 when ``whole`` is 0)."""
     if not whole:
         return 0.0
     return float(part) / float(whole) * 100.0


 def _derive_status(name):
     """Return the status label printed next to a derive macro in the report."""
     if name in UPSTREAM_FIXED:
         return "fixed in {}".format(UPSTREAM_FIXED[name][0])
     if name in UPSTREAM_NOT_FIXED:
         return "not-fixed"
     if name in UPSTREAM_DEPRECATED:
         return "deprecated"
     return "n/a"


 def main():
     """Scan every file under the current directory and print a Markdown report.

     Each file is parsed with ``process_file``.  The collected errors are
     grouped by origin (derive macro, other macro, or neither) and
     summarised on stdout.  Any exception is reported on stderr and the
     process exits with status 84.
     """
     try:
         errors = []

         for root, _dirs, files in os.walk(".", topdown=False):
             for name in files:
                 errors.extend(process_file(os.path.join(root, name)))

         print("## Crater report analysis")
         print()

         print("*Context: The crater run temporarily put the lint to deny-by-default, and here are the results.*")
         print()

         total_errors = len(errors)
         # NOTE: the project figures on the next line are hard-coded; they
         # are not computed from the scanned files.
         print("Affected projects: 7057 / 851203 (0.82%)")
         print("Total errors: {}".format(total_errors))
         print()

         # Classify every error in a single pass.  The derive and macro
         # checks are independent, so an error carrying both names is
         # counted in both tallies.
         derives = Counter()
         macros = Counter()
         rest = Counter()
         derives_crates = set()
         rest_crates = set()
         for error in errors:
             if error.derive_name is not None:
                 derives[error.derive_name] += 1
                 derives_crates.add(error.filename)
             if error.macro_name is not None:
                 macros[error.macro_name] += 1
             if error.macro_name is None and error.derive_name is None:
                 rest[error.body_type] += 1
                 rest_crates.add(error.filename)

         total_derives = sum(derives.values())
         total_macros = sum(macros.values())
         total_manual = total_errors - total_derives - total_macros
         print("Errors from derive macros: {} (across {} different derive macros)".format(total_derives, len(derives)))
         print("Errors from `macro_rules!`: {} (across {} different `macro_rules!`)".format(total_macros, len(macros)))
         print("Errors (manual, not above): {}".format(total_manual))
         print()

         print("### Derive macros")
         print()
         print("<details>")
         print()

         # Counter lookups return 0 for names that never showed up.
         total_derives_fixed = sum(derives[k] for k in UPSTREAM_FIXED)
         total_derives_deprecated = sum(derives[k] for k in UPSTREAM_DEPRECATED)
         total_derives_not_fixed = sum(derives[k] for k in UPSTREAM_NOT_FIXED)
         total_derives_uncategorised = (
             total_derives
             - total_derives_fixed
             - total_derives_deprecated
             - total_derives_not_fixed
         )
         # _percent() yields 0.0 instead of raising when no derive error
         # was found at all.
         print("Total errors from derive macros **fixed upstream**: {} ({:.1f}%)".format(total_derives_fixed, _percent(total_derives_fixed, total_derives)))
         print("Total errors from derive macros **NOT fixed upstream**: {} ({:.1f}%)".format(total_derives_not_fixed, _percent(total_derives_not_fixed, total_derives)))
         print("Total errors from derive macros **deprecated**: {} ({:.1f}%)".format(total_derives_deprecated, _percent(total_derives_deprecated, total_derives)))
         print("Total errors from derive macros **uncategorised**: {} ({:.1f}%)".format(total_derives_uncategorised, _percent(total_derives_uncategorised, total_derives)))
         print()

         for derive, count in derives.most_common():
             print(" - `{}`: {} (status: {})".format(derive, count, _derive_status(derive)))

         print()
         print("</details>")
         print()

         print("### `macro_rules!`")
         print()

         total_macros_fixed = sum(macros[k] for k in UPSTREAM_MACRO_FIXED)

         print("<details>")
         print()
         for macro, count in macros.most_common():
             print(" - `{}`: {}".format(macro, count))
         print()
         print("</details>")
         print()

         print("### Manual (not derive or macro_rules!)")
         print()

         print("<details>")
         print()

         print("Inside:")
         for body_type, count in rest.most_common():
             print(" - `{}`: {}".format(body_type, count))

         print()
         print("</details>")
         print()

         print("------")
         print()

         print("### Summary")
         print()

         total_errors_excluding_deprecated = total_errors - total_derives_deprecated
         total_errors_fixed_by_cargo_update = total_derives_fixed + total_macros_fixed
         total_rest_crates = len(rest_crates)
         # Subtract everything already reported in the bullets below.  The
         # fixed macros are part of the `cargo update` figure, so they must
         # not be counted as uncategorised a second time.
         total_errors_uncat = (
             total_errors_excluding_deprecated
             - total_errors_fixed_by_cargo_update
             - total_derives_not_fixed
             - total_manual
         )

         print(f"In conclusion, this crater run revealed that making the lint deny-by-default (which is NOT default that would would be used, it would be warn-by-default) would break 7057 crates / 851203 crates tested (0.82%), there was a total of {total_errors} errors, of which {total_derives} ({_percent(total_derives, total_errors):.1f}%) errors were coming from derive macros, {total_macros} were coming from `macro_rules!` and {total_manual} errors are not coming from either of them.")
         print()
         print("Analysing the root of those derives and macros revealed many outdated versions of `serde_derive` and `diesel_derives`, representing nearly 74% of the total errors alone, as well 6 different derive crates that represent 11.4% that would need to be updated.")
         print()
         print("To put it simply:")
         print()
         # NOTE(review): the crate count below covers every file with any
         # derive-originated error, not only those fixed upstream -- confirm
         # this is the intended figure.
         print(" - {} ({:.1f}%) errors would be fixed by `cargo update` (representing {} different crates)".format(total_errors_fixed_by_cargo_update, _percent(total_errors_fixed_by_cargo_update, total_errors_excluding_deprecated), len(derives_crates)))
         print(" - {} ({:.1f}%) errors that could fixed by upstream change (at least 6 different derive crates)".format(total_derives_not_fixed, _percent(total_derives_not_fixed, total_errors_excluding_deprecated)))
         print(" - {} ({:.1f}%) errors requires manual intervention ({} different crates)".format(total_manual, _percent(total_manual, total_errors_excluding_deprecated), total_rest_crates))
         print(" - {} ({:.1f}%) uncategorised errors (probably manual intervention and deps change): ".format(total_errors_uncat, _percent(total_errors_uncat, total_errors_excluding_deprecated)))
         print()
         print("I think those are more than acceptable numbers, in more that 3/4 of cases a `cargo update` would fix the warnings and by fixing 6 different derives we could get to 90%, the rest of cases would need manual interventions, but some intervention are to be expected.")
         print()
         print("*Reminder that the lint is warn-by-default, so it wouldn't actually break any crate!*")

     except Exception as ex:
         # Top-level boundary: report the failure and exit with a
         # distinctive status code.
         print("unexpected error: {}".format(ex), file=sys.stderr)
         sys.exit(84)

 # Generate the report only when this file is executed as a script, not
 # when it is imported.
 if __name__ == "__main__":
     main()
	#!/usr/bin/env python3

	from collections import Counter
	import math
	import sys
	import os

	# Derive macros whose offending expansion is recorded as fixed upstream.
	# Keys are the derive names as they appear in the logs (bare and
	# path-qualified spellings are listed separately).  Each value is a
	# (crate, note) tuple; the report only reads the crate name, to print
	# "fixed in <crate>".
	UPSTREAM_FIXED = {
	    # serde_derive
	    "Deserialize": ("serde_derive", ">1.0.113"),
	    "Serialize": ("serde_derive", ">1.0.113"),
	    "serde::Deserialize": ("serde_derive", ">1.0.113"),
	    "serde::Serialize": ("serde_derive", ">1.0.113"),
	    "serde_derive::Deserialize": ("serde_derive", ">1.0.113"),
	    "serde_derive::Serialize": ("serde_derive", ">1.0.113"),
	    # diesel_derives
	    "QueryId": ("diesel_derives", ">2.0.0-rc"),
	    "Queryable": ("diesel_derives", ">2.0.0-rc"),
	    "Insertable": ("diesel_derives", ">2.0.0-rc"),
	    "Identifiable": ("diesel_derives", ">2.0.0-rc"),
	    "AsChangeset": ("diesel_derives", ">2.0.0-rc"),
	    "Associations": ("diesel_derives", ">2.0.0-rc"),
	    "QueryableByName": ("diesel_derives", ">2.0.0-rc"),
	    "DieselNumericOps": ("diesel_derives", ">2.0.0-rc"),
	    "FromRawSql": ("diesel_derives", ">2.0.0-rc"),
	    # gc / gc-arena-derive
	    "Trace": ("gc", "fixed in synstructure"),
	    "Finalize": ("gc", "fixed in synstructure"),
	    "Collect": ("gc-arena-derive", "fixed in synstructure"),
	    # NOTE(review): "Abobination" looks like a misspelling; confirm it
	    # matches the derive name that actually shows up in the logs.
	    "Abobination": ("gc-arena-derive", "fixed in synstructure"),
	    # structopt ("StructOpt" used to be listed twice; the duplicate key
	    # silently overwrote the first entry and has been dropped)
	    "StructOpt": ("structopt", ""),
	    "Command": ("structopt", ""),
	    # from-pest
	    "FromPest": ("from-pest", ""),
	}

	# Derive macros coming from crates recorded as deprecated; the summary
	# excludes their errors from its percentage base.
	UPSTREAM_DEPRECATED = {
	    "Fail": ("failure", "deprecated for >4yrs"),
	    "failure::Fail": ("failure", "deprecated for >4yrs"),
	}

	# Derive macros recorded as still unfixed upstream.  The second tuple
	# element is a link to the offending source location, or None when no
	# link was recorded.
	UPSTREAM_NOT_FIXED = {
	    "YaSerialize": ("yaserde", None),
	    "YaDeserialize": ("yaserde", None),
	    "FromPrimitive": ("num_derive", "https://github.com/rust-num/num-derive/blob/50ecdb10ac0934eccd971c20bde9bee5fe99ed98/src/lib.rs#L102"),
	    "num_derive::FromPrimitive": ("num_derive", "https://github.com/rust-num/num-derive/blob/50ecdb10ac0934eccd971c20bde9bee5fe99ed98/src/lib.rs#L102"),
	    "ToPrimitive": ("num_derive", None),
	    "num_derive::ToPrimitive": ("num_derive", None),
	    "PeekPoke": ("peek-poke-derive", "https://github.com/servo/webrender/blob/8ce388eb12df8b3a33c7a792a85e66e52acc3ca8/peek-poke/peek-poke-derive/src/lib.rs#L248C16-L248C27"),
	    "Display": ("displaydoc", "https://github.com/yaahc/displaydoc/blob/f0b62a55ec2495b1a60b18f1d93f8b27e53123a7/src/expand.rs#L25"),
	    "Error": ("displaydoc", "https://github.com/yaahc/displaydoc/blob/f0b62a55ec2495b1a60b18f1d93f8b27e53123a7/src/expand.rs#L25"),
	    "Endpoint": ("rustify_derive", "https://github.com/jmgilman/rustify/blob/68fdbdb848b012f1116b972900dca23cde260e0e/rustify_derive/src/lib.rs#L308C16-L308C27"),
	    "Savefile": ("savefile-derive", "https://docs.rs/savefile-derive/0.16.4/src/savefile_derive/lib.rs.html#1136"),
	}

	# Non-derive macros recorded as fixed upstream, keyed by the macro path
	# as it appears in the logs.
	UPSTREAM_MACRO_FIXED = {
	    "$crate::py_class_impl": ("pyo3", "since 2 yrs ago"),
	}

	class NonLocalError:
	    """One ``error: non-local ...`` diagnostic collected from a log file.

	    Only ``filename`` is required.  Every other field defaults to ``None``
	    and is meant to be filled in afterwards, as the log parser meets the
	    lines that describe the error.
	    """

	    def __init__(
	        self,
	        filename,
	        type_=None,
	        body_type=None,
	        body_name=None,
	        derive_name=None,
	        macro_name=None,
	    ):
	        # Path of the log file the diagnostic was read from.
	        self.filename = filename
	        # Kind of item reported ("impl" or "macro_rules!") and the
	        # description/name of the body it was found in.
	        self.type_, self.body_type, self.body_name = type_, body_type, body_name
	        # Name of the derive macro / other macro the error originates in.
	        self.derive_name, self.macro_name = derive_name, macro_name

	def process_file(filename):
	    """Collect the ``error: non-local ...`` diagnostics found in one log file.

	    Only lines starting with ``[INFO] [stdout]`` are inspected.  An
	    ``error: non-local`` line opens a new record; the ``= help:`` and
	    ``= note:`` lines that follow fill in the enclosing body and the
	    originating macro.  Reading stops at the first
	    ``[WARN] too many lines`` line, and the record being built at that
	    point is dropped (presumably because the truncated log may have cut
	    it short).

	    Args:
	        filename: Path of the log file to read.

	    Returns:
	        A list of ``NonLocalError`` objects, in order of appearance.
	    """
	    stdout_tag = "[INFO] [stdout]"
	    errors = []
	    current = NonLocalError(filename)

	    # The caller feeds every file found under the working directory, so
	    # decode leniently: a stray non-UTF-8 byte must not abort the whole run.
	    with open(filename, "r", encoding="utf-8", errors="replace") as log:
	        for raw in log:
	            if raw.startswith("[WARN] too many lines"):
	                current.type_ = None
	                break
	            if not raw.startswith(stdout_tag):
	                continue
	            # Drop the tag plus the single space that follows it.
	            line = raw[len(stdout_tag) + 1:].strip()

	            if line.startswith("error: non-local"):
	                if current.type_ is not None:
	                    errors.append(current)
	                current = NonLocalError(filename)
	                current.type_ = "impl" if "impl" in line else "macro_rules!"
	            elif line.startswith(("= help: move this", "= help: remove")):
	                # The second prefix used to be misspelled "reove" and thus
	                # never matched any line.
	                words = line.split(" ")
	                # A trailing "... and up N bodies" adds four extra words
	                # after the body name.
	                offset = 4 if line.endswith("bodies") else 0
	                if len(words) >= 3 + offset:
	                    current.body_type = words[-2 - offset]
	                    # Strip the surrounding backticks from the body name.
	                    current.body_name = words[-1 - offset][1:-1]
	                    # Two-word body descriptions keep their first word too.
	                    if words[-3 - offset] != "current":
	                        current.body_type = words[-3 - offset] + " " + current.body_type
	            elif line.startswith("= note: this error originates in the derive macro"):
	                words = line.split(" ")
	                if len(words) > 9:
	                    current.derive_name = words[9][1:-1]
	            elif line.startswith("= note: this error originates in the macro"):
	                words = line.split(" ")
	                if len(words) > 8:
	                    current.macro_name = words[8][1:-1]

	    if current.type_ is not None:
	        errors.append(current)

	    return errors

	def _percent(part, whole):
	    """Return ``part`` as a percentage of ``whole`` (0.0 when ``whole`` is 0)."""
	    if not whole:
	        return 0.0
	    return float(part) / float(whole) * 100.0


	def _derive_status(name):
	    """Return the status label printed next to a derive macro in the report."""
	    if name in UPSTREAM_FIXED:
	        return "fixed in {}".format(UPSTREAM_FIXED[name][0])
	    if name in UPSTREAM_NOT_FIXED:
	        return "not-fixed"
	    if name in UPSTREAM_DEPRECATED:
	        return "deprecated"
	    return "n/a"


	def main():
	    """Scan every file under the current directory and print a Markdown report.

	    Each file is parsed with ``process_file``.  The collected errors are
	    grouped by origin (derive macro, other macro, or neither) and
	    summarised on stdout.  Any exception is reported on stderr and the
	    process exits with status 84.
	    """
	    try:
	        errors = []

	        for root, _dirs, files in os.walk(".", topdown=False):
	            for name in files:
	                errors.extend(process_file(os.path.join(root, name)))

	        print("## Crater report analysis")
	        print()

	        print("Context: The crater run temporarily put the lint to deny-by-default, and here are the results.")
	        print()

	        total_errors = len(errors)
	        # NOTE: the project figures on the next line are hard-coded; they
	        # are not computed from the scanned files.
	        print("Affected projects: 7057 / 851203 (0.82%)")
	        print("Total errors: {}".format(total_errors))
	        print()

	        # Classify every error in a single pass.  The derive and macro
	        # checks are independent, so an error carrying both names is
	        # counted in both tallies.
	        derives = Counter()
	        macros = Counter()
	        rest = Counter()
	        derives_crates = set()
	        rest_crates = set()
	        for error in errors:
	            if error.derive_name is not None:
	                derives[error.derive_name] += 1
	                derives_crates.add(error.filename)
	            if error.macro_name is not None:
	                macros[error.macro_name] += 1
	            if error.macro_name is None and error.derive_name is None:
	                rest[error.body_type] += 1
	                rest_crates.add(error.filename)

	        total_derives = sum(derives.values())
	        total_macros = sum(macros.values())
	        total_manual = total_errors - total_derives - total_macros
	        print("Errors from derive macros: {} (across {} different derive macros)".format(total_derives, len(derives)))
	        print("Errors from `macro_rules!`: {} (across {} different `macro_rules!`)".format(total_macros, len(macros)))
	        print("Errors (manual, not above): {}".format(total_manual))
	        print()

	        print("### Derive macros")
	        print()
	        print("<details>")
	        print()

	        # Counter lookups return 0 for names that never showed up.
	        total_derives_fixed = sum(derives[k] for k in UPSTREAM_FIXED)
	        total_derives_deprecated = sum(derives[k] for k in UPSTREAM_DEPRECATED)
	        total_derives_not_fixed = sum(derives[k] for k in UPSTREAM_NOT_FIXED)
	        total_derives_uncategorised = (
	            total_derives
	            - total_derives_fixed
	            - total_derives_deprecated
	            - total_derives_not_fixed
	        )
	        # _percent() yields 0.0 instead of raising when no derive error
	        # was found at all.
	        print("Total errors from derive macros fixed upstream: {} ({:.1f}%)".format(total_derives_fixed, _percent(total_derives_fixed, total_derives)))
	        print("Total errors from derive macros NOT fixed upstream: {} ({:.1f}%)".format(total_derives_not_fixed, _percent(total_derives_not_fixed, total_derives)))
	        print("Total errors from derive macros deprecated: {} ({:.1f}%)".format(total_derives_deprecated, _percent(total_derives_deprecated, total_derives)))
	        print("Total errors from derive macros uncategorised: {} ({:.1f}%)".format(total_derives_uncategorised, _percent(total_derives_uncategorised, total_derives)))
	        print()

	        for derive, count in derives.most_common():
	            print(" - `{}`: {} (status: {})".format(derive, count, _derive_status(derive)))

	        print()
	        print("</details>")
	        print()

	        print("### `macro_rules!`")
	        print()

	        total_macros_fixed = sum(macros[k] for k in UPSTREAM_MACRO_FIXED)

	        print("<details>")
	        print()
	        for macro, count in macros.most_common():
	            print(" - `{}`: {}".format(macro, count))
	        print()
	        print("</details>")
	        print()

	        print("### Manual (not derive or macro_rules!)")
	        print()

	        print("<details>")
	        print()

	        print("Inside:")
	        for body_type, count in rest.most_common():
	            print(" - `{}`: {}".format(body_type, count))

	        print()
	        print("</details>")
	        print()

	        print("------")
	        print()

	        print("### Summary")
	        print()

	        total_errors_excluding_deprecated = total_errors - total_derives_deprecated
	        total_errors_fixed_by_cargo_update = total_derives_fixed + total_macros_fixed
	        total_rest_crates = len(rest_crates)
	        # Subtract everything already reported in the bullets below.  The
	        # fixed macros are part of the `cargo update` figure, so they must
	        # not be counted as uncategorised a second time.
	        total_errors_uncat = (
	            total_errors_excluding_deprecated
	            - total_errors_fixed_by_cargo_update
	            - total_derives_not_fixed
	            - total_manual
	        )

	        print(f"In conclusion, this crater run revealed that making the lint deny-by-default (which is NOT default that would would be used, it would be warn-by-default) would break 7057 crates / 851203 crates tested (0.82%), there was a total of {total_errors} errors, of which {total_derives} ({_percent(total_derives, total_errors):.1f}%) errors were coming from derive macros, {total_macros} were coming from `macro_rules!` and {total_manual} errors are not coming from either of them.")
	        print()
	        print("Analysing the root of those derives and macros revealed many outdated versions of `serde_derive` and `diesel_derives`, representing nearly 74% of the total errors alone, as well 6 different derive crates that represent 11.4% that would need to be updated.")
	        print()
	        print("To put it simply:")
	        print()
	        # NOTE(review): the crate count below covers every file with any
	        # derive-originated error, not only those fixed upstream -- confirm
	        # this is the intended figure.
	        print(" - {} ({:.1f}%) errors would be fixed by `cargo update` (representing {} different crates)".format(total_errors_fixed_by_cargo_update, _percent(total_errors_fixed_by_cargo_update, total_errors_excluding_deprecated), len(derives_crates)))
	        print(" - {} ({:.1f}%) errors that could fixed by upstream change (at least 6 different derive crates)".format(total_derives_not_fixed, _percent(total_derives_not_fixed, total_errors_excluding_deprecated)))
	        print(" - {} ({:.1f}%) errors requires manual intervention ({} different crates)".format(total_manual, _percent(total_manual, total_errors_excluding_deprecated), total_rest_crates))
	        print(" - {} ({:.1f}%) uncategorised errors (probably manual intervention and deps change): ".format(total_errors_uncat, _percent(total_errors_uncat, total_errors_excluding_deprecated)))
	        print()
	        print("I think those are more than acceptable numbers, in more that 3/4 of cases a `cargo update` would fix the warnings and by fixing 6 different derives we could get to 90%, the rest of cases would need manual interventions, but some intervention are to be expected.")
	        print()
	        print("Reminder that the lint is warn-by-default, so it wouldn't actually break any crate!")

	    except Exception as ex:
	        # Top-level boundary: report the failure and exit with a
	        # distinctive status code.
	        print("unexpected error: {}".format(ex), file=sys.stderr)
	        sys.exit(84)

	# Generate the report only when this file is executed as a script, not
	# when it is imported.
	if __name__ == "__main__":
	    main()