Created
August 23, 2022 14:42
-
-
Save kddnewton/8459f08510830a7d6754f919bd7d8ee3 to your computer and use it in GitHub Desktop.
Extract constant references from files
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env ruby
# The purpose of this script is to extract all constant references from a given
# source file. It reads ARGF, which means you can either pass the content in
# through stdin or you can pass the file name as an argument. So for example,
# you can either:
#
#     echo 'Foo::Bar' | ./constants
#
# or you can pass a file name, as in:
#
#     echo 'Foo::Bar' >> test.rb
#     ./constants test.rb
#
# It will output both the nesting level and the constant path. This is important
# for constant resolution, as A::B means different things depending on how far
# nested you are in classes and modules.
# Third-party parser gem that provides SyntaxTree.parse, the Visitor base class
# and the node classes used below.
require "syntax_tree"
# Using a nested module here just so the constant references in
# extract_const_path are shorter. You could also include SyntaxTree or just use
# longer constant names, fine either way.
module SyntaxTree
  # Walks a syntax tree and prints every constant reference it finds, together
  # with the class/module nesting that is in effect where the reference
  # appears.
  class ConstPathRefVisitor < Visitor
    # We're going to track the parents of the various nodes as we descend the
    # tree. This is necessary if we want to be able to walk back up the tree to
    # check the parent context. The last element is the node currently being
    # visited, so stack[-2] is its parent.
    attr_reader :stack

    # This keeps track of the current nesting of classes and modules. It's
    # necessary for constant resolution. This will track an array of arrays. For
    # example, the following code:
    #
    #     module A
    #       module B::C
    #         class D
    #           def foo
    #             E
    #           end
    #         end
    #       end
    #     end
    #
    # would result in a nesting of [["A"], ["B", "C"], ["D"]]. It's necessary to
    # keep track of it like this instead of a flat array because otherwise you
    # would be able to resolve C when in reality the foo method shouldn't be
    # able to access C unless it's through B.
    attr_reader :nesting

    # Overriding the initializer here just to set up the @stack and @nesting
    # variables. All arguments are forwarded untouched to the parent class.
    def initialize(...)
      super
      @stack = []
      @nesting = []
    end

    # Overriding the default visit method in order to keep track of the stack of
    # parents as we visit them. The node is popped in an ensure clause so that
    # the stack stays balanced even if visiting a child raises, and the result
    # of the parent implementation is returned to the caller.
    def visit(node)
      @stack << node
      begin
        super
      ensure
        @stack.pop
      end
    end

    # Visiting class declarations in order to keep track of nesting. The class
    # name and the superclass expression are visited *before* the new nesting
    # entry is pushed: both are resolved in the enclosing scope, not inside the
    # class body they introduce. Only the body is visited with the class added
    # to the nesting.
    def visit_class(node)
      visit(node.constant)
      visit(node.superclass)
      within_nesting(node.constant) { visit(node.bodystmt) }
    end

    # ConstRef nodes occur when you are naming a constant in a class or module
    # definition, as in class Foo. Importantly these are different from a
    # VarRef, which can reference constants in a variable context.
    def visit_const_ref(node)
      output_const_path(extract_const_path(node))
      super
    end

    # ConstPathRef occurs when you're referencing a constant path, i.e., any
    # time you employ the :: operator on a constant. Importantly these are
    # always resolved as relative references, as opposed to TopConstRef which is
    # always absolute references.
    #
    # When this node is itself the left-hand side of a longer path (the
    # Foo::Bar in Foo::Bar::Baz) nothing is printed, because the enclosing
    # ConstPathRef already reported the full path. This mirrors how VarRef and
    # TopConstRef prefixes are handled.
    def visit_const_path_ref(node)
      output_const_path(extract_const_path(node)) unless const_path_prefix?
      super
    end

    # Visiting module declarations in order to keep track of nesting. As with
    # classes, the module name is visited in the enclosing nesting and only the
    # body is visited with the module added to the nesting.
    def visit_module(node)
      visit(node.constant)
      within_nesting(node.constant) { visit(node.bodystmt) }
    end

    # TopConstRef occurs when you're referencing a constant at the top level, as
    # in ::Foo::Bar. It is skipped when it is only the prefix of a longer path,
    # since the enclosing ConstPathRef reports the whole thing.
    def visit_top_const_ref(node)
      output_const_path(extract_const_path(node)) unless const_path_prefix?
      super
    end

    # Whenever a value is used as a variable, it is contained within a VarRef
    # node. In this case we'll check to see if it's referencing a plain
    # constant (because otherwise it would be a ConstPathRef node), and that it
    # is not merely the prefix of a longer constant path.
    def visit_var_ref(node)
      if node.value.is_a?(Const) && !const_path_prefix?
        output_const_path(extract_const_path(node))
      end

      super
    end

    private

    # True when the node currently being visited is a direct child of a
    # ConstPathRef, i.e. it is the left-hand side of a longer constant path.
    def const_path_prefix?
      stack[-2].is_a?(ConstPathRef)
    end

    # Pushes the path of the given declaration name onto the nesting for the
    # duration of the block. The path is computed before the push and the pop
    # lives in an ensure clause, so the nesting is always left balanced.
    def within_nesting(constant)
      path = extract_const_path(constant)
      @nesting << path

      begin
        yield
      ensure
        @nesting.pop
      end
    end

    # Responsible for changing a node in the tree into an array of strings that
    # represent the various pieces of the constant path. A top-level reference
    # gets a leading empty string so that joining with "::" yields "::Foo".
    def extract_const_path(node)
      case node
      in ConstRef[constant: Const[value:]]
        [value]
      in ConstPathRef[parent:, constant: Const[value:]]
        extract_const_path(parent) + [value]
      in TopConstRef[constant: Const[value:]]
        ["", value]
      in VarRef[value: Const[value:]]
        [value]
      else
        # If we don't have one of the above patterns, then we have a more
        # complicated path like Foo::Bar.baz::Qux::Quux. In this case we're just
        # going to fall back to formatting it nicely as one item in the array.
        formatter = Formatter.new("", [])
        node.format(formatter)
        formatter.flush
        [formatter.output.join]
      end
    end

    # This is going to output the information about a constant path and its
    # respective nesting. Here you could just track to an instance variable or
    # call a block that you sent to the visitor initializer or really anything
    # else.
    def output_const_path(const_path)
      scopes = nesting.map { |scope| scope.join("::") }

      puts "nesting=#{scopes.join(",")}"
      puts "const_path=#{const_path.join("::")}"
      puts
    end
  end
end
# Parse whatever ARGF provides (stdin, or the files named on the command line)
# and walk the resulting tree with the visitor, which prints each constant
# reference as it is encountered.
SyntaxTree.parse(ARGF.read).accept(SyntaxTree::ConstPathRefVisitor.new)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment