Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save arizvisa/ba180b27fa21a3a65ab12f534be19d27 to your computer and use it in GitHub Desktop.
Save arizvisa/ba180b27fa21a3a65ab12f534be19d27 to your computer and use it in GitHub Desktop.
Patch to weggli-rs/weggli@dc73a25 to add support for spans and bounds to the python module.
From b322ecac9992c8bd8c3dfeaf2579a0b2b4b81336 Mon Sep 17 00:00:00 2001
From: Ali Rizvi-Santiago <[email protected]>
Date: Sat, 28 May 2022 11:49:00 -0500
Subject: [PATCH 1/2] saving my game prior to modifying the tree-sitter-c
grammar
100.0% src/
diff --git a/src/builder.rs b/src/builder.rs
index 13908b0..f600032 100644
--- a/src/builder.rs
+++ b/src/builder.rs
@@ -125,6 +125,129 @@ fn _build_query_tree(
debug!("tree_sitter query {}: {}", id, sexp);
+ match crate::ts_query(&sexp, is_cpp) {
+ Ok(qr) => {
+ return QueryTree::new(qr, b.captures, variables, b.negations, id);
+ }
+ Err(e) => {
+ eprintln!(
+ "Tree sitter query generation failed: {:?}\n {}",
+ e.kind, e.message
+ );
+ eprintln!("sexpr: {}", sexp);
+ eprintln!("This is a bug! Can't recover :/");
+ std::process::exit(1);
+ }
+ }
+ /*
+ QueryTree::new(
+ crate::ts_query(&sexp, is_cpp),
+ b.captures,
+ variables,
+ b.negations,
+ id,
+ )
+ */
+}
+
+pub fn check_query_tree (
+ source: &str,
+ c: &mut TreeCursor,
+ id: usize,
+ is_cpp: bool,
+ is_multi_pattern: bool,
+ strict_mode: bool,
+ regex_constraints: Option<RegexMap>,
+) -> Result<QueryTree, tree_sitter::QueryError> {
+ let mut b = QueryBuilder {
+ query_source: source.to_string(),
+ captures: Vec::new(),
+ negations: Vec::new(),
+ id,
+ cpp: is_cpp,
+ regex_constraints: match regex_constraints {
+ Some(r) => r,
+ None => RegexMap::new(HashMap::new()),
+ },
+ };
+
+ // Skip the root node if it's a translation_unit.
+ if c.node().kind() == "translation_unit" {
+ debug!("query cursor specifies translation_unit");
+ c.goto_first_child();
+ }
+
+ let mut variables = HashSet::new();
+
+ let sexp = if !is_multi_pattern {
+ // We want to wrap queries into a function_definition so we can easily
+ // extract the function that contains a match. Of course we should not do that
+ // if the user specifies a function_definition as part of the query.
+ let needs_anchor = c.node().kind() == "compound_statement" && id == 0;
+ debug!("query needs anchor: {}", needs_anchor);
+
+ // The main work happens here. Iterate through the AST and create a tree-sitter query
+ let mut s = b.build(c, 0, strict_mode);
+
+ // Make sure user supplied function headers are displayed by adding a Capture
+ if !needs_anchor {
+ s += "@";
+ s += &add_capture(&mut b.captures, Capture::Display);
+ }
+
+ // Iterate through all captures, add their constraints to the query and extract used variables
+ s += &process_captures(&b.captures, 0, &mut variables);
+
+ // Optionally anchor query with a function_definition
+ if needs_anchor {
+ let capture = Capture::Display;
+ format!(
+ "(function_definition body: {}) @{}",
+ s,
+ &add_capture(&mut b.captures, capture)
+ )
+ } else {
+ "(".to_string() + &s + ")"
+ }
+ } else {
+ // When building a QueryTree for a compound statement, we create a tree-sitter
+ // query with multiple root patterns for efficient searching.
+ // This code is only executed when creating sub queries so we can skip
+ // the whole anchoring logic needed for the single pattern case.
+
+ assert!(c.goto_first_child());
+ assert!(c.goto_next_sibling());
+
+ let mut s = String::new();
+ loop {
+ let child = c.node();
+ if !c.goto_next_sibling() {
+ break;
+ }
+
+ let before = b.captures.len();
+ let mut cursor = child.walk();
+
+ let child_sexp = b.build(&mut cursor, 0, strict_mode);
+
+ let captures = &process_captures(&b.captures, before, &mut variables);
+
+ if !child_sexp.is_empty() {
+ s += &format!("({} {})", child_sexp, captures);
+ }
+ }
+ s
+ };
+
+ debug!("tree_sitter query {}: {}", id, sexp);
+
+ match crate::ts_query(&sexp, is_cpp) {
+ Ok(qr) => {
+ Ok(QueryTree::new(qr, b.captures, variables, b.negations, id))
+ }
+ Err(e) => Err(e)
+ }
+ /*
QueryTree::new(
crate::ts_query(&sexp, is_cpp),
b.captures,
@@ -132,6 +255,7 @@ fn _build_query_tree(
b.negations,
id,
)
+ */
}
/// Iterates through `captures` starting at `offset` and returns the necessary query predicates as a string.
diff --git a/src/lib.rs b/src/lib.rs
index b58d861..997516a 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -56,13 +56,16 @@ pub fn parse(source: &str, cpp: bool) -> Tree {
}
// Internal helper function to create a new tree-sitter query.
-fn ts_query(sexpr: &str, cpp: bool) -> tree_sitter::Query {
+fn ts_query(sexpr: &str, cpp: bool) -> Result<tree_sitter::Query, tree_sitter::QueryError> {
let language = if !cpp {
unsafe { tree_sitter_c() }
} else {
unsafe { tree_sitter_cpp() }
};
+ return Query::new(language, sexpr);
+
+ /*
match Query::new(language, sexpr) {
Ok(q) => q,
Err(e) => {
@@ -75,6 +78,7 @@ fn ts_query(sexpr: &str, cpp: bool) -> tree_sitter::Query {
std::process::exit(1);
}
}
+ */
}
/// Map from variable names to a positive/negative regex constraint
diff --git a/src/python.rs b/src/python.rs
index d841c8f..ccaf0df 100644
--- a/src/python.rs
+++ b/src/python.rs
@@ -15,10 +15,16 @@
*/
use pyo3::prelude::*;
+use pyo3::types::*;
use pyo3::wrap_pyfunction;
+use pyo3::exceptions::PyRuntimeError;
use crate::query::QueryTree;
use crate::result::QueryResult;
+use crate::RegexMap;
+
+use regex::Regex;
+use std::collections::HashMap;
#[pyclass]
struct QueryTreePy {
@@ -30,14 +36,223 @@ struct QueryResultPy {
qr: QueryResult,
}
-#[pyfunction(cpp = "false")]
-#[text_signature = "(query, cpp)"]
-fn parse_query(q: &str, cpp: bool) -> PyResult<QueryTreePy> {
- let tree = crate::parse(q, cpp);
- let mut c = tree.walk();
+/// Tree-sitter node kinds that are accepted as the root of a query.
+///
+/// `validate_query` compares the kind of the first child of the parsed
+/// query's root node against this list and rejects the query when the kind
+/// is not present.
+const VALID_NODE_KINDS: &[&str] = &[
+ "compound_statement",
+ "function_definition",
+ "struct_specifier",
+ "enum_specifier",
+ "union_specifier",
+ "class_specifier",
+
+ /* because we're searching for both declarations and expressions */
+ "expression_statement",
+ "declaration",
+];
+#[pyfunction(cpp = "false", force_query = "false")]
+#[text_signature = "(query, cpp, force_query, regexes)"]
+fn parse_query(pattern: &str, cpp: bool, force_query: bool, regexes: Option<HashMap::<&str, &str>>) -> PyResult<QueryTreePy> {
+
+ let mut tree = crate::parse(pattern, cpp);
+ //let mut cur = tree.walk();
+ let mut p = pattern;
+
+ /*
+ let temp_pattern;
+
+ // we need to filter the cursor using validate_query
+ if tree.root_node().has_error() {
+ if !pattern.ends_with(';') {
+ temp_pattern = format!("{};", &p);
+ let fixed_tree = crate::parse(&temp_pattern, cpp);
+ if !fixed_tree.root_node().has_error() {
+ info!("normalizing query: add missing ;");
+ tree = fixed_tree;
+ p = &temp_pattern;
+ }
+ }
+ }
+
+ // Try to do query normalization to support missing { }
+ // 'memcpy(_);' -> {memcpy(_);}
+ let temp_pattern2;
+ if !tree.root_node().has_error() {
+ let c = tree.root_node().child(0);
+ if let Some(n) = c {
+ if !VALID_NODE_KINDS.contains(&n.kind()) {
+ temp_pattern2 = format!("{{{}}}", &p);
+ let fixed_tree = crate::parse(&temp_pattern2, cpp);
+ if !fixed_tree.root_node().has_error() {
+ info!("normalizing query: add {}", "{}");
+ tree = fixed_tree;
+ p = &temp_pattern2;
+ }
+ }
+ }
+ }
+ */
+
+ //let mut cur = tree.walk();
+ //let mut cur = validate_query(&tree, p, force_query)?;
+ let mut cur = validate_query(&tree, p, force_query);
+
+ let constraints = match regexes {
+ Some(regexes) => {
+ match process_regexes(regexes) {
+ Ok(regexconstraints) => {
+ Some(regexconstraints)
+ }
+ Err(err) => {
+ return Err(PyRuntimeError::new_err(err))
+ }
+ }
+ }
+ None => None
+ };
+
+ match &mut cur {
+ Err(e) => {
+ Err(PyRuntimeError::new_err(format!("Tree sitter query validation failed: {}", e)))
+ }
+ Ok(cursor) => {
+
+ // guard build_query_tree for python so that we can avoid an exit
+ match crate::builder::check_query_tree(p, cursor, 0, cpp, false, false, constraints /*None*/) {
+ Ok(qt) => {
+ //let qt = crate::builder::build_query_tree(q, &mut c, cpp, None);
+ Ok(QueryTreePy { qt })
+ }
+ Err(e) => {
+ Err(PyRuntimeError::new_err(format!("Tree sitter query generation failed: {:?}\n {}",
+ e.kind, e.message
+ )))
+ }
+ }
+ }
+ }
+ /*
let qt = crate::builder::build_query_tree(q, &mut c, cpp, None);
Ok(QueryTreePy { qt })
+ */
+}
+
+/// Error cases for interpreting regex constraints.
+enum RegexError {
+ /// Carries the text of an argument that could not be interpreted.
+ /// NOTE(review): no active code visible here constructs this variant;
+ /// confirm it is still needed.
+ InvalidArg(String),
+ /// Wraps the error reported by the `regex` crate for a pattern.
+ InvalidRegex(regex::Error),
+}
+
+impl From<regex::Error> for RegexError {
+    /// Wraps an error from the `regex` crate in `RegexError::InvalidRegex`,
+    /// which lets `?` convert it automatically.
+    fn from(source: regex::Error) -> Self {
+        Self::InvalidRegex(source)
+    }
+}
+
+/// Compiles a mapping of variable names to regex patterns into a `RegexMap`.
+///
+/// Each key is a query variable name:
+/// * a leading `$` is added when the key does not already start with one;
+/// * a trailing `!` marks the constraint as negative and is removed from
+///   the stored name.
+///
+/// Each value is compiled with `Regex::new`.
+///
+/// # Errors
+///
+/// Returns `Err("Regex error <details>")` for the first pattern that fails
+/// to compile; no later entries are processed.
+fn process_regexes(regexes: HashMap::<&str, &str>) -> Result<RegexMap, String> {
+    let mut result = HashMap::with_capacity(regexes.len());
+
+    for (var, raw_regex) in regexes {
+        // Variables are stored under their `$`-prefixed name.
+        let mut normalized_var = if var.starts_with('$') {
+            var.to_string()
+        } else {
+            format!("${}", var)
+        };
+
+        // A trailing `!` selects a negative constraint and is not part of the name.
+        let negative = normalized_var.ends_with('!');
+        if negative {
+            normalized_var.pop();
+        }
+
+        // Stop at the first invalid pattern and report it to the caller.
+        let regex = Regex::new(raw_regex).map_err(|err| format!("Regex error {}", err))?;
+        result.insert(normalized_var, (negative, regex));
+    }
+
+    Ok(RegexMap::new(result))
+}
+
+/// Checks that a parsed query is usable and returns a cursor positioned on
+/// the first child of its root node.
+///
+/// * `tree`  - the parse tree of the query.
+/// * `query` - the query text; it is sliced with byte offsets taken from
+///   `tree`, so it is expected to be the text `tree` was parsed from.
+/// * `force` - when `true`, parse errors in `tree` are not reported.
+///
+/// # Errors
+///
+/// Returns a message when
+/// * the tree contains a parse error and `force` is `false` (the first
+///   error or missing node found is shown in the message),
+/// * the root node has more than one named child, or
+/// * the first child's kind is not listed in `VALID_NODE_KINDS`.
+fn validate_query<'a>(
+    tree: &'a tree_sitter::Tree,
+    query: &str,
+    force: bool,
+) -> Result<tree_sitter::TreeCursor<'a>, String> {
+    let root = tree.root_node();
+
+    if !force && root.has_error() {
+        let mut walker = root.walk();
+
+        // Descend into subtrees that contain an error and step over clean
+        // siblings until an ERROR or MISSING node is reached (or the walk
+        // cannot continue).
+        let first_error = loop {
+            let node = walker.node();
+            if !node.has_error() {
+                if !walker.goto_next_sibling() {
+                    break None;
+                }
+                continue;
+            }
+            if node.is_error() || node.is_missing() {
+                break Some(node);
+            }
+            if !walker.goto_first_child() {
+                break None;
+            }
+        };
+
+        let mut errmsg = String::from("Error! Query parsing failed:");
+        if let Some(node) = first_error {
+            let start = node.start_byte();
+            let end = node.end_byte();
+
+            // Text leading up to the offending node.
+            errmsg.push(' ');
+            errmsg.push_str(&query[0..start]);
+            if node.is_missing() {
+                errmsg.push_str(" [MISSING ");
+                errmsg.push_str(node.kind());
+                errmsg.push_str(" ] ");
+            }
+            // The offending node itself, then the rest of the query.
+            errmsg.push_str(&query[start..end]);
+            errmsg.push_str(&query[end..]);
+        }
+
+        return Err(errmsg);
+    }
+
+    info!("query sexp: {}", root.to_sexp());
+
+    let mut c = tree.walk();
+
+    if c.node().named_child_count() > 1 {
+        return Err(format!(
+            "Error: '{}' query contains multiple root nodes",
+            query
+        ));
+    }
+
+    // The result is deliberately not checked: if there is no child the cursor
+    // stays on the root, whose kind is then tested below.
+    c.goto_first_child();
+
+    let kind = c.node().kind();
+    if !VALID_NODE_KINDS.contains(&kind) {
+        return Err(format!(
+            "Error: '{}' ({}) is not a supported query root node.",
+            query, kind
+        ));
+    }
+
+    Ok(c)
}
#[pyfunction]
@@ -58,17 +273,59 @@ fn matches(p: &QueryTreePy, source: &str, cpp: bool) -> PyResult<Vec<QueryResult
Ok(r)
}
-#[pyfunction(color = "None")]
-#[text_signature = "(q, source, color)"]
-fn display(p: &QueryResultPy, source: &str, color: Option<bool>) -> PyResult<String> {
+#[pyfunction(color = "None", before = 10, after = 10)]
+#[text_signature = "(q, source, color, before, after)"]
+fn display(p: &QueryResultPy, source: &str, color: Option<bool>, before: usize, after: usize) -> PyResult<String> {
if let Some(color_override) = color {
colored::control::set_override(color_override);
}
- let r = p.qr.display(source, 10, 10);
+ let r = p.qr.display(source, before, after);
colored::control::unset_override();
Ok(r)
}
+// Returns the `(start, end)` pair of the result's `function` range, i.e. the
+// range of the outermost node of the match.
+// NOTE(review): presumably offsets into the searched source -- confirm.
+#[pyfunction]
+#[text_signature = "(q)"]
+fn bounds(p: &QueryResultPy) -> (usize, usize) {
+    let outer = &p.qr.function;
+    (outer.start, outer.end)
+}
+
+// Returns one `(variable name, start, end)` tuple per reported capture.
+//
+// Captures are ordered by the start of their range. The first capture in that
+// order is skipped, and so is any capture that starts inside the range
+// reported immediately before it. The name is an empty string when no
+// variable maps to the capture.
+#[pyfunction]
+#[text_signature = "(q)"]
+fn spans(p: &QueryResultPy) -> PyResult<Vec<(String, usize, usize)>> {
+    let mut ordered = p.qr.captures.clone();
+    ordered.sort_by_key(|capture| capture.range.start);
+
+    // Reverse lookup: capture index -> variable name.
+    let names_by_index: HashMap<usize, &str> = p
+        .qr
+        .vars
+        .iter()
+        .map(|(name, index)| (*index, name.as_str()))
+        .collect();
+
+    let mut result: Vec<(String, usize, usize)> = Vec::with_capacity(ordered.len());
+    for capture in ordered.into_iter().skip(1) {
+        // Drop captures that begin inside the previously reported range.
+        let inside_previous = result
+            .last()
+            .map_or(false, |(_, start, end)| (*start..*end).contains(&capture.range.start));
+        if inside_previous {
+            continue;
+        }
+
+        // NOTE(review): the original author flagged this +1 offset as
+        // possibly incorrect; it is kept unchanged here.
+        let idx: usize = (capture.capture_idx as usize) + 1;
+        let name = names_by_index.get(&idx).copied().unwrap_or("");
+
+        result.push((name.to_string(), capture.range.start, capture.range.end));
+    }
+
+    Ok(result)
+}
+
#[pymodule]
fn weggli(_py: Python, m: &PyModule) -> PyResult<()> {
m.add_class::<QueryTreePy>()?;
@@ -76,6 +333,8 @@ fn weggli(_py: Python, m: &PyModule) -> PyResult<()> {
m.add_function(wrap_pyfunction!(identifiers, m)?)?;
m.add_function(wrap_pyfunction!(matches, m)?)?;
m.add_function(wrap_pyfunction!(display, m)?)?;
+ m.add_function(wrap_pyfunction!(bounds, m)?)?;
+ m.add_function(wrap_pyfunction!(spans, m)?)?;
Ok(())
}
diff --git a/src/result.rs b/src/result.rs
index 1d32094..83183b4 100644
--- a/src/result.rs
+++ b/src/result.rs
@@ -31,7 +31,8 @@ pub struct QueryResult {
pub vars: FxHashMap<String, usize>,
// Range of the outermost node. This is badly named as it does not have to be a
// function definition, but for final query results it normally is.
- function: std::ops::Range<usize>,
+ pub function: std::ops::Range<usize>,
+ //function: std::ops::Range<usize>,
}
/// Stores the result (== source range) for a single capture.
--
2.43.0
From dc73a25ec28ca42d65517042988fc95076d851e7 Mon Sep 17 00:00:00 2001
From: Ali Rizvi-Santiago <[email protected]>
Date: Fri, 26 Aug 2022 10:53:12 -0500
Subject: [PATCH 2/2] looks like i modified parse_query or something
100.0% src/
diff --git a/src/python.rs b/src/python.rs
index ccaf0df..a53d1c9 100644
--- a/src/python.rs
+++ b/src/python.rs
@@ -51,50 +51,10 @@ const VALID_NODE_KINDS: &[&str] = &[
#[pyfunction(cpp = "false", force_query = "false")]
#[text_signature = "(query, cpp, force_query, regexes)"]
-fn parse_query(pattern: &str, cpp: bool, force_query: bool, regexes: Option<HashMap::<&str, &str>>) -> PyResult<QueryTreePy> {
-
- let mut tree = crate::parse(pattern, cpp);
- //let mut cur = tree.walk();
- let mut p = pattern;
-
- /*
- let temp_pattern;
-
- // we need to filter the cursor using validate_query
- if tree.root_node().has_error() {
- if !pattern.ends_with(';') {
- temp_pattern = format!("{};", &p);
- let fixed_tree = crate::parse(&temp_pattern, cpp);
- if !fixed_tree.root_node().has_error() {
- info!("normalizing query: add missing ;");
- tree = fixed_tree;
- p = &temp_pattern;
- }
- }
- }
-
- // Try to do query normalization to support missing { }
- // 'memcpy(_);' -> {memcpy(_);}
- let temp_pattern2;
- if !tree.root_node().has_error() {
- let c = tree.root_node().child(0);
- if let Some(n) = c {
- if !VALID_NODE_KINDS.contains(&n.kind()) {
- temp_pattern2 = format!("{{{}}}", &p);
- let fixed_tree = crate::parse(&temp_pattern2, cpp);
- if !fixed_tree.root_node().has_error() {
- info!("normalizing query: add {}", "{}");
- tree = fixed_tree;
- p = &temp_pattern2;
- }
- }
- }
- }
- */
-
- //let mut cur = tree.walk();
- //let mut cur = validate_query(&tree, p, force_query)?;
- let mut cur = validate_query(&tree, p, force_query);
+fn parse_query(pattern: &str, cpp: bool, force_query: bool, regexes: Option<HashMap::<&str, &str>>) -> PyResult<QueryTreePy>
+{
+ let tree = crate::parse(pattern, cpp);
+ let mut cursor = validate_query(&tree, pattern, force_query);
let constraints = match regexes {
Some(regexes) => {
@@ -110,14 +70,14 @@ fn parse_query(pattern: &str, cpp: bool, force_query: bool, regexes: Option<Hash
None => None
};
- match &mut cur {
+ match &mut cursor {
Err(e) => {
Err(PyRuntimeError::new_err(format!("Tree sitter query validation failed: {}", e)))
}
- Ok(cursor) => {
+ Ok(c) => {
// guard build_query_tree for python so that we can avoid an exit
- match crate::builder::check_query_tree(p, cursor, 0, cpp, false, false, constraints /*None*/) {
+ match crate::builder::check_query_tree(pattern, c, 0, cpp, false, false, constraints /*None*/) {
Ok(qt) => {
//let qt = crate::builder::build_query_tree(q, &mut c, cpp, None);
Ok(QueryTreePy { qt })
--
2.43.0
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment