Title: | Parse and Manipulate R Code |
---|---|
Description: | Parsing R code is key to building tools such as linters and stylers. This package provides a binding to the Rust crate 'ast-grep' so that one can parse and explore R code. |
Authors: | Etienne Bacher [aut, cre, cph] |
Maintainer: | Etienne Bacher <[email protected]> |
License: | MIT + file LICENSE |
Version: | 0.0.8 |
Built: | 2024-10-18 11:18:28 UTC |
Source: | https://github.com/etiennebacher/astgrepr |
Rules are the core of `astgrepr`. They are used to search for nodes and are
used in `node_match*()` and `node_find*()` functions. `ast_rule()` is a very
flexible function that allows one to build simple rules but also much more
complex and specific ones.
ast_rule( pattern = NULL, kind = NULL, regex = NULL, inside = NULL, has = NULL, precedes = NULL, follows = NULL, all = NULL, any = NULL, not = NULL, matches = NULL, id = NULL )
ast_rule( pattern = NULL, kind = NULL, regex = NULL, inside = NULL, has = NULL, precedes = NULL, follows = NULL, all = NULL, any = NULL, not = NULL, matches = NULL, id = NULL )
pattern |
The pattern to look for. This can be a string or an object of
class |
kind |
The kind of nodes to look for. |
regex |
A regex used to look for nodes. This must follow the syntax of
the Rust |
inside |
In which node should the node we look for be positioned? This
can be another rule made with |
has |
Same input type as |
precedes |
Same input type as |
follows |
Same input type as |
all |
This takes one or a list of rules made with |
any |
This takes one or a list of rules made with |
not |
This takes one or a list of rules made with |
matches |
This takes the |
id |
The name of this rule. This can be reused in another rule with
|
A list (possibly nested) with the class "astgrep_rule"
.
Meta-variables allow us to capture some of the content in a pattern. Usually,
using $
followed by an id in uppercase letters is enough:
src <- "any(duplicated(x))" root <- src |> tree_new() |> tree_root() root |> node_find(ast_rule(pattern = "any(duplicated($A))")) #> <List of 1 rule> #> |--rule_1: 1 node
However, in some cases using $
is a problem. For instance, if we want to
capture a column name coming after $
, then we can't use $
both as code
and as identifier.
src <- "df$a" root <- src |> tree_new() |> tree_root() root |> node_find(ast_rule(pattern = "df$$A")) #> <List of 1 rule> #> |--rule_1: 0 node
In this situation, we can use µ
instead:
root |> node_find(ast_rule(pattern = "df$µA")) #> <List of 1 rule> #> |--rule_1: 1 node
ast_rule(pattern = "print($A)") ast_rule( pattern = "print($A)", inside = ast_rule( any = ast_rule( kind = c("for_statement", "while_statement") ) ) )
ast_rule(pattern = "print($A)") ast_rule( pattern = "print($A)", inside = ast_rule( any = ast_rule( kind = c("for_statement", "while_statement") ) ) )
Recover the tree root from a node
node_get_root(x)
node_get_root(x)
x |
A node, either from |
src <- " print('hi') fn <- function() { print('hello') } " root <- src |> tree_new() |> tree_root() root |> node_find(ast_rule(pattern = "print($A)")) |> node_get_root() |> tree_root() |> node_text()
src <- " print('hi') fn <- function() { print('hello') } " root <- src |> tree_new() |> tree_root() root |> node_find(ast_rule(pattern = "print($A)")) |> node_get_root() |> tree_root() |> node_text()
Find the kind of a node
node_kind(x)
node_kind(x)
x |
A node, either from |
src <- "x <- rnorm(100, mean = 2) any(duplicated(y)) x <- z + 1 any(duplicated(x))" root <- src |> tree_new() |> tree_root() root |> node_find(ast_rule(pattern = "any(duplicated($VAR))")) |> node_kind() root |> node_find(ast_rule(pattern = "$X + $VALUE")) |> node_kind()
src <- "x <- rnorm(100, mean = 2) any(duplicated(y)) x <- z + 1 any(duplicated(x))" root <- src |> tree_new() |> tree_root() root |> node_find(ast_rule(pattern = "any(duplicated($VAR))")) |> node_kind() root |> node_find(ast_rule(pattern = "$X + $VALUE")) |> node_kind()
Those functions find one or several nodes based on some rule:
node_find()
returns the first node that is found;
node_find_all()
returns a list of all nodes found.
Some arguments (such as kind
) require some knowledge of the tree-sitter
grammar of R. This grammar can be found here:
https://github.com/r-lib/tree-sitter-r/blob/main/src/grammar.json.
node_find(x, ..., files = NULL) node_find_all(x, ..., files = NULL)
node_find(x, ..., files = NULL) node_find_all(x, ..., files = NULL)
x |
A node, either from |
... |
Any number of rules created with |
files |
A vector of filenames containing rules. Those must be |
node_find()
returns a single SgNode
.
node_find_all()
returns a list of SgNode
s.
src <- "x <- rnorm(100, mean = 2) any(duplicated(y)) plot(mtcars) any(duplicated(x))" root <- src |> tree_new() |> tree_root() root |> node_find(ast_rule(pattern = "any(duplicated($A))")) root |> node_find_all(ast_rule(pattern = "any(duplicated($A))")) # using the 'kind' of the nodes to find elements src <- " a <- 1 while (TRUE) { print('a') } " root <- src |> tree_new() |> tree_root() root |> node_find(ast_rule(kind = "while_statement")) # one can pass several rules at once src <- "x <- rnorm(100, mean = 2) any(duplicated(y)) plot(mtcars) any(duplicated(x)) while (TRUE) { print('a') }" root <- src |> tree_new() |> tree_root() root |> node_find( ast_rule(pattern = "any(duplicated($A))"), ast_rule(kind = "while_statement") ) root |> node_find_all( ast_rule(pattern = "any(duplicated($A))"), ast_rule(kind = "while_statement") )
src <- "x <- rnorm(100, mean = 2) any(duplicated(y)) plot(mtcars) any(duplicated(x))" root <- src |> tree_new() |> tree_root() root |> node_find(ast_rule(pattern = "any(duplicated($A))")) root |> node_find_all(ast_rule(pattern = "any(duplicated($A))")) # using the 'kind' of the nodes to find elements src <- " a <- 1 while (TRUE) { print('a') } " root <- src |> tree_new() |> tree_root() root |> node_find(ast_rule(kind = "while_statement")) # one can pass several rules at once src <- "x <- rnorm(100, mean = 2) any(duplicated(y)) plot(mtcars) any(duplicated(x)) while (TRUE) { print('a') }" root <- src |> tree_new() |> tree_root() root |> node_find( ast_rule(pattern = "any(duplicated($A))"), ast_rule(kind = "while_statement") ) root |> node_find_all( ast_rule(pattern = "any(duplicated($A))"), ast_rule(kind = "while_statement") )
node_replace()
gives the replacement for a particular node.
node_replace_all()
does the same but for several nodes (e.g. the output of
node_find_all()
). The output of those functions can be passed to
tree_rewrite()
to rewrite the entire input code with those replacements.
node_replace(x, ...) node_replace_all(x, ...)
node_replace(x, ...) node_replace_all(x, ...)
x |
A node, either from |
... |
Named elements where the name is a rule ID and the value is a
character string indicating the replacement to apply to nodes that match this
rule. Meta-variables are accepted but the syntax is different: they must be
wrapped in `~~`, e.g. `"anyNA(~~VAR~~)"`. |
src <- " x <- c(1, 2, 3) any(duplicated(x), na.rm = TRUE) any(duplicated(x)) if (any(is.na(x))) { TRUE } any(is.na(y))" root <- tree_new(src) |> tree_root() ### Only replace the first nodes found by each rule nodes_to_replace <- root |> node_find( ast_rule(id = "any_na", pattern = "any(is.na($VAR))"), ast_rule(id = "any_dup", pattern = "any(duplicated($VAR))") ) nodes_to_replace |> node_replace( any_na = "anyNA(~~VAR~~)", any_dup = "anyDuplicated(~~VAR~~) > 0" ) ### Replace all nodes found by each rule nodes_to_replace <- root |> node_find( ast_rule(id = "any_na", pattern = "any(is.na($VAR))"), ast_rule(id = "any_dup", pattern = "any(duplicated($VAR))") ) nodes_to_replace |> node_replace( any_na = "anyNA(~~VAR~~)", any_dup = "anyDuplicated(~~VAR~~) > 0" )
src <- " x <- c(1, 2, 3) any(duplicated(x), na.rm = TRUE) any(duplicated(x)) if (any(is.na(x))) { TRUE } any(is.na(y))" root <- tree_new(src) |> tree_root() ### Only replace the first nodes found by each rule nodes_to_replace <- root |> node_find( ast_rule(id = "any_na", pattern = "any(is.na($VAR))"), ast_rule(id = "any_dup", pattern = "any(duplicated($VAR))") ) nodes_to_replace |> node_replace( any_na = "anyNA(~~VAR~~)", any_dup = "anyDuplicated(~~VAR~~) > 0" ) ### Replace all nodes found by each rule nodes_to_replace <- root |> node_find( ast_rule(id = "any_na", pattern = "any(is.na($VAR))"), ast_rule(id = "any_dup", pattern = "any(duplicated($VAR))") ) nodes_to_replace |> node_replace( any_na = "anyNA(~~VAR~~)", any_dup = "anyDuplicated(~~VAR~~) > 0" )
Those functions extract the content of the meta-variable specified in
node_find()
:
node_get_match()
is used when the meta-variable refers to a single
pattern, e.g. "plot($A)"
;
node_get_multiple_matches()
is used when the meta-variable captures all
elements in a pattern, e.g. "plot($$$A)"
.
node_get_match(x, meta_var) node_get_multiple_matches(x, meta_var)
node_get_match(x, meta_var) node_get_multiple_matches(x, meta_var)
x |
A node, either from |
meta_var |
The name given to one of the meta-variable(s) in
|
src <- "x <- rnorm(100, mean = 2) plot(mtcars)" root <- src |> tree_new() |> tree_root() # we capture a single element with "$A" so node_get_match() can be used root |> node_find(ast_rule(pattern = "plot($A)")) |> node_get_match("A") # we can specify the variable to extract root |> node_find(ast_rule(pattern = "rnorm($A, $B)")) |> node_get_match("B") # we capture many elements with "$$$A" so node_get_multiple_matches() can # be used here root |> node_find(ast_rule(pattern = "rnorm($$$A)")) |> node_get_multiple_matches("A")
src <- "x <- rnorm(100, mean = 2) plot(mtcars)" root <- src |> tree_new() |> tree_root() # we capture a single element with "$A" so node_get_match() can be used root |> node_find(ast_rule(pattern = "plot($A)")) |> node_get_match("A") # we can specify the variable to extract root |> node_find(ast_rule(pattern = "rnorm($A, $B)")) |> node_get_match("B") # we capture many elements with "$$$A" so node_get_multiple_matches() can # be used here root |> node_find(ast_rule(pattern = "rnorm($$$A)")) |> node_get_multiple_matches("A")
Get more precise information on a node
node_matches(x, ..., files = NULL) node_inside(x, ..., files = NULL) node_has(x, ..., files = NULL) node_precedes(x, ..., files = NULL) node_follows(x, ..., files = NULL)
node_matches(x, ..., files = NULL) node_inside(x, ..., files = NULL) node_has(x, ..., files = NULL) node_precedes(x, ..., files = NULL) node_follows(x, ..., files = NULL)
x |
A node, either from |
... |
Any number of rules created with |
files |
A vector of filenames containing rules. Those must be |
src <- " print('hi') fn <- function() { print('hello') } " root <- src |> tree_new() |> tree_root() some_node <- root |> node_find(ast_rule(pattern = "print($A)")) node_text(some_node) some_node |> node_get_match("A") |> node_matches(ast_rule(kind = "argument"))
src <- " print('hi') fn <- function() { print('hello') } " root <- src |> tree_new() |> tree_root() some_node <- root |> node_find(ast_rule(pattern = "print($A)")) node_text(some_node) some_node |> node_get_match("A") |> node_matches(ast_rule(kind = "argument"))
Get information on whether a node is a leaf (meaning that it doesn't have any children) and whether it is named.
node_is_leaf(x) node_is_named(x) node_is_named_leaf(x)
node_is_leaf(x) node_is_named(x) node_is_named_leaf(x)
x |
A node, either from |
A logical value.
src <- "x <- rnorm(100, mean = 2) any(duplicated(y)) x <- z + 1 any(duplicated(x))" root <- src |> tree_new() |> tree_root() node_is_leaf(root) root |> node_find(ast_rule(pattern = "z")) |> node_is_leaf() root |> node_find(ast_rule(pattern = "z")) |> node_is_named()
src <- "x <- rnorm(100, mean = 2) any(duplicated(y)) x <- z + 1 any(duplicated(x))" root <- src |> tree_new() |> tree_root() node_is_leaf(root) root |> node_find(ast_rule(pattern = "z")) |> node_is_leaf() root |> node_find(ast_rule(pattern = "z")) |> node_is_named()
Get the start and end positions of a node
node_range(x) node_range_all(x)
node_range(x) node_range_all(x)
x |
A node, either from |
A list of two elements: start
and end
. Each of those is a vector
with two values indicating the row and column. Those are 0-indexed.
src <- "x <- rnorm(100, mean = 2) any(duplicated(y)) plot(x) any(duplicated(x))" root <- src |> tree_new() |> tree_root() node_range(root) root |> node_find(ast_rule(pattern = "rnorm($$$A)")) |> node_range() # There is also an "_all" variant when there are several nodes per rule root |> node_find_all( ast_rule(pattern = "any(duplicated($A))"), ast_rule(pattern = "plot($A)") ) |> node_range_all()
src <- "x <- rnorm(100, mean = 2) any(duplicated(y)) plot(x) any(duplicated(x))" root <- src |> tree_new() |> tree_root() node_range(root) root |> node_find(ast_rule(pattern = "rnorm($$$A)")) |> node_range() # There is also an "_all" variant when there are several nodes per rule root |> node_find_all( ast_rule(pattern = "any(duplicated($A))"), ast_rule(pattern = "plot($A)") ) |> node_range_all()
Those functions extract the code corresponding to the node(s):
node_text()
applies on a single node, for example the output of
node_get_match()
node_text_all()
applies on a list of nodes, for example the output of
node_get_multiple_matches()
node_text(x) node_text_all(x)
node_text(x) node_text_all(x)
x |
A node, either from |
src <- "x <- rnorm(100, mean = 2) any(duplicated(y)) plot(mtcars) any(duplicated(x))" root <- src |> tree_new() |> tree_root() # node_text() must be applied on single nodes root |> node_find(ast_rule(pattern = "plot($A)")) |> node_text() # node_find_all() returns a list on nodes on which # we can use node_text_all() root |> node_find_all(ast_rule(pattern = "any(duplicated($A))")) |> node_text_all()
src <- "x <- rnorm(100, mean = 2) any(duplicated(y)) plot(mtcars) any(duplicated(x))" root <- src |> tree_new() |> tree_root() # node_text() must be applied on single nodes root |> node_find(ast_rule(pattern = "plot($A)")) |> node_text() # node_find_all() returns a list on nodes on which # we can use node_text_all() root |> node_find_all(ast_rule(pattern = "any(duplicated($A))")) |> node_text_all()
This is a collection of functions used to navigate the tree. Some of
them have a variant that applies on a single node (e.g. node_next()
) and
one that applies on a list of nodes (e.g. node_next_all()
):
node_prev()
, node_prev_all()
, node_next()
, and node_next_all()
get the previous and next node(s) that are at the same depth as the current
node;
node_parent()
, node_ancestors()
, node_child()
and node_children()
get the node(s) that are above or below the current node in terms of depth.
All nodes except the root node have at least one ancestor (the root).
node_parent(x) node_child(x, nth) node_ancestors(x) node_children(x) node_next(x) node_next_all(x) node_prev(x) node_prev_all(x)
node_parent(x) node_child(x, nth) node_ancestors(x) node_children(x) node_next(x) node_next_all(x) node_prev(x) node_prev_all(x)
x |
A node, either from |
nth |
Integer. The child node to find. This is 0-indexed, so setting
`nth = 0` gets the first child. |
### get the previous/next node --------------------------- src <- " print('hi there') a <- 1 fn <- function(x) { x + 1 } " root <- src |> tree_new() |> tree_root() root |> node_find(ast_rule(pattern = "a <- $A")) |> node_prev() |> node_text() root |> node_find(ast_rule(pattern = "a <- $A")) |> node_next() |> node_text() # there are nodes inside the function, but there are no more nodes on the # same level as "fn" root |> node_find(ast_rule(pattern = "a <- $A")) |> node_next_all() |> node_text_all() ### get the parent/child node --------------------------- src <- " print('hi there') a <- 1 fn <- function(x) { x + 1 } " root <- src |> tree_new() |> tree_root() root |> node_find(ast_rule(pattern = "$VAR + 1")) |> node_parent() |> node_text() root |> node_find(ast_rule(pattern = "$VAR + 1")) |> node_ancestors() |> node_text_all() root |> node_find(ast_rule(pattern = "$VAR + 1")) |> node_child(0) |> node_text() root |> node_find(ast_rule(pattern = "$VAR + 1")) |> node_children() |> node_text_all()
### get the previous/next node --------------------------- src <- " print('hi there') a <- 1 fn <- function(x) { x + 1 } " root <- src |> tree_new() |> tree_root() root |> node_find(ast_rule(pattern = "a <- $A")) |> node_prev() |> node_text() root |> node_find(ast_rule(pattern = "a <- $A")) |> node_next() |> node_text() # there are nodes inside the function, but there are no more nodes on the # same level as "fn" root |> node_find(ast_rule(pattern = "a <- $A")) |> node_next_all() |> node_text_all() ### get the parent/child node --------------------------- src <- " print('hi there') a <- 1 fn <- function(x) { x + 1 } " root <- src |> tree_new() |> tree_root() root |> node_find(ast_rule(pattern = "$VAR + 1")) |> node_parent() |> node_text() root |> node_find(ast_rule(pattern = "$VAR + 1")) |> node_ancestors() |> node_text_all() root |> node_find(ast_rule(pattern = "$VAR + 1")) |> node_child(0) |> node_text() root |> node_find(ast_rule(pattern = "$VAR + 1")) |> node_children() |> node_text_all()
This is a specific type of rule. It can be used in the more general ruleset
built with ast_rule()
.
pattern_rule(selector = NULL, context = NULL, strictness = "smart")
pattern_rule(selector = NULL, context = NULL, strictness = "smart")
selector |
Defines the sub-syntax node kind that is the actual matcher of the pattern. |
context |
Defines the surrounding code that helps to resolve any ambiguity in the syntax. |
strictness |
Optional, defines how strictly the pattern will match against nodes. See 'Details'. |
The strictness
parameter defines the type of nodes the ast-grep
matcher
should consider. It has the following values:
cst
: All nodes in the pattern and target code must be matched. No node
is skipped.
smart
: All nodes in the pattern must be matched, but it will skip unnamed
nodes in target code. This is the default behavior.
ast
: Only named AST nodes in both pattern and target code are matched.
All unnamed nodes are skipped.
relaxed
: Named AST nodes in both pattern and target code are matched.
Comments and unnamed nodes are ignored.
signature
: Only named AST nodes' kinds are matched. Comments, unnamed
nodes and text are ignored.
More information: https://ast-grep.github.io/guide/rule-config/atomic-rule.html#pattern-object
Build a relational rule
relational_rule(stopBy = "neighbor", field = NULL, regex = NULL)
relational_rule(stopBy = "neighbor", field = NULL, regex = NULL)
stopBy |
todo |
field |
todo |
regex |
todo |
This function takes R code as a string and creates the corresponding abstract syntax tree (AST) from which we can query nodes.
tree_new(txt, file, ignore_tags = "ast-grep-ignore")
tree_new(txt, file, ignore_tags = "ast-grep-ignore")
txt |
A character string of length 1 containing the code to parse.
If provided, |
file |
Path to file containing the code to parse. If provided, |
ignore_tags |
Character vector indicating the tags to ignore. Default is
`"ast-grep-ignore"`. |
src <- "x <- rnorm(100, mean = 2) any(duplicated(y)) plot(x) any(duplicated(x))" tree_new(src)
src <- "x <- rnorm(100, mean = 2) any(duplicated(y)) plot(x) any(duplicated(x))" tree_new(src)
Rewrite the tree with a list of replacements
tree_rewrite(root, replacements)
tree_rewrite(root, replacements)
root |
The root tree, obtained via `tree_root()`. |
replacements |
A list of replacements, obtained via `node_replace()` or `node_replace_all()`. |
A character string corresponding to the code used to build the tree root but with replacements applied.
src <- "x <- c(1, 2, 3) any(duplicated(x), na.rm = TRUE) any(duplicated(x)) if (any(is.na(x))) { TRUE } any(is.na(y))" root <- tree_new(src) |> tree_root() ### Only replace the first nodes found by each rule nodes_to_replace <- root |> node_find( ast_rule(id = "any_na", pattern = "any(is.na($VAR))"), ast_rule(id = "any_dup", pattern = "any(duplicated($VAR))") ) fixes <- nodes_to_replace |> node_replace( any_na = "anyNA(~~VAR~~)", any_dup = "anyDuplicated(~~VAR~~) > 0" ) # original code cat(src) # new code tree_rewrite(root, fixes) ### Replace all nodes found by each rule nodes_to_replace <- root |> node_find_all( ast_rule(id = "any_na", pattern = "any(is.na($VAR))"), ast_rule(id = "any_dup", pattern = "any(duplicated($VAR))") ) fixes <- nodes_to_replace |> node_replace_all( any_na = "anyNA(~~VAR~~)", any_dup = "anyDuplicated(~~VAR~~) > 0" ) # original code cat(src) # new code tree_rewrite(root, fixes)
src <- "x <- c(1, 2, 3) any(duplicated(x), na.rm = TRUE) any(duplicated(x)) if (any(is.na(x))) { TRUE } any(is.na(y))" root <- tree_new(src) |> tree_root() ### Only replace the first nodes found by each rule nodes_to_replace <- root |> node_find( ast_rule(id = "any_na", pattern = "any(is.na($VAR))"), ast_rule(id = "any_dup", pattern = "any(duplicated($VAR))") ) fixes <- nodes_to_replace |> node_replace( any_na = "anyNA(~~VAR~~)", any_dup = "anyDuplicated(~~VAR~~) > 0" ) # original code cat(src) # new code tree_rewrite(root, fixes) ### Replace all nodes found by each rule nodes_to_replace <- root |> node_find_all( ast_rule(id = "any_na", pattern = "any(is.na($VAR))"), ast_rule(id = "any_dup", pattern = "any(duplicated($VAR))") ) fixes <- nodes_to_replace |> node_replace_all( any_na = "anyNA(~~VAR~~)", any_dup = "anyDuplicated(~~VAR~~) > 0" ) # original code cat(src) # new code tree_rewrite(root, fixes)
This function takes a tree created by tree_new()
and returns the root node
containing all subsequent nodes.
tree_root(x)
tree_root(x)
x |
A tree created by `tree_new()`. |
src <- "x <- rnorm(100, mean = 2) any(duplicated(y)) plot(x) any(duplicated(x))" tree <- tree_new(src) tree_root(tree)
src <- "x <- rnorm(100, mean = 2) any(duplicated(y)) plot(x) any(duplicated(x))" tree <- tree_new(src) tree_root(tree)