Finding out which functions are called within a given function [duplicate]
try this example:
library(codetools)
ff <- function(f) {
leaf <- function (e, w) {
r <- try(eval(e), silent = TRUE)
if(!is.null(r) && is.function(r)) ret <<- c(ret, as.character(e))
}
call <- function (e, w) {
walkCode(e[[1]], w)
for (a in as.list(e[-1])) if (!missing(a)) walkCode(a, w)
}
ret <- c()
walkCode(body(f), makeCodeWalker(call = call, leaf = leaf, write = cat))
unique(ret)
}
then,
> ff(data.frame)
[1] "{" "<-" "if" "&&" "is.null" "row.names" "function" "is.character"
[9] "new" "as.character" "anyDuplicated" "return" "||" "all" "==" "stop"
[17] "gettextf" "warning" "paste" "which" "duplicated" "[" "as.list" "substitute"
[25] "list" "-" "missing" "length" "<" "!" "is.object" "is.integer"
[33] "any" "is.na" "unique" "integer" "structure" "character" "names" "!="
[41] "nzchar" "for" "seq_len" "[[" "is.list" "as.data.frame" ".row_names_info" ">"
[49] "deparse" "substr" "nchar" "attr" "abs" "max" "(" "%%"
[57] "unclass" "seq_along" "is.vector" "is.factor" "rep" "class" "inherits" "break"
[65] "next" "unlist" "make.names" "match" ".set_row_names"
> ff(read.table)
[1] "{" "if" "&&" "missing" "file" "!" "text" "<-" "textConnection"
[10] "on.exit" "close" "is.character" "nzchar" "inherits" "stop" "isOpen" "open" ">"
[19] "readLines" "<" "min" "(" "+" "lines" ".Internal" "quote" "length"
[28] "all" "==" "pushBack" "c" "stdin" "scan" "col" "numeric" "-"
[37] "for" "seq_along" "[" "max" "!=" "warning" "paste0" ":" "make.names"
[46] "names" "is.null" "rep" "match" "any" "<=" "rep.int" "list" "%in%"
[55] "sapply" "do.call" "data" "flush" "[[" "which" "is.logical" "is.numeric" "|"
[64] "gettextf" "&" "is.na" "type.convert" "character" "as.factor" "as.Date" "as.POSIXct" "::"
[73] "methods" "as" "row.names" ".set_row_names" "as.integer" "||" "is.object" "is.integer" "as.character"
[82] "anyDuplicated" "class" "attr"
There must be better ways out there, but here's my attempt:
listFunctions <- function(function.name, recursive = FALSE,
checked.functions = NULL){
# Get the function's code:
function.code <- deparse(get(function.name))
# break code up into sections preceding left brackets:
left.brackets <- c(unlist(strsplit(function.code,
split="[[:space:]]*\\(")))
called.functions <- unique(c(unlist(sapply(left.brackets,
function (x) {
# Split up according to anything that can't be in a function name.
# split = not alphanumeric, not '_', and not '.'
words <- c(unlist(strsplit(x, split="[^[:alnum:]_.]")))
last.word <- tail(words, 1)
last.word.is.function <- tryCatch(is.function(get(last.word)),
error=function(e) return(FALSE))
return(last.word[last.word.is.function])
}))))
if (recursive){
# checked.functions: We need to keep track of which functions
# we've checked to avoid infinite loops.
functs.to.check <- called.functions[!(called.functions %in%
checked.functions)]
called.functions <- unique(c(called.functions,
do.call(c, lapply(functs.to.check, function(x) {
listFunctions(x, recursive = T,
checked.functions = c(checked.functions,
called.functions))
}))))
}
return(called.functions)
}
And the results:
> listFunctions("listFunctions", recursive = FALSE)
[1] "function" "deparse" "get" "c"
[5] "unlist" "strsplit" "unique" "sapply"
[9] "tail" "tryCatch" "is.function" "return"
[13] "if" "do.call" "lapply" "listFunctions"
> system.time(all.functions <- listFunctions("listFunctions", recursive = TRUE))
user system elapsed
92.31 0.08 93.49
> length(all.functions)
[1] 518
As you can see, the recursive version returns a lot of functions. The problem with this is it returns every function called in the process, which obviously adds up as you go. In any case, I hope you can use this (or modify it) to suit your needs.
Disclaimer
This answer is based on answers by Edward and Kohske. I will not consider this for the answer finally accepted, its main purpose is simply to document another/extended approach and some benchmarks for other users.
Inner Function 1
Courtesy of Edward.
listFunctions_inner <- function(
name,
do.recursive=FALSE,
.do.verbose=FALSE,
.buffer=new.env()
){
..name <- "listFunctions_inner"
if (!is.character(name) | missing(name)) {
stop(paste(..name, " // expecting 'name' of class 'character'", sep=""))
}
name.0 <- name
if (tryCatch(is.function(get(name)), error=function(e) FALSE)) {
# PROCESS FUNCTIONS
if (.do.verbose) {
message(paste(..name, " // processing function: '", name, "'", sep=""))
}
# Get the function's code:
code <- deparse(get(name))
# break code up into sections preceding left brackets:
left.brackets <- c(unlist(strsplit(code, split="[[:space:]]*\\(")))
out <- sort(unique(unlist(lapply(left.brackets, function (x) {
# Split up according to anything that can't be in a function name.
# split = not alphanumeric, not '_', and not '.'
words <- c(unlist(strsplit(x, split="[^[:alnum:]_.]")))
last.word <- tail(words, 1)
last.word.is.function <- tryCatch(is.function(get(last.word)),
error=function(e) return(FALSE))
out <- last.word[last.word.is.function]
return(out)
}))))
if (do.recursive){
# funs.checked: We need to keep track of which functions
# we've checked to avoid infinite loops.
.buffer$funs.checked <- c(.buffer$funs.checked, name)
funs.next <- out[!(out %in% .buffer$funs.checked)]
if (length(funs.next)) {
out <- sort(unique(unlist(c(out, do.call(c,
lapply(funs.next, function(x) {
if (x == ".Primitive") {
return(NULL)
}
listFunctions_inner(
name=x,
do.recursive=TRUE,
.buffer=.buffer
)
})
)))))
}
}
out <- sort(unique(unlist(out)))
} else {
# PROCESS NAMESPACES
if (.do.verbose) {
message(paste(..name, " // processing namespace: '", name, "'", sep=""))
}
name <- paste("package", ":", name, sep="")
if (!name %in% search()) {
stop(paste(..name, " // invalid namespace: '", name.0, "'"))
}
# KEEP AS REFERENCE
# out <- ls(name)
funlist <- lsf.str(name)
out <- head(funlist, n=length(funlist))
}
out
}
Inner Function 2
Courtesy of Kohske
listFunctions2_inner <- function(
name,
do.recursive=FALSE,
.do.verbose=FALSE,
.buffer=new.env()
) {
..name <- "listFunctions2_inner"
if (!is.character(name) | missing(name)) {
stop(paste(..name, " // expecting 'name' of class 'character'", sep=""))
}
name.0 <- name
if (tryCatch(is.function(get(name)), error=function(e) FALSE)) {
# PROCESS FUNCTIONS
leaf <- function (e, w) {
r <- try(eval(e), silent = TRUE)
if(!is.null(r) && is.function(r)) out <<- c(out, as.character(e))
}
call <- function (e, w) {
walkCode(e[[1]], w)
for (a in as.list(e[-1])) if (!missing(a)) walkCode(a, w)
}
out <- c()
walkCode(
body(name),
makeCodeWalker(call=call, leaf=leaf, write=cat)
)
if (do.recursive){
# funs.checked: We need to keep track of which functions
# we've checked to avoid infinite loops.
.buffer$funs.checked <- c(.buffer$funs.checked, name)
funs.next <- out[!(out %in% .buffer$funs.checked)]
if (length(funs.next)) {
out <- sort(unique(unlist(c(out, do.call(c,
lapply(funs.next, function(x) {
if (x == ".Primitive") {
return(NULL)
}
listFunctions_inner(
name=x,
do.recursive=TRUE,
.buffer=.buffer
)
})
)))))
}
}
out <- sort(unique(out))
} else {
# PROCESS NAMESPACES
if (.do.verbose) {
message(paste(..name, " // processing namespace: '", name, "'", sep=""))
}
name <- paste("package", ":", name, sep="")
if (!name %in% search()) {
stop(paste(..name, " // invalid namespace: '", name.0, "'"))
}
# KEEP AS REFERENCE
# out <- ls(name)
funlist <- lsf.str(name)
out <- head(funlist, n=length(funlist))
}
}
Wrapper Function
This wrapper let's you choose the actual inner function used and allows to specify namespaces that should or should not be considered. That's important for my use case (see section Motivation above), as I'm usually only interested in "own" functions (in .GlobalEnv
) that have not yet been moved to a package.
listFunctions <- function(
name,
ns,
innerFunction=listFunctions,
do.inverse=FALSE,
do.table=FALSE,
do.recursive=FALSE,
.do.verbose=FALSE
){
..name <- "listFunctions_inner"
if (!is.character(name) | missing(name)) {
stop(paste(..name, " // expecting 'name' of class 'character'", sep=""))
}
out <- innerFunction(name, do.recursive=do.recursive,
.do.verbose=.do.verbose)
if (do.table) {
x.ns <- sapply(out, function(x) {
out <- environmentName(environment(get(x)))
if (out == "") {
out <- ".Primitive"
}
out
})
if (!missing(ns)) {
if (!do.inverse) {
idx <- which(x.ns %in% ns)
} else {
idx <- which(!x.ns %in% ns)
}
if (!length(idx)) {
return(NULL)
}
out <- out[idx]
x.ns <- x.ns[idx]
}
out <- data.frame(name=out, ns=x.ns, stringsAsFactors=FALSE)
rownames(out) <- NULL
}
out
}
Application
# Character vector
listFunctions("install.packages")
# Data Frame (table)
> listFunctions("install.packages", do.table=TRUE)
name ns
1 .libPaths .Primitive
2 .standard_regexps base
3 any .Primitive
4 available.packages utils
...
84 winDialog utils
# Consider 'base' only
> listFunctions("install.packages", ns="base", do.table=TRUE)
name ns
1 .standard_regexps base
2 basename base
3 capabilities base
...
56 warning base
# Consider all except 'base'
> listFunctions("install.packages", ns="base", do.inverse=TRUE, do.table=TRUE)
name ns
1 .libPaths .Primitive
2 any .Primitive
3 available.packages utils
...
28 winDialog utils
# Recursively, no table
listFunctions("install.packages", do.recursive=TRUE)
# Recursively table
listFunctions("install.packages", do.table=TRUE, do.recursive=TRUE)
name ns
1 .amatch_bounds base
2 .amatch_costs base
3 .C .Primitive
...
544 xzfile base
# List functions inside a namespace
listFunctions("utils")
listFunctions("utils", do.table=TRUE)
Benchmark Inner Function 1
> bench <- microbenchmark(listFunctions("install.packages"))
bench
> Unit: milliseconds
expr min lq median uq
1 listFunctions("install.packages") 152.9654 157.2805 160.5019 165.4688
max
1 244.6589
> bench <- microbenchmark(listFunctions("install.packages", do.recursive=TRUE), times=3)
bench
> Unit: seconds
expr min lq
1 listFunctions("install.packages", do.recursive = TRUE) 6.272732 6.30164
median uq max
1 6.330547 6.438158 6.545769
Benchmark Inner Function 2
> bench <- microbenchmark(listFunctions("install.packages",
+ innerFunction=listFunctions2_inner))
bench
> Unit: milliseconds
expr
1 listFunctions("install.packages", innerFunction = listFunctions2_inner)
min lq median uq max
1 207.0299 212.3286 222.6448 324.6399 445.4154
> bench <- microbenchmark(listFunctions("install.packages",
+ innerFunction=listFunctions2_inner, do.recursive=TRUE), times=3)
bench
Warning message:
In nm[nm == ""] <- exprnm[nm == ""] :
number of items to replace is not a multiple of replacement length
> Unit: seconds
expr
1 listFunctions("install.packages", innerFunction = listFunctions2_inner,
min lq median uq max
1 7.673281 8.065561 8.457841 8.558259 8.658678