Find links on a page
Usage
ragnar_find_links(
x,
depth = 0L,
children_only = FALSE,
progress = TRUE,
...,
url_filter = identity,
validate = FALSE
)
Arguments
- x
URL, HTML file path, or XML document. For Markdown, convert to HTML using `commonmark::markdown_html()` first.
- depth
Integer specifying how many levels deep to crawl for links. When `depth > 0`, the function will follow child links (links with `x` as a prefix) and collect links from those pages as well.
- children_only
Logical or string. If `TRUE`, returns only child links (those having `x` as a prefix). If `FALSE`, returns all links found on the page. Note that regardless of this setting, only child links are followed when `depth > 0`.
- progress
Logical, draw a progress bar if `depth > 0`.
- ...
Currently unused. Must be empty.
- url_filter
A function that takes a character vector of URLs and may subset them to return a smaller list. This can be useful for filtering out URLs by rules different than `children_only`, which only checks the prefix.
- validate
Default is `FALSE`. If `TRUE`, sends a `HEAD` request for each link and removes those that are not accessible. Requests are sent in parallel using `httr2::req_perform_parallel()`.
Examples
# \dontrun{
ragnar_find_links("https://r4ds.hadley.nz/base-R.html")
#> [1] "https://adv-r.hadley.nz/subsetting.html"
#> [2] "https://dplyr.tidyverse.org/reference/across.html"
#> [3] "https://dplyr.tidyverse.org/reference/arrange.html"
#> [4] "https://dplyr.tidyverse.org/reference/filter.html"
#> [5] "https://dplyr.tidyverse.org/reference/group_by.html"
#> [6] "https://dplyr.tidyverse.org/reference/mutate.html"
#> [7] "https://dplyr.tidyverse.org/reference/pull.html"
#> [8] "https://dplyr.tidyverse.org/reference/relocate.html"
#> [9] "https://dplyr.tidyverse.org/reference/select.html"
#> [10] "https://dplyr.tidyverse.org/reference/summarise.html"
#> [11] "https://gist.github.com/hadley/1986a273e384fb2d4d752c18ed71bedf"
#> [12] "https://gist.github.com/hadley/c430501804349d382ce90754936ab8ec"
#> [13] "https://github.com/hadley/r4ds"
#> [14] "https://github.com/hadley/r4ds/edit/main/base-R.qmd"
#> [15] "https://github.com/hadley/r4ds/issues/new"
#> [16] "https://purrr.tidyverse.org/reference/map.html"
#> [17] "https://quarto.org"
#> [18] "https://r4ds.hadley.nz/EDA.html"
#> [19] "https://r4ds.hadley.nz/arrow.html"
#> [20] "https://r4ds.hadley.nz/base-R.html"
#> [21] "https://r4ds.hadley.nz/communicate.html"
#> [22] "https://r4ds.hadley.nz/communication.html"
#> [23] "https://r4ds.hadley.nz/data-import.html"
#> [24] "https://r4ds.hadley.nz/data-tidy.html"
#> [25] "https://r4ds.hadley.nz/data-transform.html"
#> [26] "https://r4ds.hadley.nz/data-visualize.html"
#> [27] "https://r4ds.hadley.nz/databases.html"
#> [28] "https://r4ds.hadley.nz/datetimes.html"
#> [29] "https://r4ds.hadley.nz/factors.html"
#> [30] "https://r4ds.hadley.nz/functions.html"
#> [31] "https://r4ds.hadley.nz/import.html"
#> [32] "https://r4ds.hadley.nz/intro.html"
#> [33] "https://r4ds.hadley.nz/iteration.html"
#> [34] "https://r4ds.hadley.nz/joins.html"
#> [35] "https://r4ds.hadley.nz/layers.html"
#> [36] "https://r4ds.hadley.nz/logicals.html"
#> [37] "https://r4ds.hadley.nz/missing-values.html"
#> [38] "https://r4ds.hadley.nz/numbers.html"
#> [39] "https://r4ds.hadley.nz/preface-2e.html"
#> [40] "https://r4ds.hadley.nz/program.html"
#> [41] "https://r4ds.hadley.nz/quarto-formats.html"
#> [42] "https://r4ds.hadley.nz/quarto.html"
#> [43] "https://r4ds.hadley.nz/rectangling.html"
#> [44] "https://r4ds.hadley.nz/regexps.html"
#> [45] "https://r4ds.hadley.nz/spreadsheets.html"
#> [46] "https://r4ds.hadley.nz/strings.html"
#> [47] "https://r4ds.hadley.nz/transform.html"
#> [48] "https://r4ds.hadley.nz/visualize.html"
#> [49] "https://r4ds.hadley.nz/webscraping.html"
#> [50] "https://r4ds.hadley.nz/whole-game.html"
#> [51] "https://r4ds.hadley.nz/workflow-basics.html"
#> [52] "https://r4ds.hadley.nz/workflow-help.html"
#> [53] "https://r4ds.hadley.nz/workflow-scripts.html"
#> [54] "https://r4ds.hadley.nz/workflow-style.html"
#> [55] "https://rdrr.io/r/base/Arithmetic.html"
#> [56] "https://rdrr.io/r/base/Extremes.html"
#> [57] "https://rdrr.io/r/base/NA.html"
#> [58] "https://rdrr.io/r/base/apply.html"
#> [59] "https://rdrr.io/r/base/c.html"
#> [60] "https://rdrr.io/r/base/cbind.html"
#> [61] "https://rdrr.io/r/base/data.frame.html"
#> [62] "https://rdrr.io/r/base/do.call.html"
#> [63] "https://rdrr.io/r/base/lapply.html"
#> [64] "https://rdrr.io/r/base/length.html"
#> [65] "https://rdrr.io/r/base/levels.html"
#> [66] "https://rdrr.io/r/base/library.html"
#> [67] "https://rdrr.io/r/base/list.files.html"
#> [68] "https://rdrr.io/r/base/list.html"
#> [69] "https://rdrr.io/r/base/logical.html"
#> [70] "https://rdrr.io/r/base/mean.html"
#> [71] "https://rdrr.io/r/base/numeric.html"
#> [72] "https://rdrr.io/r/base/order.html"
#> [73] "https://rdrr.io/r/base/seq.html"
#> [74] "https://rdrr.io/r/base/subset.html"
#> [75] "https://rdrr.io/r/base/sum.html"
#> [76] "https://rdrr.io/r/base/tapply.html"
#> [77] "https://rdrr.io/r/base/transform.html"
#> [78] "https://rdrr.io/r/base/vector.html"
#> [79] "https://rdrr.io/r/base/which.html"
#> [80] "https://rdrr.io/r/base/with.html"
#> [81] "https://rdrr.io/r/graphics/hist.html"
#> [82] "https://rdrr.io/r/graphics/plot.default.html"
#> [83] "https://rdrr.io/r/stats/Uniform.html"
#> [84] "https://rdrr.io/r/utils/str.html"
#> [85] "https://readxl.tidyverse.org/reference/read_excel.html"
#> [86] "https://tibble.tidyverse.org/reference/tibble.html"
#> [87] "https://tidyselect.r-lib.org/reference/starts_with.html"
#> [88] "https://tidyverse.tidyverse.org"
ragnar_find_links("https://ellmer.tidyverse.org/")
#> [1] "http://schloerke.com"
#> [2] "https://ai.google.dev/gemini-api/terms"
#> [3] "https://cloud.r-project.org/package=ellmer"
#> [4] "https://docs.posit.co/connect/user/oauth-integrations"
#> [5] "https://docs.posit.co/ide/server-pro/user/posit-workbench/managed-credentials/managed-credentials.html"
#> [6] "https://ellmer.tidyverse.org"
#> [7] "https://ellmer.tidyverse.org/LICENSE-text.html"
#> [8] "https://ellmer.tidyverse.org/LICENSE.html"
#> [9] "https://ellmer.tidyverse.org/articles/ellmer.html"
#> [10] "https://ellmer.tidyverse.org/articles/programming.html"
#> [11] "https://ellmer.tidyverse.org/articles/prompt-design.html"
#> [12] "https://ellmer.tidyverse.org/articles/streaming-async.html"
#> [13] "https://ellmer.tidyverse.org/articles/structured-data.html"
#> [14] "https://ellmer.tidyverse.org/articles/tool-calling.html"
#> [15] "https://ellmer.tidyverse.org/authors.html"
#> [16] "https://ellmer.tidyverse.org/index.html"
#> [17] "https://ellmer.tidyverse.org/news/index.html"
#> [18] "https://ellmer.tidyverse.org/reference/chat-any.html"
#> [19] "https://ellmer.tidyverse.org/reference/chat_anthropic.html"
#> [20] "https://ellmer.tidyverse.org/reference/chat_aws_bedrock.html"
#> [21] "https://ellmer.tidyverse.org/reference/chat_azure_openai.html"
#> [22] "https://ellmer.tidyverse.org/reference/chat_cloudflare.html"
#> [23] "https://ellmer.tidyverse.org/reference/chat_databricks.html"
#> [24] "https://ellmer.tidyverse.org/reference/chat_deepseek.html"
#> [25] "https://ellmer.tidyverse.org/reference/chat_github.html"
#> [26] "https://ellmer.tidyverse.org/reference/chat_google_gemini.html"
#> [27] "https://ellmer.tidyverse.org/reference/chat_groq.html"
#> [28] "https://ellmer.tidyverse.org/reference/chat_huggingface.html"
#> [29] "https://ellmer.tidyverse.org/reference/chat_mistral.html"
#> [30] "https://ellmer.tidyverse.org/reference/chat_ollama.html"
#> [31] "https://ellmer.tidyverse.org/reference/chat_openai.html"
#> [32] "https://ellmer.tidyverse.org/reference/chat_openrouter.html"
#> [33] "https://ellmer.tidyverse.org/reference/chat_perplexity.html"
#> [34] "https://ellmer.tidyverse.org/reference/chat_snowflake.html"
#> [35] "https://ellmer.tidyverse.org/reference/chat_vllm.html"
#> [36] "https://ellmer.tidyverse.org/reference/content_image_url.html"
#> [37] "https://ellmer.tidyverse.org/reference/index.html"
#> [38] "https://ellmer.tidyverse.org/reference/live_console.html"
#> [39] "https://garrickadenbuie.com"
#> [40] "https://github.com/posit-dev/chatlas"
#> [41] "https://github.com/tidyverse/ellmer"
#> [42] "https://github.com/tidyverse/ellmer/issues"
#> [43] "https://hadley.nz"
#> [44] "https://ollama.com"
#> [45] "https://opensource.org/licenses/mit-license.php"
#> [46] "https://orcid.org/0000-0001-9986-114X"
#> [47] "https://orcid.org/0000-0002-7111-0077"
#> [48] "https://orcid.org/0000-0003-4757-117X"
#> [49] "https://pkgdown.r-lib.org"
#> [50] "https://posit-dev.github.io/mcptools"
#> [51] "https://posit-dev.github.io/shinychat"
#> [52] "https://posit.co/blog/announcing-ellmer"
#> [53] "https://r6.r-lib.org"
#> [54] "https://ragnar.tidyverse.org"
#> [55] "https://rdrr.io/r/base/library.html"
#> [56] "https://rdrr.io/r/utils/install.packages.html"
#> [57] "https://rdrr.io/r/utils/str.html"
#> [58] "https://ror.org/03wc8by49"
#> [59] "https://tidyverse.org/blog/2025/11/ellmer-0-4-0"
#> [60] "https://vitals.tidyverse.org"
#> [61] "https://www.posit.co"
#> [62] "https://www.tidyverse.org/blog/2025/05/ellmer-0-2-0"
#> [63] "https://www.tidyverse.org/blog/2025/07/ellmer-0-3-0"
ragnar_find_links(
paste0("https://github.com/Snowflake-Labs/sfquickstarts/",
"tree/master/site/sfguides/src/build_a_custom_model_for_anomaly_detection"),
children_only = "https://github.com/Snowflake-Labs/sfquickstarts",
depth = 1
)
#> Error in open.connection(con, open = mode): cannot open the connection
# }
