This document presents some usage of map_flap
family functions.
You need to get the feature branch associated to run this code
Use can use dev mode to isolate your installed dev version of package
dev_mode(on = TRUE)
Install from PR 502
devtools::install_github("tidyverse/purrr#502")
you can load purrr then
library(purrr)
If you don’t have a dev purrr clone folder already, you can just clone for this script using a temporary directory.
temp_purrr <- fs::dir_create(fs::file_temp("purrr"))
# feature branch is in cderv's repo not tidyverse's
git2r::clone("https://github.com/cderv/purrr.git", local_path = temp_purrr, branch = "fix-405-map_flat_type")
Use devtools::load_all()
to load the dev version of the package
devtools::load_all(temp_purrr)
#> Loading purrr
#> Re-compiling purrr
At the end use fs::dir_delete(temp_purrr)
to delete. Note that the temp
folder will be deleted automaticaly too.
Once you have the dev version, you can try the different use cases.
Let’s say I want to compare the list of GH users that currently have issues still open in some of the repos of different organisations for comparaison.
library(gh)
library(dplyr, warn.conflicts = FALSE)
library(tidyr)
Select some repository like tidyr and purrr in the tidyverse or httr, xml2 and usethis in r-lib
repos <- tibble(
org = c("tidyverse", "r-lib"),
repo = list(c("tidyr", "purrr"), c("httr", "xml2", "usethis"))
)
Use gh 📦 to retrieve all open issues
repos <- repos %>%
unnest() %>%
# .limit = "Inf" to get all opened issues
mutate(issues = map2(org, repo, ~ gh("/repos/:org/:repo/issues", org = .x, repo = .y, .limit = "Inf")))
#> Warning: le package 'bindrcpp' a été compilé avec la version R 3.4.4
All the repos do not have the same number of issues open, and there is no reason to.
repos <- repos %>%
mutate(nb_issues = map_int(issues, length))
This is where map_flat
variants can be useful. If we want a result with one
vector of GH login by organisation, map_chr()
won’t work because of length difference,
we need to map then flatten as character (map() %>% flatten()
)
open_issues_by_org <- repos %>%
nest(-org) %>%
mutate(users = map(data,
# extract GH login from nested list we obtained by gh package response
~ map_flat_chr(.x$issues, ~ map_chr(.x, c("user", "login"))) %>% unique()))
open_issues_by_org
#> # A tibble: 2 x 3
#> org data users
#> <chr> <list> <list>
#> 1 tidyverse <tibble [2 x 3]> <chr [50]>
#> 2 r-lib <tibble [3 x 3]> <chr [67]>
We can continue analysis by unnesting and if users open issues in the two organisation
open_issues_by_org %>%
unnest(users, .drop = TRUE) %>%
count(users, sort = TRUE)
#> # A tibble: 109 x 2
#> users n
#> <chr> <int>
#> 1 batpigandme 2
#> 2 cderv 2
#> 3 hadley 2
#> 4 jennybc 2
#> 5 krlmlr 2
#> 6 lorenzwalthert 2
#> 7 romainfrancois 2
#> 8 the-knife 2
#> 9 aaronwolen 1
#> 10 AhuPersonal 1
#> # ... with 99 more rows
If we have some text column we want to split and results with a vector of word, it can be useful as sentences may have diffent number of word.
tibble(
document_id = c(1, 2),
text = list(
c(
"some text with a few word",
"some text with not a few but word",
"some text"
),
c(
"some other text with a few word",
"some other text with not a few but word",
"some other text"
)
)
) %>%
mutate(words = map(text, ~ map_flat_chr(.x, ~ strsplit(.x, " ")[[1]]))) %>%
unnest(words)
#> # A tibble: 35 x 2
#> document_id words
#> <dbl> <chr>
#> 1 1 some
#> 2 1 text
#> 3 1 with
#> 4 1 a
#> 5 1 few
#> 6 1 word
#> 7 1 some
#> 8 1 text
#> 9 1 with
#> 10 1 not
#> # ... with 25 more rows
However, for this use case there is tidytext…
When scrapping some data to rectangle a website, this can be useful. For example, we want to know which are the categories of coming movies for next week.
library(rvest)
#> Le chargement a nécessité le package : xml2
#>
#> Attachement du package : 'xml2'
#> The following object is masked from 'package:purrr':
#>
#> as_list
#>
#> Attachement du package : 'rvest'
#> The following object is masked from 'package:purrr':
#>
#> pluck
url <- "https://www.imdb.com/movies-coming-soon/?ref_=nv_mv_cs_4"
# get list of film coming soon
coming_soon <- url %>%
read_html() %>%
html_nodes(".list_item")
# create a list that contains information about all coming movies
about_all_coming_movies <- list(
nb_film = length(coming_soon),
genre = map_flat_chr(coming_soon, ~ html_nodes(.x, ".cert-runtime-genre span[itemprop='genre']") %>%
html_text()) %>% unique(),
director = coming_soon %>%
map_flat_chr(~ html_nodes(.x, ".txt-block span[itemprop='director'] span[itemprop='name'] a") %>%
html_text()) %>% unique(),
stars = coming_soon %>%
map_flat_chr(~ html_nodes(.x, ".txt-block span[itemprop='actors'] span[itemprop='name'] a") %>%
html_text() %>% unique())
)
about_all_coming_movies
#> $nb_film
#> [1] 9
#>
#> $genre
#> [1] "Action" "Adventure" "Comedy" "Sci-Fi" "Crime"
#> [6] "Family" "Documentary" "Biography" "Drama" "Romance"
#> [11] "Thriller" "Fantasy" "Music"
#>
#> $director
#> [1] "David Leitch" "Bill Holderman" "Raja Gosnell"
#> [4] "Wim Wenders" "Dominic Cooke" "Paul Schrader"
#> [7] "Ron Howard" "John Cameron Mitchell" "Carla Simón"
#>
#> $stars
#> [1] "Josh Brolin" "Morena Baccarin"
#> [3] "Zazie Beetz" "Brianna Hildebrand"
#> [5] "Diane Keaton" "Jane Fonda"
#> [7] "Candice Bergen" "Mary Steenburgen"
#> [9] "Alan Cumming" "Stanley Tucci"
#> [11] "Natasha Lyonne" "Will Arnett"
#> [13] "Pope Francis" "Recep Tayyip Erdogan"
#> [15] "John Kerry" "Angela Merkel"
#> [17] "Saoirse Ronan" "Emily Watson"
#> [19] "Anne-Marie Duff" "Samuel West"
#> [21] "Amanda Seyfried" "Ethan Hawke"
#> [23] "Cedric the Entertainer" "Michael Gaston"
#> [25] "Thandie Newton" "Emilia Clarke"
#> [27] "Paul Bettany" "Alden Ehrenreich"
#> [29] "Elle Fanning" "Nicole Kidman"
#> [31] "Ruth Wilson" "Joanna Scanlan"
#> [33] "Laia Artigas" "Paula Robles"
#> [35] "Bruna Cusí" "David Verdaguer"
Some untidy file to clean. like this one whith one id per line that passes through several nodes. I want the vector or all nodes id.
readr::read_csv("
id,path
1,1-2-6
2,2-3
4,5-7-1-9
") %>%
pull(path) %>%
map_flat_int(~ strsplit(.x, "-")[[1]] %>% as.integer()) %>%
unique()
#> [1] 1 2 6 3 5 7 9