diff --git a/DESCRIPTION b/DESCRIPTION index b30daa1b..f622a0ad 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -50,4 +50,4 @@ Config/testthat/edition: 3 Encoding: UTF-8 Note: libxls v1.6.2 (patched) 45abe77 Roxygen: list(markdown = TRUE) -RoxygenNote: 7.2.3 +RoxygenNote: 7.3.2 diff --git a/R/read_excel.R b/R/read_excel.R index cf512248..b94fb82f 100644 --- a/R/read_excel.R +++ b/R/read_excel.R @@ -57,13 +57,17 @@ NULL #' read_excel(datasets, "mtcars") #' #' # Skip rows and use default column names -#' read_excel(datasets, skip = 148, col_names = FALSE) +#' read_excel(datasets, skip = 10, col_names = FALSE) #' #' # Recycle a single column type #' read_excel(datasets, col_types = "text") #' #' # Specify some col_types and guess others -#' read_excel(datasets, col_types = c("text", "guess", "numeric", "guess", "guess")) +#' read_excel( +#' readxl_example("deaths.xlsx"), +#' skip = 4, n_max = 10, col_names = TRUE, +#' col_types = c("text", "text", "guess", "guess", "guess", "guess") +#' ) #' #' # Accomodate a column with disparate types via col_type = "list" #' df <- read_excel(readxl_example("clippy.xlsx"), col_types = c("text", "list")) diff --git a/README.Rmd b/README.Rmd index 2a3ca043..3f896e4c 100644 --- a/README.Rmd +++ b/README.Rmd @@ -93,7 +93,7 @@ Specify a worksheet by name or number. ```{r} read_excel(xlsx_example, sheet = "chickwts") -read_excel(xls_example, sheet = 4) +read_excel(xls_example, sheet = 3) ``` There are various ways to control which cells are read. You can even specify the sheet here, if providing an Excel-style cell range. @@ -109,7 +109,7 @@ read_excel(xlsx_example, range = "mtcars!B1:D5") If `NA`s are represented by something other than blank cells, set the `na` argument. ```{r} -read_excel(xlsx_example, na = "setosa") +read_excel(xlsx_example, na = "0") ``` If you are new to the tidyverse conventions for data import, you may want to consult the [data import chapter](https://r4ds.had.co.nz/data-import.html) in R for Data Science. 
readxl will become increasingly consistent with other packages, such as [readr](https://readr.tidyverse.org/). @@ -149,7 +149,7 @@ Here are some other packages with functionality that is complementary to readxl __Writing Excel files__: The example files `datasets.xlsx` and `datasets.xls` were created with the help of [openxlsx](https://CRAN.R-project.org/package=openxlsx) (and Excel). openxlsx provides "a high level interface to writing, styling and editing worksheets". ```{r eval = FALSE} -l <- list(iris = iris, mtcars = mtcars, chickwts = chickwts, quakes = quakes) +l <- list(mtcars = mtcars, chickwts = chickwts, quakes = quakes) openxlsx::write.xlsx(l, file = "inst/extdata/datasets.xlsx") ``` diff --git a/README.md b/README.md index 544bc94c..a965a2d2 100644 --- a/README.md +++ b/README.md @@ -75,7 +75,7 @@ readxl_example() #> [5] "deaths.xls" "deaths.xlsx" "geometry.xls" "geometry.xlsx" #> [9] "type-me.xls" "type-me.xlsx" readxl_example("clippy.xls") -#> [1] "/private/tmp/RtmpM1GkLC/temp_libpatha8e46f7f62bf/readxl/extdata/clippy.xls" +#> [1] "/Users/fontikar/Library/R/arm64/4.4/library/readxl/extdata/clippy.xls" ``` `read_excel()` reads both xls and xlsx files and detects the format from @@ -84,30 +84,30 @@ the extension. 
``` r xlsx_example <- readxl_example("datasets.xlsx") read_excel(xlsx_example) -#> # A tibble: 150 × 5 -#> Sepal.Length Sepal.Width Petal.Length Petal.Width Species -#> -#> 1 5.1 3.5 1.4 0.2 setosa -#> 2 4.9 3 1.4 0.2 setosa -#> 3 4.7 3.2 1.3 0.2 setosa -#> # ℹ 147 more rows +#> # A tibble: 32 × 11 +#> mpg cyl disp hp drat wt qsec vs am gear carb +#> +#> 1 21 6 160 110 3.9 2.62 16.5 0 1 4 4 +#> 2 21 6 160 110 3.9 2.88 17.0 0 1 4 4 +#> 3 22.8 4 108 93 3.85 2.32 18.6 1 1 4 1 +#> # ℹ 29 more rows xls_example <- readxl_example("datasets.xls") read_excel(xls_example) -#> # A tibble: 150 × 5 -#> Sepal.Length Sepal.Width Petal.Length Petal.Width Species -#> -#> 1 5.1 3.5 1.4 0.2 setosa -#> 2 4.9 3 1.4 0.2 setosa -#> 3 4.7 3.2 1.3 0.2 setosa -#> # ℹ 147 more rows +#> # A tibble: 32 × 11 +#> mpg cyl disp hp drat wt qsec vs am gear carb +#> +#> 1 21 6 160 110 3.9 2.62 16.5 0 1 4 4 +#> 2 21 6 160 110 3.9 2.88 17.0 0 1 4 4 +#> 3 22.8 4 108 93 3.85 2.32 18.6 1 1 4 1 +#> # ℹ 29 more rows ``` List the sheet names with `excel_sheets()`. ``` r excel_sheets(xlsx_example) -#> [1] "iris" "mtcars" "chickwts" "quakes" +#> [1] "mtcars" "chickwts" "quakes" ``` Specify a worksheet by name or number. @@ -121,7 +121,7 @@ read_excel(xlsx_example, sheet = "chickwts") #> 2 160 horsebean #> 3 136 horsebean #> # ℹ 68 more rows -read_excel(xls_example, sheet = 4) +read_excel(xls_example, sheet = 3) #> # A tibble: 1,000 × 5 #> lat long depth mag stations #> @@ -136,34 +136,34 @@ specify the sheet here, if providing an Excel-style cell range. 
``` r read_excel(xlsx_example, n_max = 3) -#> # A tibble: 3 × 5 -#> Sepal.Length Sepal.Width Petal.Length Petal.Width Species -#> -#> 1 5.1 3.5 1.4 0.2 setosa -#> 2 4.9 3 1.4 0.2 setosa -#> 3 4.7 3.2 1.3 0.2 setosa +#> # A tibble: 3 × 11 +#> mpg cyl disp hp drat wt qsec vs am gear carb +#> +#> 1 21 6 160 110 3.9 2.62 16.5 0 1 4 4 +#> 2 21 6 160 110 3.9 2.88 17.0 0 1 4 4 +#> 3 22.8 4 108 93 3.85 2.32 18.6 1 1 4 1 read_excel(xlsx_example, range = "C1:E4") #> # A tibble: 3 × 3 -#> Petal.Length Petal.Width Species -#> -#> 1 1.4 0.2 setosa -#> 2 1.4 0.2 setosa -#> 3 1.3 0.2 setosa +#> disp hp drat +#> +#> 1 160 110 3.9 +#> 2 160 110 3.9 +#> 3 108 93 3.85 read_excel(xlsx_example, range = cell_rows(1:4)) -#> # A tibble: 3 × 5 -#> Sepal.Length Sepal.Width Petal.Length Petal.Width Species -#> -#> 1 5.1 3.5 1.4 0.2 setosa -#> 2 4.9 3 1.4 0.2 setosa -#> 3 4.7 3.2 1.3 0.2 setosa +#> # A tibble: 3 × 11 +#> mpg cyl disp hp drat wt qsec vs am gear carb +#> +#> 1 21 6 160 110 3.9 2.62 16.5 0 1 4 4 +#> 2 21 6 160 110 3.9 2.88 17.0 0 1 4 4 +#> 3 22.8 4 108 93 3.85 2.32 18.6 1 1 4 1 read_excel(xlsx_example, range = cell_cols("B:D")) -#> # A tibble: 150 × 3 -#> Sepal.Width Petal.Length Petal.Width -#> -#> 1 3.5 1.4 0.2 -#> 2 3 1.4 0.2 -#> 3 3.2 1.3 0.2 -#> # ℹ 147 more rows +#> # A tibble: 32 × 3 +#> cyl disp hp +#> +#> 1 6 160 110 +#> 2 6 160 110 +#> 3 4 108 93 +#> # ℹ 29 more rows read_excel(xlsx_example, range = "mtcars!B1:D5") #> # A tibble: 4 × 3 #> cyl disp hp @@ -178,14 +178,14 @@ If `NA`s are represented by something other than blank cells, set the `na` argument. 
``` r -read_excel(xlsx_example, na = "setosa") -#> # A tibble: 150 × 5 -#> Sepal.Length Sepal.Width Petal.Length Petal.Width Species -#> -#> 1 5.1 3.5 1.4 0.2 -#> 2 4.9 3 1.4 0.2 -#> 3 4.7 3.2 1.3 0.2 -#> # ℹ 147 more rows +read_excel(xlsx_example, na = "0") +#> # A tibble: 32 × 11 +#> mpg cyl disp hp drat wt qsec vs am gear carb +#> +#> 1 21 6 160 110 3.9 2.62 16.5 NA 1 4 4 +#> 2 21 6 160 110 3.9 2.88 17.0 NA 1 4 4 +#> 3 22.8 4 108 93 3.85 2.32 18.6 1 1 4 1 +#> # ℹ 29 more rows ``` If you are new to the tidyverse conventions for data import, you may @@ -249,7 +249,7 @@ openxlsx provides “a high level interface to writing, styling and editing worksheets”. ``` r -l <- list(iris = iris, mtcars = mtcars, chickwts = chickwts, quakes = quakes) +l <- list(mtcars = mtcars, chickwts = chickwts, quakes = quakes) openxlsx::write.xlsx(l, file = "inst/extdata/datasets.xlsx") ``` diff --git a/inst/extdata/datasets.xls b/inst/extdata/datasets.xls index bc0933e9..52f7eecb 100644 Binary files a/inst/extdata/datasets.xls and b/inst/extdata/datasets.xls differ diff --git a/inst/extdata/datasets.xlsx b/inst/extdata/datasets.xlsx index 21b0eff5..d4e704bd 100644 Binary files a/inst/extdata/datasets.xlsx and b/inst/extdata/datasets.xlsx differ diff --git a/inst/extdata/deaths.xls b/inst/extdata/deaths.xls index bb391042..e1060381 100644 Binary files a/inst/extdata/deaths.xls and b/inst/extdata/deaths.xls differ diff --git a/man/read_excel.Rd b/man/read_excel.Rd index a7aa7fd8..197bb355 100644 --- a/man/read_excel.Rd +++ b/man/read_excel.Rd @@ -125,13 +125,17 @@ read_excel(datasets, 2) read_excel(datasets, "mtcars") # Skip rows and use default column names -read_excel(datasets, skip = 148, col_names = FALSE) +read_excel(datasets, skip = 10, col_names = FALSE) # Recycle a single column type read_excel(datasets, col_types = "text") # Specify some col_types and guess others -read_excel(datasets, col_types = c("text", "guess", "numeric", "guess", "guess")) +read_excel( + 
readxl_example("deaths.xlsx"), + skip = 4, n_max = 10, col_names = TRUE, + col_types = c("text", "text", "guess", "guess", "guess", "guess") +) # Accomodate a column with disparate types via col_type = "list" df <- read_excel(readxl_example("clippy.xlsx"), col_types = c("text", "list")) diff --git a/vignettes/articles/readxl-workflows.Rmd b/vignettes/articles/readxl-workflows.Rmd index 7a7c39c8..945fc012 100644 --- a/vignettes/articles/readxl-workflows.Rmd +++ b/vignettes/articles/readxl-workflows.Rmd @@ -47,13 +47,13 @@ Solution: cache a CSV snapshot of your raw data tables at the time of export. Ev Pipe the output of `read_excel()` directly into `readr::write_csv()` like so: ```{r} -iris_xl <- readxl_example("datasets.xlsx") %>% - read_excel(sheet = "iris") %>% - write_csv("iris-raw.csv") +mtcars_xl <- readxl_example("datasets.xlsx") %>% + read_excel(sheet = "mtcars") %>% + write_csv("mtcars-raw.csv") ``` ```{r include = FALSE} -delete_on_exit <- c(delete_on_exit, "iris-raw.csv") +delete_on_exit <- c(delete_on_exit, "mtcars-raw.csv") ``` Why does this work? `readr::write_csv()` is a well-mannered "write" function: it does its main job *and returns its input invisibly*. The above command reads the iris sheet from readxl's `datasets.xlsx` example workbook and caches a CSV version of the resulting data frame to file. @@ -61,18 +61,18 @@ Why does this work? `readr::write_csv()` is a well-mannered "write" function: it Let's check. Did we still import the data? Did we write the CSV file? ```{r} -iris_xl -dir(pattern = "iris") +mtcars_xl +dir(pattern = "mtcars") ``` Yes! Is the data written to CSV an exact copy of what we imported from Excel? 
```{r} -iris_alt <- read_csv("iris-raw.csv") +mtcars_alt <- read_csv("mtcars-raw.csv") ## readr leaves a note-to-self in `spec` that records its column guessing, ## so we remove that attribute before the check -attr(iris_alt, "spec") <- NULL -identical(iris_xl, iris_alt) +attr(mtcars_alt, "spec") <- NULL +identical(mtcars_xl, mtcars_alt) ``` Yes! If we needed to restart or troubleshoot this fictional analysis, `iris-raw.csv` is available as a second, highly accessible alternative to `datasets.xlsx`. @@ -193,11 +193,11 @@ Rework examples from above but using base R only, other than readxl. ### Cache a CSV snapshot ```{r eval = FALSE} -iris_xl <- read_excel(readxl_example("datasets.xlsx"), sheet = "iris") -write.csv(iris_xl, "iris-raw.csv", row.names = FALSE, quote = FALSE) -iris_alt <- read.csv("iris-raw.csv", stringsAsFactors = FALSE) +mtcars_xl <- read_excel(readxl_example("datasets.xlsx"), sheet = "mtcars") +write.csv(mtcars_xl, "mtcars-raw.csv", row.names = FALSE, quote = FALSE) +mtcars_alt <- read.csv("mtcars-raw.csv", stringsAsFactors = FALSE) ## coerce iris_xl back to a data.frame -identical(as.data.frame(iris_xl), iris_alt) +identical(as.data.frame(mtcars_xl), mtcars_alt) ``` ### Iterate over multiple worksheets in a workbook