Skip to content

Commit

Permalink
Merge pull request #253 from rOpenGov/pitkant-2
Browse files Browse the repository at this point in the history
Use new API instead of old bulk download
  • Loading branch information
antagomir authored Feb 12, 2023
2 parents 1c6086c + d327405 commit 999f039
Show file tree
Hide file tree
Showing 40 changed files with 4,031 additions and 283 deletions.
2 changes: 2 additions & 0 deletions .Rbuildignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
^renv$
^renv\.lock$
^CRAN-RELEASE$
# Extra material related but not to be included in the package
inst/extras
Expand Down
1 change: 1 addition & 0 deletions .Rprofile
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
source("renv/activate.R")
6 changes: 3 additions & 3 deletions .github/workflows/render-readme.yaml → .github/workflows/render-rmarkdown.yaml
100755 → 100644
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples
# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
on:
workflow_dispatch:
push:
paths: ['README.Rmd']
paths: ['**.Rmd']
workflow_dispatch:

name: render-rmarkdown

Expand Down Expand Up @@ -31,4 +31,4 @@ jobs:
git config --local user.name "$GITHUB_ACTOR"
git config --local user.email "$GITHUB_ACTOR@users.noreply.github.com"
git commit ${RMD_PATH[*]/.Rmd/.md} -m 'Re-build Rmarkdown files' || echo "No changes to commit"
git push origin || echo "No changes to commit"
git push origin || echo "No changes to commit"
7 changes: 4 additions & 3 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
Type: Package
Package: eurostat
Title: Tools for Eurostat Open Data
Version: 3.7.13
Date: 2022-06-28
Version: 3.7.14
Date: 2023-02-09
Authors@R: c(
person(given = "Leo",
family = "Lahti",
Expand Down Expand Up @@ -79,7 +79,8 @@ Imports:
stringi,
stringr,
tibble,
tidyr (>= 1.0.0)
tidyr (>= 1.0.0),
ISOweek
Suggests:
RColorBrewer,
knitr,
Expand Down
5 changes: 5 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,9 @@ export(clean_eurostat_cache)
export(cut_to_classes)
export(dic_order)
export(eurotime2date)
export(eurotime2date2)
export(eurotime2num)
export(eurotime2num2)
export(get_bibentry)
export(get_eurostat)
export(get_eurostat_dic)
Expand All @@ -17,6 +19,7 @@ export(grepEurostatTOC)
export(harmonize_country_code)
export(harmonize_geo_code)
export(label_eurostat)
export(label_eurostat2)
export(label_eurostat_tables)
export(label_eurostat_vars)
export(recode_nuts)
Expand All @@ -26,6 +29,7 @@ export(search_eurostat)
export(set_eurostat_cache_dir)
export(validate_geo_code)
export(validate_nuts_regions)
importFrom(ISOweek,ISOweek2date)
importFrom(RefManageR,BibEntry)
importFrom(RefManageR,toBiblatex)
importFrom(broom,tidy)
Expand Down Expand Up @@ -61,6 +65,7 @@ importFrom(stringr,str_replace_all)
importFrom(tibble,as_tibble)
importFrom(tibble,is_tibble)
importFrom(tidyr,gather)
importFrom(tidyr,pivot_longer)
importFrom(tidyr,separate)
importFrom(utils,data)
importFrom(utils,download.file)
Expand Down
8 changes: 8 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,11 @@
# eurostat 3.7.14 (2023-02-08)

* Updated `get_eurostat` and its assorted functions to download data from the new dissemination API (related to issues #251, #243). See Eurostat web page Transition - from Eurostat Bulk Download to API for a list of differences between old and new data sources: https://wikis.ec.europa.eu/display/EUROSTATHELP/Transition+-+from+Eurostat+Bulk+Download+to+API
* Added new temporary functions for downloading and handling data from the new dissemination API: `get_eurostat_raw2`, `tidy_eurostat2`, `convert_time_col2`, `eurotime2date2`, `eurotime2num2` and `label_eurostat2`. When the old bulk download facilities are decommissioned, these functions will replace the old functions with old naming schemes, without the 2's.
* `tidy_eurostat2` function is now able to handle multiple time frequencies in one call: for example, you can download annual, quarterly, and monthly data by passing the vector `c("A", "Q", "M")` in `select_time` instead of requesting each frequency in a separate call. The function will also return multiple time series in one dataset if `select_time` is `NULL` (as it is by default). If the dataset contains multiple time series and either several frequencies are explicitly requested or no `select_time` parameter is given, a message will be printed.
* `eurotime2num` can now handle monthly and weekly data as well.
* Added a new parameter to `get_eurostat` function: legacy_bulk_download (default = TRUE). By setting this parameter to FALSE the user can download data from the new dissemination API. If you want to test the new API before it becomes the only way to download the data (and we very much encourage you to do so), set this parameter to FALSE.

# eurostat 3.7.13 (2023-02-01)

* Updated `get_eurostat_json` to migrate from JSON web service to API Statistics (addressed in issues #243, #251). Please note that the output from JSON API is now slightly different than before: the datasets now contain a freq column to indicate the frequency with which data has been collected, for example annually "A", monthly "M" or quarterly "Q". See Eurostat - Data browser online help website for more information: https://wikis.ec.europa.eu/display/EUROSTATHELP/API+Statistics+-+migrating+from+JSON+web+service+to+API+Statistics
Expand Down
114 changes: 114 additions & 0 deletions R/eurotime2date.R
Original file line number Diff line number Diff line change
Expand Up @@ -86,3 +86,117 @@ eurotime2date <- function(x, last = FALSE) {
y <- lubridate::ymd(x)
y
}

#' @title Date Conversion from New Eurostat Time Format
#' @description
#' Date conversion from Eurostat time format. A function to
#' convert Eurostat time values to objects of class [Date()]
#' representing calendar dates.
#' @details
#' Available patterns are YYYY (year), YYYY-SN (semester), YYYY-QN (quarter),
#' YYYY-MM (month), YYYY-WNN (week) and YYYY-MM-DD (day).
#'
#' The time pattern is detected from the first unique value of `x` only, so
#' all values of `x` are expected to share the same pattern.
#' @param x a character string (or factor) with time information in Eurostat
#' time format.
#' @param last a logical. If `FALSE` (default) the date is
#' the first date of the period (year, semester, quarter, month or week).
#' If `TRUE` the date is the last date of the period. Has no effect on daily
#' data, where the first and the last date of the period coincide.
#' @references
#' See `citation("eurostat")`:
#'
#' ```{r, echo=FALSE, comment="#" }
#' citation("eurostat")
#' ```
#'
#' @return an object of class [Date()]. If the time pattern is not
#' recognised, a warning is given and `x` is returned unconverted as a
#' factor. Empty or all-`NA` input gives `NA` dates of the same length.
#' @author Janne Huovari <janne.huovari@@ptt.fi>
#' @family helpers
#' @seealso [lubridate::ymd()]
#' @examplesIf check_access_to_data()
#' \donttest{
#' na_q <- get_eurostat("namq_10_pc", time_format = "raw")
#' na_q$time <- eurotime2date(x = na_q$time)
#' unique(na_q$time)
#' }
#'
#' \dontrun{
#' # Test for weekly data
#' get_eurostat(
#'   id = "lfsi_abs_w",
#'   select_time = c("W"),
#'   time_format = "date",
#'   legacy_bulk_download = FALSE
#' )
#' }
#'
#' @importFrom lubridate ymd
#' @importFrom ISOweek ISOweek2date
#'
#' @export
eurotime2date2 <- function(x, last = FALSE) {
  if (!is.factor(x)) x <- factor(x)
  unique_times <- levels(x)

  # Empty or all-NA input has no levels to inspect; return missing dates of
  # matching length rather than failing on an NA time code below.
  if (length(unique_times) == 0) {
    return(as.Date(rep(NA_character_, length(x))))
  }

  year <- substr(unique_times, 1, 4)
  # 5th character is always "-", if there is one.
  # From "YYYY-QN" the following line would extract "QN", from "YYYY-WNN" "WNN"
  subyear <- substr(unique_times, 6, 8)
  # The first character tells the type of the date
  tcode <- substr(subyear[1], 1, 1)
  if (tcode != "_" && nchar(unique_times[1]) > 8) {
    # Longer than "YYYY-WNN": for "YYYY-MM-DD" the 8th character is "-",
    # which is used as the time code of daily data
    tcode <- substr(unique_times[1], 8, 8)
  }

  # Nothing after the year: annual data
  if (tcode == "") tcode <- "Y"

  day <- "01" # default for day

  if (tcode == "Y") {
    # yearly
    period <- "01"
  } else if (tcode == "S") {
    # bi-annual: first month of each semester
    lookup <- c(S1 = "01", S2 = "07")
    period <- lookup[subyear]
  } else if (tcode == "Q") {
    # quarterly: first month of each quarter
    lookup <- c(Q1 = "01", Q2 = "04", Q3 = "07", Q4 = "10")
    period <- lookup[subyear]
  } else if (tcode == "0" || tcode == "1") {
    # monthly: "MM" starts with 0 or 1
    period <- gsub("M", "", subyear)
  } else if (tcode == "W") {
    # weekly: period must be of format "WNN", e.g. W01 for 1st week of the year
    period <- subyear
  } else if (tcode == "-") {
    # daily: take month and day straight from "YYYY-MM-DD"
    period <- substr(unique_times, 6, 7)
    day <- substr(unique_times, 9, 10)
  } else {
    warning(
      "Unknown time code, ", tcode,
      ". No date conversion was made.\n\nPlease fill bug report at ",
      "https://github.com/rOpenGov/eurostat/issues."
    )
    return(x)
  }

  if (tcode == "W") {
    # ISO week dates are "YYYY-WNN-D" with weekday D in 1-7 (not 01-07):
    # 1 is the first and 7 the last day of the week
    weekday <- if (last == TRUE) 7 else 1
    levels(x) <- paste0(year, "-", period, "-", weekday)
    return(ISOweek::ISOweek2date(as.character(x)))
  }

  levels(x) <- paste0(year, "-", period, "-", day)

  # The date as the last date of the period. A day is its own last date, so
  # daily data needs no adjustment.
  if (last == TRUE && tcode != "-") {
    # Number of days that moves the period start into the month following
    # the period; cutting to the start of that month and subtracting one day
    # then gives the last day of the period.
    shift <- c("Y" = 367, "S" = 186, "Q" = 96, "0" = 32, "1" = 32)[tcode]
    levels(x) <- lubridate::ymd(
      cut(lubridate::ymd(levels(x)) + shift, "month")
    ) - 1
  }

  lubridate::ymd(x)
}
68 changes: 68 additions & 0 deletions R/eurotime2num.R
Original file line number Diff line number Diff line change
Expand Up @@ -56,3 +56,71 @@ eurotime2num <- function(x) {
y <- as.numeric(as.character(x))
y
}

#' @title Conversion of Eurostat Time Format to Numeric
#' @description A conversion of a Eurostat time format to numeric.
#' @details
#' Bi-annual (semester), quarterly, monthly and weekly data can be presented as
#' a fraction of the year in beginning of the period. Conversion of daily data
#' is not supported.
#'
#' The time pattern is detected from the first unique value of `x` only, so
#' all values of `x` are expected to share the same pattern.
#' @param x a character string (or factor) with time information in Eurostat
#' time format.
#' @return A numeric vector, see [as.numeric()]. For daily data or an
#' unrecognised time pattern a warning is given and `x` is returned
#' unconverted as a factor. Empty or all-`NA` input gives `NA` values of the
#' same length.
#' @author Janne Huovari <janne.huovari@@ptt.fi>, Pyry Kantanen
#' @family helpers
#' @examplesIf check_access_to_data()
#' \donttest{
#' na_q <- get_eurostat("namq_10_pc", time_format = "raw")
#' na_q$time <- eurotime2num(x = na_q$time)
#'
#' unique(na_q$time)
#' }
#'
#' @export
eurotime2num2 <- function(x) {
  x <- as.factor(x)
  times <- levels(x)

  # Empty or all-NA input has no levels to inspect; return missing values of
  # matching length rather than failing on an NA time code below.
  if (length(times) == 0) {
    return(rep(NA_real_, length(x)))
  }

  if (nchar(times[1]) > 8) {
    # The only format longer than YYYY-WNN (weeks, 8 chars) is the
    # day/date notation YYYY-MM-DD (10 chars)
    tcode <- "D"
  } else {
    # Possible tcodes: S, Q, 0 or 1 (months), W
    tcode <- substr(times[1], 6, 6)
    if (tcode == "0" || tcode == "1") {
      tcode <- "M"
    } else if (tcode == "") {
      # nothing after the year: the data is probably annual
      tcode <- "A"
    }
  }

  # Number of periods in a year for each convertible time code. Daily data
  # ("D") is deliberately absent: it is reported and returned unconverted.
  periods_per_year <- c(A = 1, S = 2, Q = 4, M = 12, W = 53)

  if (!(tcode %in% names(periods_per_year))) {
    if (tcode == "D") {
      warning("Time format is daily data. No numeric conversion was made.")
    } else {
      warning(
        "Unknown time code, ", tcode,
        ". No numeric conversion was made.\n\n",
        "Please fill bug report at ",
        "https://github.com/rOpenGov/eurostat/issues."
      )
    }

    return(x)
  }

  year <- substr(times, 1, 4)
  subyear <- substr(times, 6, 8)
  # The only characters that can be present are S, Q and W
  subyear <- gsub("[SQW]", "", subyear)
  # Annual data has no sub-period: treat it as the first period of the year
  subyear[subyear == ""] <- "1"

  # Year plus the fraction of the year elapsed at the start of the period,
  # computed once per unique time value and mapped back through the factor
  # codes (keeps full numeric precision; NA values of x stay NA)
  level_values <- as.numeric(year) +
    (as.numeric(subyear) - 1) / periods_per_year[[tcode]]
  level_values[as.integer(x)]
}
Loading

0 comments on commit 999f039

Please sign in to comment.