diff --git a/.Rbuildignore b/.Rbuildignore index 780da5b..9188ade 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -25,3 +25,4 @@ notes.R ^LICENSE.note$ ^.editorconfig$ ^tests/testdata/issue81.ods$ +^Makefile$ diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml index bed8865..c270f85 100644 --- a/.github/workflows/R-CMD-check.yaml +++ b/.github/workflows/R-CMD-check.yaml @@ -22,7 +22,7 @@ jobs: config: - {os: macos-latest, r: 'release'} - {os: windows-latest, r: 'release'} - - {os: windows-latest, r: '4.0'} + - {os: ubuntu-latest, r: 'oldrel-4'} ## minimum version - {os: ubuntu-latest, r: 'devel', http-user-agent: 'release'} - {os: ubuntu-latest, r: 'release'} - {os: ubuntu-latest, r: 'oldrel-1'} diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 0000000..ce48fa6 --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,5 @@ +{ + "files.associations": { + "iosfwd": "cpp" + } +} \ No newline at end of file diff --git a/DESCRIPTION b/DESCRIPTION index 1b030e6..551313e 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: readODS Type: Package Title: Read and Write ODS Files -Version: 2.3.2 +Version: 2.3.4 Authors@R: c(person("Gerrit-Jan", "Schutten", role = c("aut"), email = "phonixor@gmail.com"), person("Chung-hong", "Chan", role = c("aut", "cre"), email = "chainsawtiney@gmail.com", comment = c(ORCID = "0000-0002-6232-7530")), @@ -19,6 +19,7 @@ Authors@R: person("Matt", "Kerlogue", role = c("ctb")), person("Michal", "Lauer", role = c("ctb"), email = "michal.lauer.25@gmail.com"), person("Till", "Straube", role = c("ctb"), email = "straube@geo.uni-frankfurt.de"), + person("Mauricio", "Vargas Sepulveda", role = c("ctb"), email = "m.vargas.sepulveda@gmail.com"), person("Marcin", "Kalicinski", role = c("ctb", "cph"), comment = "Author of included RapidXML code")) Description: Read ODS (OpenDocument Spreadsheet) into R as data frame. Also support writing data frame into ODS file. 
URL: https://docs.ropensci.org/readODS/, https://github.com/ropensci/readODS @@ -48,5 +49,5 @@ Roxygen: list(markdown = TRUE) Encoding: UTF-8 VignetteBuilder: knitr Depends: - R (>= 4.0) + R (>= 4.1) Language: en-GB diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..95c10a6 --- /dev/null +++ b/Makefile @@ -0,0 +1,24 @@ +clean: + @Rscript -e 'devtools::clean_dll()' + +test: + @Rscript -e 'devtools::clean_dll()' + @Rscript -e 'devtools::load_all()' + +check: + @echo "Local" + @Rscript -e 'devtools::install()' + @Rscript -e 'devtools::check()' + +site: + @Rscript -e 'devtools::document()' + @Rscript -e 'pkgdown::build_site()' + +install: + @Rscript -e 'devtools::clean_dll()' + @Rscript -e 'devtools::install()' + +clang_format=`which clang-format-18` + +format: $(shell find . -name '*.h') $(shell find . -name '*.hpp') $(shell find . -name '*.cpp') + @${clang_format} -i $? diff --git a/NAMESPACE b/NAMESPACE index 911f4ce..5f93fbd 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -3,8 +3,10 @@ export(list_fods_sheets) export(list_ods_sheets) export(ods_sheets) +export(readODS_progress) export(read_fods) export(read_ods) export(write_fods) export(write_ods) +importFrom(utils,flush.console) useDynLib(readODS, .registration = TRUE) diff --git a/NEWS.md b/NEWS.md index c449a3c..e2ee757 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,14 @@ +# readODS 2.3.4 + +* Matches `readxl` parameters +* Adds progress bar for reading functions +* Rewritten C+ parts for large files +* Implements #154 + +# readODS 2.3.3 + +* Fix #213, prevent error when `set.seed` for `write_ods()` + # readODS 2.3.2 * Fix #207, trim off large or unused test files diff --git a/R/readODS-package.R b/R/readODS-package.R index 20c0bd7..160cac2 100644 --- a/R/readODS-package.R +++ b/R/readODS-package.R @@ -1,6 +1,5 @@ #' @useDynLib readODS, .registration = TRUE -NULL - +#' @importFrom utils flush.console #' @keywords internal "_PACKAGE" diff --git a/R/read_ods.R b/R/read_ods.R index 8e7deef..5de1fa1 
100644 --- a/R/read_ods.R +++ b/R/read_ods.R @@ -1,3 +1,112 @@ +#' Determine whether to show progress bar +#' +#' By default, readODS displays a progress bar unless one of the following is TRUE: +#' - The progress is explicitly disabled by setting options(readODS.show_progress = FALSE). +#' - The code is run in a non-interactive session (interactive() is FALSE). +#' - The code is run by knitr / rmarkdown. +#' +#' @return logical indicating whether to show progress +#' @export +readODS_progress <- function() { + # Check if explicitly disabled + if (isFALSE(getOption("readODS.show_progress", TRUE))) { + return(FALSE) + } + + # Don't show in non-interactive sessions + # make mockable (see utils.R) + if (!.is_interactive()) { + return(FALSE) + } + + # Don't show when knitting + if (isTRUE(getOption("knitr.in.progress", FALSE))) { + return(FALSE) + } + + # Don't show in RStudio notebook chunks + if (Sys.getenv("RSTUDIO_NOTEBOOK") != "") { + return(FALSE) + } + + return(TRUE) +} + +.create_progress_bar <- function(progress, total_work = 100) { + if (!isTRUE(progress)) { + # Return a no-op progress bar with zero overhead + noop <- function(...) 
invisible(NULL) + return(list(tick = noop, terminate = noop, update = noop)) + } + + # Efficient progress bar with minimal overhead + start_time <- Sys.time() + current_progress <- 0L + last_displayed_percent <- -1L + + list( + tick = function() { + current_progress <<- current_progress + 1L + percent <- as.integer((current_progress / total_work) * 100) + if (percent != last_displayed_percent && (percent %% 10L == 0L || percent >= 100L)) { + last_displayed_percent <<- percent + .update_progress_display(current_progress, total_work, start_time) + } + }, + terminate = function() { + if (current_progress > 0L) { + .update_progress_display(total_work, total_work, start_time) + cat(" +") + } + }, + update = function(current = NULL) { + if (!is.null(current)) { + current_progress <<- as.integer(current) + percent <- as.integer((current_progress / total_work) * 100) + if (percent != last_displayed_percent) { + last_displayed_percent <<- percent + .update_progress_display(current_progress, total_work, start_time) + } + } + } + ) +} + +.update_progress_display <- function(current, total, start_time) { + percent <- min(100L, as.integer((current / total) * 100)) + bar_width <- 50L + filled_width <- as.integer((percent / 100) * bar_width) + + # Pre-allocate strings for efficiency + equals <- strrep("=", filled_width) + spaces <- strrep(" ", bar_width - filled_width) + + elapsed <- as.numeric(difftime(Sys.time(), start_time, units = "secs")) + if (elapsed > 0 && current > 0) { + rate <- current / elapsed + if (rate > 0) { + eta <- (total - current) / rate + eta_str <- if (eta < 60) { + sprintf("ETA: %ds", round(eta)) + } else { + sprintf("ETA: %dm", round(eta / 60)) + } + } else { + eta_str <- "" + } + } else { + eta_str <- "" + } + + # Construct progress bar display + bar <- sprintf("[%s%s]", equals, spaces) + + # Use carriage return to overwrite the same line + cat(sprintf("\r%s %3d%% %s", bar, percent, eta_str)) + flush.console() +} + .return_zerorow <- function(x, 
row_header, .name_repair) { jcol <- ifelse(row_header, 2, 1) col_n <- vctrs::vec_as_names(as.character(x[1,jcol:ncol(x)]), repair = .name_repair) @@ -80,7 +189,9 @@ verbose = FALSE, as_tibble = TRUE, trim_ws = TRUE, - n_max = Inf) { + n_max = Inf, + guess_max = min(1000, n_max), + progress = readODS_progress()) { if (!file.exists(path)) { stop("file does not exist", call. = FALSE) } @@ -119,6 +230,12 @@ if (!is.numeric(n_max)) { stop("n_max must be numeric.", call. = FALSE) } + if (!is.numeric(guess_max)) { + stop("guess_max must be numeric.", call. = FALSE) + } + if (!is.logical(progress)) { + stop("progress must be of type `boolean`", call. = FALSE) + } } .return_empty <- function(as_tibble = FALSE) { @@ -131,11 +248,11 @@ return(data.frame()) } -.handle_col_types <- function(res, col_types, verbose, na, trim_ws) { +.handle_col_types <- function(res, col_types, verbose, na, trim_ws, guess_max = NA) { if (isTRUE(is.na(col_types)) || nrow(res) == 0) { return(res) } - minty::type_convert(df = res, col_types = col_types, verbose = verbose, na = na, trim_ws = trim_ws) + minty::type_convert(df = res, col_types = col_types, verbose = verbose, na = na, trim_ws = trim_ws, guess_max = guess_max) } ## standardise `sheet` parameter as a number, i.e. 
sheet_index @@ -180,7 +297,9 @@ .name_repair = "unique", flat = FALSE, trim_ws = TRUE, - n_max = Inf) { + n_max = Inf, + guess_max = min(1000, n_max), + progress = readODS_progress()) { .check_read_args(path, sheet, col_names, @@ -194,8 +313,14 @@ verbose, as_tibble, trim_ws, - n_max) + n_max, + guess_max, + progress) path <- normalizePath(path) + + # Initialize progress bar + pb <- .create_progress_bar(progress, total_work = 5) + if (flat) { .get_sheet_names_func <- get_flat_sheet_names_ .read_ods_func <- read_flat_ods_ @@ -203,10 +328,16 @@ .get_sheet_names_func <- get_sheet_names_ .read_ods_func <- read_ods_ } + + pb$tick() # Step 1: Setup complete ## Get cell range info limits <- .standardise_limits(range, skip, n_max) + pb$tick() # Step 2: Range calculated + sheet_index <- .standardise_sheet(sheet = sheet, sheet_names = .get_sheet_names_func(file = path, include_external_data = TRUE), range = range) + pb$tick() # Step 3: Sheet identified + strings <- .read_ods_func(file = path, start_row = limits["min_row"], stop_row = limits["max_row"], @@ -214,13 +345,16 @@ stop_col = limits["max_col"], sheet_index = sheet_index, formula_as_formula = formula_as_formula) + pb$tick() # Step 4: Raw data read if (((strings[1] == 0 || strings[2] == 0)) && isTRUE(getOption("readODS.v200", FALSE))) { + pb$terminate() return(.return_empty(as_tibble = as_tibble)) } if (((strings[1] == 0 || strings[2] == 0) || (strings[1] == 1 && row_names)) && isFALSE(getOption("readODS.v200", FALSE))) { + pb$terminate() return(.return_empty(as_tibble = as_tibble)) } res <- as.data.frame( @@ -230,7 +364,7 @@ byrow = TRUE), stringsAsFactors = FALSE) res <- .change_df_with_col_row_header(x = res, col_header = col_names, row_header = row_names, .name_repair = .name_repair) - res <- .handle_col_types(res, col_types = col_types, verbose = verbose, na = na, trim_ws = trim_ws) + res <- .handle_col_types(res, col_types = col_types, verbose = verbose, na = na, trim_ws = trim_ws, guess_max = guess_max) if 
(strings_as_factors) { res <- .convert_strings_to_factors(df = res) } @@ -240,6 +374,10 @@ if (as_tibble) { res <- tibble::as_tibble(x = res, .name_repair = .name_repair) } + + pb$tick() # Step 5: Data processed + pb$terminate() # Finish progress bar + return(res) } @@ -305,6 +443,8 @@ #' "magic number". #' @param trim_ws logical, should leading and trailing whitespace be trimmed? #' @param n_max numeric, Maximum number of data rows to read. Ignored if `range` is given. +#' @param guess_max numeric, Maximum number of data rows to use for guessing column types. Defaults to min(1000, n_max). +#' @param progress logical, Display a progress bar? By default, shows a progress bar in interactive sessions unless disabled. See \code{\link{readODS_progress}} for more details. #' @return A tibble (\code{tibble}) or data frame (\code{data.frame}) containing a representation of data in the (f)ods file. #' @author Peter Brohan , Chung-hong Chan , Gerrit-Jan Schutten #' @examples @@ -347,7 +487,9 @@ read_ods <- function(path, ods_format = c("auto", "ods", "fods"), guess = FALSE, trim_ws = TRUE, - n_max = Inf) { + n_max = Inf, + guess_max = min(1000, n_max), + progress = readODS_progress()) { ods_format <- .determine_ods_format(path = path, guess = guess, ods_format = match.arg(ods_format)) ## Should use match.call but there's a weird bug if one of the variable names is 'file' .read_ods(path = path, @@ -365,7 +507,9 @@ read_ods <- function(path, .name_repair = .name_repair, flat = ods_format == "fods", trim_ws = trim_ws, - n_max = n_max) + n_max = n_max, + guess_max = guess_max, + progress = progress) } #' @rdname read_ods @@ -384,7 +528,9 @@ read_fods <- function(path, as_tibble = TRUE, .name_repair = "unique", trim_ws = TRUE, - n_max = Inf) { + n_max = Inf, + guess_max = min(1000, n_max), + progress = readODS_progress()) { ## Should use match.call but there's a weird bug if one of the variable names is 'file' .read_ods(path = normalizePath(path, mustWork = FALSE), sheet = sheet, 
@@ -401,5 +547,7 @@ read_fods <- function(path, .name_repair = .name_repair, flat = TRUE, trim_ws = trim_ws, - n_max = n_max) + n_max = n_max, + guess_max = guess_max, + progress = progress) } diff --git a/R/utils.R b/R/utils.R index 3f669e5..31854b6 100644 --- a/R/utils.R +++ b/R/utils.R @@ -35,3 +35,7 @@ check_nonnegative_integer <- function(x, argument) { ## if ncol == 0, without as.character would return `logical(0)` as.character(ifelse(unlist(lapply(x, function(x) class(x)[1])) %in% c("integer", "numeric"), "float", "string")) } + +.is_interactive <- function(...) { + interactive() +} diff --git a/R/write_ods.R b/R/write_ods.R index f9b0753..adf863d 100644 --- a/R/write_ods.R +++ b/R/write_ods.R @@ -92,7 +92,7 @@ if (isFALSE(flat)) { temp_ods_dir <- file.path(tempdir(), stringi::stri_rand_strings(1, 30, pattern = "[A-Za-z0-9]")) dir.create(temp_ods_dir) - on.exit(unlink(temp_ods_dir)) + on.exit(unlink(temp_ods_dir, recursive = TRUE)) } if (append || update) { .update_ods(x = .preprocess_x(x), path = path, sheet = sheet, append = append, update = update, row_names = row_names, diff --git a/benchmark/issue81_template.md b/benchmark/issue81_template.md index a0c9a68..7abda1c 100644 --- a/benchmark/issue81_template.md +++ b/benchmark/issue81_template.md @@ -5,7 +5,7 @@ date() ``` - [1] "Tue Jun 4 18:39:24 2024" + [1] "Wed Nov 19 20:06:47 2025" ``` r devtools::load_all() @@ -19,7 +19,7 @@ system.time(x <- read_ods(file, sheet = 2, skip = 4)) ``` user system elapsed - 0.407 0.051 0.462 + 0.428 0.059 0.488 ``` r dim(x) @@ -31,13 +31,13 @@ dim(x) sessionInfo() ``` - R version 4.4.0 (2024-04-24) + R version 4.5.2 (2025-10-31) Platform: x86_64-pc-linux-gnu - Running under: Ubuntu 22.04.4 LTS + Running under: Ubuntu 22.04.5 LTS Matrix products: default - BLAS: /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.10.0 - LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.10.0 + BLAS: /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3 + LAPACK: 
/usr/lib/x86_64-linux-gnu/openblas-pthread/libopenblasp-r0.3.20.so; LAPACK version 3.10.0 locale: [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C @@ -54,21 +54,20 @@ sessionInfo() [1] stats graphics grDevices utils datasets methods base other attached packages: - [1] readODS_2.3.1 testthat_3.2.1 + [1] readODS_2.3.4 testthat_3.2.3 loaded via a namespace (and not attached): - [1] utf8_1.2.4 stringi_1.8.4 digest_0.6.35 magrittr_2.0.3 - [5] evaluate_0.23 pkgload_1.3.4 fastmap_1.1.1 cellranger_1.1.0 - [9] rprojroot_2.0.4 jsonlite_1.8.8 zip_2.3.1 pkgbuild_1.4.4 - [13] sessioninfo_1.2.2 brio_1.1.4 urlchecker_1.0.1 promises_1.3.0 - [17] purrr_1.0.2 fansi_1.0.6 cli_3.6.2 shiny_1.8.1.1 - [21] rlang_1.1.4 ellipsis_0.3.2 remotes_2.5.0 withr_3.0.0 - [25] cachem_1.0.8 yaml_2.3.8 devtools_2.4.5 tools_4.4.0 - [29] tzdb_0.4.0 memoise_2.0.1 httpuv_1.6.15 here_1.0.1 - [33] vctrs_0.6.5 R6_2.5.1 mime_0.12 lifecycle_1.0.4 - [37] minty_0.0.1 stringr_1.5.1 fs_1.6.3 htmlwidgets_1.6.4 - [41] usethis_2.2.3 miniUI_0.1.1.1 pkgconfig_2.0.3 desc_1.4.3 - [45] pillar_1.9.0 later_1.3.2 glue_1.7.0 profvis_0.3.8 - [49] Rcpp_1.0.12 xfun_0.43 tibble_3.2.1 rstudioapi_0.16.0 - [53] knitr_1.46 xtable_1.8-4 htmltools_0.5.8.1 rmarkdown_2.26 - [57] compiler_4.4.0 + [1] miniUI_0.1.2 jsonlite_2.0.0 compiler_4.5.2 brio_1.1.5 + [5] promises_1.3.3 zip_2.3.3 Rcpp_1.1.0 later_1.4.4 + [9] yaml_2.3.10 fastmap_1.2.0 here_1.0.1 mime_0.13 + [13] R6_2.5.1 knitr_1.50 htmlwidgets_1.6.4 tibble_3.2.1 + [17] desc_1.4.3 profvis_0.4.0 rprojroot_2.1.1 shiny_1.11.1 + [21] tzdb_0.4.0 pillar_1.11.1 rlang_1.1.4 stringi_1.8.4 + [25] cachem_1.1.0 httpuv_1.6.16 xfun_0.53 fs_1.6.6 + [29] pkgload_1.4.0 memoise_2.0.1 cli_3.6.3 withr_3.0.1 + [33] magrittr_2.0.3 digest_0.6.37 rstudioapi_0.17.1 xtable_1.8-4 + [37] remotes_2.5.0 devtools_2.4.5 lifecycle_1.0.4 vctrs_0.6.5 + [41] minty_0.0.5 evaluate_1.0.5 glue_1.8.0 cellranger_1.1.0 + [45] urlchecker_1.0.1 sessioninfo_1.2.3 pkgbuild_1.4.8 rmarkdown_2.29 + [49] purrr_1.0.2 pkgconfig_2.0.3 tools_4.5.2 
usethis_3.1.0 + [53] ellipsis_0.3.2 htmltools_0.5.8.1 diff --git a/benchmark/roundtrip.md b/benchmark/roundtrip.md index 6092bd4..74bb8cb 100644 --- a/benchmark/roundtrip.md +++ b/benchmark/roundtrip.md @@ -16,7 +16,7 @@ Let’s break it down date() ``` - [1] "Tue Jun 4 18:38:20 2024" + [1] "Wed Nov 19 20:05:32 2025" ``` r library(nycflights13) @@ -24,14 +24,14 @@ system.time(path <- writexl::write_xlsx(flights)) ``` user system elapsed - 6.136 0.240 6.386 + 6.389 0.325 6.726 ``` r system.time(out <- readxl::read_xlsx(path)) ``` user system elapsed - 2.280 0.588 2.877 + 2.248 0.819 3.072 ``` r all.equal(out, flights) @@ -46,43 +46,20 @@ devtools::load_all() ``` ℹ Loading readODS - ℹ Re-compiling readODS (debug build) - - ── R CMD INSTALL ─────────────────────────────────────────────────────────────── - * installing *source* package ‘readODS’ ... - ** using staged installation - ** libs - using C++ compiler: ‘g++ (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0’ - make[1]: Entering directory '/home/chainsawriot/dev/readODS/src' - make[1]: Leaving directory '/home/chainsawriot/dev/readODS/src' - make[1]: Entering directory '/home/chainsawriot/dev/readODS/src' - g++ -std=gnu++17 -I"/usr/share/R/include" -DNDEBUG -I../inst/include -fpic -g -O2 -ffile-prefix-map=/build/r-base-H8urij/r-base-4.4.0=. -fstack-protector-strong -Wformat -Werror=format-security -Wdate-time -D_FORTIFY_SOURCE=2 -UNDEBUG -Wall -pedantic -g -O0 -c cpp11.cpp -o cpp11.o - g++ -std=gnu++17 -I"/usr/share/R/include" -DNDEBUG -I../inst/include -fpic -g -O2 -ffile-prefix-map=/build/r-base-H8urij/r-base-4.4.0=. -fstack-protector-strong -Wformat -Werror=format-security -Wdate-time -D_FORTIFY_SOURCE=2 -UNDEBUG -Wall -pedantic -g -O0 -c get_sheet_names.cpp -o get_sheet_names.o - g++ -std=gnu++17 -I"/usr/share/R/include" -DNDEBUG -I../inst/include -fpic -g -O2 -ffile-prefix-map=/build/r-base-H8urij/r-base-4.4.0=. 
-fstack-protector-strong -Wformat -Werror=format-security -Wdate-time -D_FORTIFY_SOURCE=2 -UNDEBUG -Wall -pedantic -g -O0 -c is_ods.cpp -o is_ods.o - g++ -std=gnu++17 -I"/usr/share/R/include" -DNDEBUG -I../inst/include -fpic -g -O2 -ffile-prefix-map=/build/r-base-H8urij/r-base-4.4.0=. -fstack-protector-strong -Wformat -Werror=format-security -Wdate-time -D_FORTIFY_SOURCE=2 -UNDEBUG -Wall -pedantic -g -O0 -c read_flat_ods_.cpp -o read_flat_ods_.o - g++ -std=gnu++17 -I"/usr/share/R/include" -DNDEBUG -I../inst/include -fpic -g -O2 -ffile-prefix-map=/build/r-base-H8urij/r-base-4.4.0=. -fstack-protector-strong -Wformat -Werror=format-security -Wdate-time -D_FORTIFY_SOURCE=2 -UNDEBUG -Wall -pedantic -g -O0 -c read_ods_.cpp -o read_ods_.o - g++ -std=gnu++17 -I"/usr/share/R/include" -DNDEBUG -I../inst/include -fpic -g -O2 -ffile-prefix-map=/build/r-base-H8urij/r-base-4.4.0=. -fstack-protector-strong -Wformat -Werror=format-security -Wdate-time -D_FORTIFY_SOURCE=2 -UNDEBUG -Wall -pedantic -g -O0 -c read_ods_internals.cpp -o read_ods_internals.o - g++ -std=gnu++17 -I"/usr/share/R/include" -DNDEBUG -I../inst/include -fpic -g -O2 -ffile-prefix-map=/build/r-base-H8urij/r-base-4.4.0=. -fstack-protector-strong -Wformat -Werror=format-security -Wdate-time -D_FORTIFY_SOURCE=2 -UNDEBUG -Wall -pedantic -g -O0 -c splice.cpp -o splice.o - g++ -std=gnu++17 -I"/usr/share/R/include" -DNDEBUG -I../inst/include -fpic -g -O2 -ffile-prefix-map=/build/r-base-H8urij/r-base-4.4.0=. 
-fstack-protector-strong -Wformat -Werror=format-security -Wdate-time -D_FORTIFY_SOURCE=2 -UNDEBUG -Wall -pedantic -g -O0 -c write_sheet_file_.cpp -o write_sheet_file_.o - g++ -std=gnu++17 -shared -L/usr/lib/R/lib -Wl,-Bsymbolic-functions -flto=auto -ffat-lto-objects -flto=auto -Wl,-z,relro -o readODS.so cpp11.o get_sheet_names.o is_ods.o read_flat_ods_.o read_ods_.o read_ods_internals.o splice.o write_sheet_file_.o -L/usr/lib/R/lib -lR - make[1]: Leaving directory '/home/chainsawriot/dev/readODS/src' - installing to /tmp/RtmpBw5a9V/devtools_install_3c7146ca2cbc6/00LOCK-readODS/00new/readODS/libs - ** checking absolute paths in shared objects and dynamic libraries - * DONE (readODS) ``` r system.time(path <- readODS::write_ods(flights)) ``` user system elapsed - 13.219 0.480 13.910 + 13.057 0.564 13.826 ``` r system.time(out <- readODS::read_ods(path)) ``` user system elapsed - 27.063 1.952 29.042 + 26.574 2.184 28.959 ``` r all.equal(out, flights) @@ -95,13 +72,13 @@ all.equal(out, flights) sessionInfo() ``` - R version 4.4.0 (2024-04-24) + R version 4.5.2 (2025-10-31) Platform: x86_64-pc-linux-gnu - Running under: Ubuntu 22.04.4 LTS + Running under: Ubuntu 22.04.5 LTS Matrix products: default - BLAS: /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.10.0 - LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.10.0 + BLAS: /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3 + LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/libopenblasp-r0.3.20.so; LAPACK version 3.10.0 locale: [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C @@ -118,22 +95,20 @@ sessionInfo() [1] stats graphics grDevices utils datasets methods base other attached packages: - [1] readODS_2.3.1 testthat_3.2.1 nycflights13_1.0.2 + [1] readODS_2.3.4 testthat_3.2.3 nycflights13_1.0.2 loaded via a namespace (and not attached): - [1] utf8_1.2.4 stringi_1.8.4 digest_0.6.35 magrittr_2.0.3 - [5] evaluate_0.23 pkgload_1.3.4 fastmap_1.1.1 cellranger_1.1.0 - [9] rprojroot_2.0.4 jsonlite_1.8.8 zip_2.3.1 processx_3.8.4 
- [13] writexl_1.5.0 pkgbuild_1.4.4 sessioninfo_1.2.2 brio_1.1.4 - [17] ps_1.7.6 urlchecker_1.0.1 promises_1.3.0 purrr_1.0.2 - [21] fansi_1.0.6 cli_3.6.2 shiny_1.8.1.1 rlang_1.1.4 - [25] ellipsis_0.3.2 withr_3.0.0 remotes_2.5.0 cachem_1.0.8 - [29] yaml_2.3.8 devtools_2.4.5 tools_4.4.0 tzdb_0.4.0 - [33] memoise_2.0.1 httpuv_1.6.15 vctrs_0.6.5 R6_2.5.1 - [37] mime_0.12 lifecycle_1.0.4 minty_0.0.1 stringr_1.5.1 - [41] fs_1.6.3 htmlwidgets_1.6.4 usethis_2.2.3 miniUI_0.1.1.1 - [45] callr_3.7.6 desc_1.4.3 pkgconfig_2.0.3 pillar_1.9.0 - [49] later_1.3.2 glue_1.7.0 profvis_0.3.8 Rcpp_1.0.12 - [53] xfun_0.43 tibble_3.2.1 rstudioapi_0.16.0 knitr_1.46 - [57] xtable_1.8-4 htmltools_0.5.8.1 rmarkdown_2.26 compiler_4.4.0 - [61] readxl_1.4.3 + [1] stringi_1.8.4 digest_0.6.37 magrittr_2.0.3 evaluate_1.0.5 + [5] pkgload_1.4.0 fastmap_1.2.0 cellranger_1.1.0 rprojroot_2.1.1 + [9] jsonlite_2.0.0 zip_2.3.3 writexl_1.5.1 pkgbuild_1.4.8 + [13] sessioninfo_1.2.3 brio_1.1.5 urlchecker_1.0.1 promises_1.3.3 + [17] purrr_1.0.2 cli_3.6.3 shiny_1.11.1 rlang_1.1.4 + [21] ellipsis_0.3.2 remotes_2.5.0 withr_3.0.1 cachem_1.1.0 + [25] yaml_2.3.10 devtools_2.4.5 tools_4.5.2 tzdb_0.4.0 + [29] memoise_2.0.1 httpuv_1.6.16 vctrs_0.6.5 R6_2.5.1 + [33] mime_0.13 lifecycle_1.0.4 minty_0.0.5 fs_1.6.6 + [37] htmlwidgets_1.6.4 usethis_3.1.0 miniUI_0.1.2 pkgconfig_2.0.3 + [41] desc_1.4.3 pillar_1.11.1 later_1.4.4 glue_1.8.0 + [45] profvis_0.4.0 Rcpp_1.1.0 xfun_0.53 tibble_3.2.1 + [49] rstudioapi_0.17.1 knitr_1.50 xtable_1.8-4 htmltools_0.5.8.1 + [53] rmarkdown_2.29 compiler_4.5.2 readxl_1.4.3 diff --git a/benchmark/tidyodsexample.md b/benchmark/tidyodsexample.md index f3621f1..f72d051 100644 --- a/benchmark/tidyodsexample.md +++ b/benchmark/tidyodsexample.md @@ -5,7 +5,7 @@ date() ``` - [1] "Tue Jun 4 18:39:19 2024" + [1] "Wed Nov 19 20:05:26 2025" ``` r devtools::load_all() @@ -23,7 +23,7 @@ bench::mark("readODS" = read_ods(file, sheet = 2), check = FALSE, filter_gc = FA # A tibble: 1 × 4 expression min median 
mean - 1 readODS 5.08ms 5.43ms 7.84ms + 1 readODS 4.49ms 4.79ms 8.62ms ``` r postcodes_file <- here::here("benchmark/civil-service-postcodes-2021.ods") @@ -52,4 +52,4 @@ bench::mark("readODS" = readODS::read_ods(postcodes_file, 2), check = FALSE, fil # A tibble: 1 × 4 expression min median mean - 1 readODS 416ms 425ms 427ms + 1 readODS 417ms 452ms 458ms diff --git a/benchmark/write_ods.md b/benchmark/write_ods.md index 15cd055..3c861cd 100644 --- a/benchmark/write_ods.md +++ b/benchmark/write_ods.md @@ -5,7 +5,7 @@ date() ``` - [1] "Tue Jun 4 18:39:29 2024" + [1] "Wed Nov 19 20:06:30 2025" ``` r devtools::load_all() @@ -28,7 +28,7 @@ system.time(write_ods(df1, path = tempfile(fileext = ".ods"))) ``` user system elapsed - 0.042 0.004 0.053 + 0.046 0.003 0.048 Large df @@ -37,19 +37,19 @@ system.time(write_ods(nycflights13::flights)) ``` user system elapsed - 13.945 0.556 14.714 + 14.300 0.648 15.190 ``` r sessionInfo() ``` - R version 4.4.0 (2024-04-24) + R version 4.5.2 (2025-10-31) Platform: x86_64-pc-linux-gnu - Running under: Ubuntu 22.04.4 LTS + Running under: Ubuntu 22.04.5 LTS Matrix products: default - BLAS: /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.10.0 - LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.10.0 + BLAS: /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3 + LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/libopenblasp-r0.3.20.so; LAPACK version 3.10.0 locale: [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C @@ -66,20 +66,20 @@ sessionInfo() [1] stats graphics grDevices utils datasets methods base other attached packages: - [1] readODS_2.3.1 testthat_3.2.1 + [1] readODS_2.3.4 testthat_3.2.3 loaded via a namespace (and not attached): - [1] utf8_1.2.4 stringi_1.8.4 digest_0.6.35 magrittr_2.0.3 - [5] evaluate_0.23 pkgload_1.3.4 fastmap_1.1.1 cellranger_1.1.0 - [9] rprojroot_2.0.4 jsonlite_1.8.8 zip_2.3.1 pkgbuild_1.4.4 - [13] sessioninfo_1.2.2 brio_1.1.4 urlchecker_1.0.1 promises_1.3.0 - [17] purrr_1.0.2 fansi_1.0.6 cli_3.6.2 shiny_1.8.1.1 - 
[21] rlang_1.1.4 ellipsis_0.3.2 remotes_2.5.0 withr_3.0.0 - [25] cachem_1.0.8 yaml_2.3.8 devtools_2.4.5 tools_4.4.0 - [29] memoise_2.0.1 httpuv_1.6.15 vctrs_0.6.5 R6_2.5.1 - [33] mime_0.12 lifecycle_1.0.4 minty_0.0.1 stringr_1.5.1 - [37] fs_1.6.3 htmlwidgets_1.6.4 usethis_2.2.3 miniUI_0.1.1.1 - [41] pkgconfig_2.0.3 desc_1.4.3 pillar_1.9.0 later_1.3.2 - [45] glue_1.7.0 profvis_0.3.8 Rcpp_1.0.12 xfun_0.43 - [49] tibble_3.2.1 rstudioapi_0.16.0 knitr_1.46 xtable_1.8-4 - [53] htmltools_0.5.8.1 rmarkdown_2.26 nycflights13_1.0.2 compiler_4.4.0 + [1] miniUI_0.1.2 jsonlite_2.0.0 compiler_4.5.2 brio_1.1.5 + [5] promises_1.3.3 zip_2.3.3 Rcpp_1.1.0 nycflights13_1.0.2 + [9] later_1.4.4 yaml_2.3.10 fastmap_1.2.0 mime_0.13 + [13] R6_2.5.1 knitr_1.50 htmlwidgets_1.6.4 tibble_3.2.1 + [17] desc_1.4.3 profvis_0.4.0 rprojroot_2.1.1 shiny_1.11.1 + [21] pillar_1.11.1 rlang_1.1.4 stringi_1.8.4 cachem_1.1.0 + [25] httpuv_1.6.16 xfun_0.53 fs_1.6.6 pkgload_1.4.0 + [29] memoise_2.0.1 cli_3.6.3 withr_3.0.1 magrittr_2.0.3 + [33] digest_0.6.37 rstudioapi_0.17.1 xtable_1.8-4 remotes_2.5.0 + [37] devtools_2.4.5 lifecycle_1.0.4 vctrs_0.6.5 minty_0.0.5 + [41] evaluate_1.0.5 glue_1.8.0 cellranger_1.1.0 urlchecker_1.0.1 + [45] sessioninfo_1.2.3 pkgbuild_1.4.8 rmarkdown_2.29 purrr_1.0.2 + [49] pkgconfig_2.0.3 tools_4.5.2 usethis_3.1.0 ellipsis_0.3.2 + [53] htmltools_0.5.8.1 diff --git a/benchmark/write_ods_apend.md b/benchmark/write_ods_apend.md index aceb0e1..f275910 100644 --- a/benchmark/write_ods_apend.md +++ b/benchmark/write_ods_apend.md @@ -5,7 +5,7 @@ date() ``` - [1] "Tue Jun 4 18:39:27 2024" + [1] "Wed Nov 19 20:06:27 2025" ``` r devtools::load_all() @@ -30,33 +30,33 @@ system.time(write_ods(df1, path = path, sheet = "aaaa", append = TRUE)) ``` user system elapsed - 0.257 0.017 0.281 + 0.324 0.019 0.344 ``` r system.time(write_ods(df1, path = path, sheet = "aaaa", update = TRUE)) ``` user system elapsed - 0.323 0.028 0.359 + 0.338 0.027 0.366 ``` r system.time(write_ods(mtcars, path = path, 
sheet = "aaaa", update = TRUE)) ``` user system elapsed - 0.211 0.028 0.253 + 0.215 0.024 0.239 ``` r sessionInfo() ``` - R version 4.4.0 (2024-04-24) + R version 4.5.2 (2025-10-31) Platform: x86_64-pc-linux-gnu - Running under: Ubuntu 22.04.4 LTS + Running under: Ubuntu 22.04.5 LTS Matrix products: default - BLAS: /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.10.0 - LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.10.0 + BLAS: /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3 + LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/libopenblasp-r0.3.20.so; LAPACK version 3.10.0 locale: [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C @@ -73,20 +73,19 @@ sessionInfo() [1] stats graphics grDevices utils datasets methods base other attached packages: - [1] readODS_2.3.1 testthat_3.2.1 + [1] readODS_2.3.4 testthat_3.2.3 loaded via a namespace (and not attached): - [1] utf8_1.2.4 stringi_1.8.4 digest_0.6.35 magrittr_2.0.3 - [5] evaluate_0.23 pkgload_1.3.4 fastmap_1.1.1 cellranger_1.1.0 - [9] rprojroot_2.0.4 jsonlite_1.8.8 zip_2.3.1 pkgbuild_1.4.4 - [13] sessioninfo_1.2.2 brio_1.1.4 urlchecker_1.0.1 promises_1.3.0 - [17] purrr_1.0.2 fansi_1.0.6 cli_3.6.2 shiny_1.8.1.1 - [21] rlang_1.1.4 ellipsis_0.3.2 remotes_2.5.0 withr_3.0.0 - [25] cachem_1.0.8 yaml_2.3.8 devtools_2.4.5 tools_4.4.0 - [29] memoise_2.0.1 httpuv_1.6.15 vctrs_0.6.5 R6_2.5.1 - [33] mime_0.12 lifecycle_1.0.4 minty_0.0.1 stringr_1.5.1 - [37] fs_1.6.3 htmlwidgets_1.6.4 usethis_2.2.3 miniUI_0.1.1.1 - [41] pkgconfig_2.0.3 desc_1.4.3 pillar_1.9.0 later_1.3.2 - [45] glue_1.7.0 profvis_0.3.8 Rcpp_1.0.12 xfun_0.43 - [49] tibble_3.2.1 rstudioapi_0.16.0 knitr_1.46 xtable_1.8-4 - [53] htmltools_0.5.8.1 rmarkdown_2.26 compiler_4.4.0 + [1] miniUI_0.1.2 jsonlite_2.0.0 compiler_4.5.2 brio_1.1.5 + [5] promises_1.3.3 zip_2.3.3 Rcpp_1.1.0 later_1.4.4 + [9] yaml_2.3.10 fastmap_1.2.0 mime_0.13 R6_2.5.1 + [13] knitr_1.50 htmlwidgets_1.6.4 tibble_3.2.1 desc_1.4.3 + [17] profvis_0.4.0 rprojroot_2.1.1 shiny_1.11.1 pillar_1.11.1 + [21] 
rlang_1.1.4 stringi_1.8.4 cachem_1.1.0 httpuv_1.6.16 + [25] xfun_0.53 fs_1.6.6 pkgload_1.4.0 memoise_2.0.1 + [29] cli_3.6.3 withr_3.0.1 magrittr_2.0.3 digest_0.6.37 + [33] rstudioapi_0.17.1 xtable_1.8-4 remotes_2.5.0 devtools_2.4.5 + [37] lifecycle_1.0.4 vctrs_0.6.5 minty_0.0.5 evaluate_1.0.5 + [41] glue_1.8.0 cellranger_1.1.0 urlchecker_1.0.1 sessioninfo_1.2.3 + [45] pkgbuild_1.4.8 rmarkdown_2.29 purrr_1.0.2 pkgconfig_2.0.3 + [49] tools_4.5.2 usethis_3.1.0 ellipsis_0.3.2 htmltools_0.5.8.1 diff --git a/inst/WORDLIST b/inst/WORDLIST index 31d6b36..d90b3c2 100644 --- a/inst/WORDLIST +++ b/inst/WORDLIST @@ -18,12 +18,14 @@ fods hh hong hsu +knitr mtcars nd ods phonixor rOpenSci rchk +rmarkdown steuer thosjleeper tibble diff --git a/man/readODS-package.Rd b/man/readODS-package.Rd index eb317f8..83e2632 100644 --- a/man/readODS-package.Rd +++ b/man/readODS-package.Rd @@ -41,6 +41,7 @@ Other contributors: \item Matt Kerlogue [contributor] \item Michal Lauer \email{michal.lauer.25@gmail.com} [contributor] \item Till Straube \email{straube@geo.uni-frankfurt.de} [contributor] + \item Mauricio Vargas Sepulveda \email{m.vargas.sepulveda@gmail.com} [contributor] \item Marcin Kalicinski (Author of included RapidXML code) [contributor, copyright holder] } diff --git a/man/readODS_progress.Rd b/man/readODS_progress.Rd new file mode 100644 index 0000000..4464af6 --- /dev/null +++ b/man/readODS_progress.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/read_ods.R +\name{readODS_progress} +\alias{readODS_progress} +\title{Determine whether to show progress bar} +\usage{ +readODS_progress() +} +\value{ +logical indicating whether to show progress +} +\description{ +By default, readODS displays a progress bar unless one of the following is TRUE: +\itemize{ +\item The progress is explicitly disabled by setting options(readODS.show_progress = FALSE). +\item The code is run in a non-interactive session (interactive() is FALSE). 
+\item The code is run by knitr / rmarkdown. +} +} diff --git a/man/read_ods.Rd b/man/read_ods.Rd index 6fc70c8..814732f 100644 --- a/man/read_ods.Rd +++ b/man/read_ods.Rd @@ -22,7 +22,9 @@ read_ods( ods_format = c("auto", "ods", "fods"), guess = FALSE, trim_ws = TRUE, - n_max = Inf + n_max = Inf, + guess_max = min(1000, n_max), + progress = readODS_progress() ) read_fods( @@ -40,7 +42,9 @@ read_fods( as_tibble = TRUE, .name_repair = "unique", trim_ws = TRUE, - n_max = Inf + n_max = Inf, + guess_max = min(1000, n_max), + progress = readODS_progress() ) } \arguments{ @@ -89,6 +93,10 @@ controls whether we attempt to guess format based on the file signature or \item{trim_ws}{logical, should leading and trailing whitespace be trimmed?} \item{n_max}{numeric, Maximum number of data rows to read. Ignored if \code{range} is given.} + +\item{guess_max}{numeric, Maximum number of data rows to use for guessing column types. Defaults to min(1000, n_max).} + +\item{progress}{logical, Display a progress bar? By default, shows a progress bar in interactive sessions unless disabled. See \code{\link{readODS_progress}} for more details.} } \value{ A tibble (\code{tibble}) or data frame (\code{data.frame}) containing a representation of data in the (f)ods file. 
diff --git a/src/cpp11.cpp b/src/cpp11.cpp index d0b2739..2ccb0ae 100644 --- a/src/cpp11.cpp +++ b/src/cpp11.cpp @@ -6,59 +6,59 @@ #include // get_sheet_names.cpp -cpp11::strings get_sheet_names_(const std::string file, const bool include_external_data); +cpp11::strings get_sheet_names_(const std::string & file, const bool include_external_data); extern "C" SEXP _readODS_get_sheet_names_(SEXP file, SEXP include_external_data) { BEGIN_CPP11 - return cpp11::as_sexp(get_sheet_names_(cpp11::as_cpp>(file), cpp11::as_cpp>(include_external_data))); + return cpp11::as_sexp(get_sheet_names_(cpp11::as_cpp>(file), cpp11::as_cpp>(include_external_data))); END_CPP11 } // get_sheet_names.cpp -cpp11::strings get_flat_sheet_names_(const std::string file, const bool include_external_data); +cpp11::strings get_flat_sheet_names_(const std::string & file, const bool include_external_data); extern "C" SEXP _readODS_get_flat_sheet_names_(SEXP file, SEXP include_external_data) { BEGIN_CPP11 - return cpp11::as_sexp(get_flat_sheet_names_(cpp11::as_cpp>(file), cpp11::as_cpp>(include_external_data))); + return cpp11::as_sexp(get_flat_sheet_names_(cpp11::as_cpp>(file), cpp11::as_cpp>(include_external_data))); END_CPP11 } // read_flat_ods_.cpp -cpp11::strings read_flat_ods_(const std::string file, int start_row, int stop_row, int start_col, int stop_col, const int sheet_index, const bool formula_as_formula); +cpp11::strings read_flat_ods_(const std::string & file, int start_row, int stop_row, int start_col, int stop_col, const int sheet_index, const bool formula_as_formula); extern "C" SEXP _readODS_read_flat_ods_(SEXP file, SEXP start_row, SEXP stop_row, SEXP start_col, SEXP stop_col, SEXP sheet_index, SEXP formula_as_formula) { BEGIN_CPP11 - return cpp11::as_sexp(read_flat_ods_(cpp11::as_cpp>(file), cpp11::as_cpp>(start_row), cpp11::as_cpp>(stop_row), cpp11::as_cpp>(start_col), cpp11::as_cpp>(stop_col), cpp11::as_cpp>(sheet_index), cpp11::as_cpp>(formula_as_formula))); + return 
cpp11::as_sexp(read_flat_ods_(cpp11::as_cpp>(file), cpp11::as_cpp>(start_row), cpp11::as_cpp>(stop_row), cpp11::as_cpp>(start_col), cpp11::as_cpp>(stop_col), cpp11::as_cpp>(sheet_index), cpp11::as_cpp>(formula_as_formula))); END_CPP11 } // read_ods_.cpp -cpp11::strings read_ods_(const std::string file, int start_row, int stop_row, int start_col, int stop_col, const int sheet_index, const bool formula_as_formula); +cpp11::strings read_ods_(const std::string & file, int start_row, int stop_row, int start_col, int stop_col, const int sheet_index, const bool formula_as_formula); extern "C" SEXP _readODS_read_ods_(SEXP file, SEXP start_row, SEXP stop_row, SEXP start_col, SEXP stop_col, SEXP sheet_index, SEXP formula_as_formula) { BEGIN_CPP11 - return cpp11::as_sexp(read_ods_(cpp11::as_cpp>(file), cpp11::as_cpp>(start_row), cpp11::as_cpp>(stop_row), cpp11::as_cpp>(start_col), cpp11::as_cpp>(stop_col), cpp11::as_cpp>(sheet_index), cpp11::as_cpp>(formula_as_formula))); + return cpp11::as_sexp(read_ods_(cpp11::as_cpp>(file), cpp11::as_cpp>(start_row), cpp11::as_cpp>(stop_row), cpp11::as_cpp>(start_col), cpp11::as_cpp>(stop_col), cpp11::as_cpp>(sheet_index), cpp11::as_cpp>(formula_as_formula))); END_CPP11 } // splice.cpp -std::string splice_sheet_(const std::string original_xml, const std::string sheet_file, const bool flat); +std::string splice_sheet_(const std::string & original_xml, const std::string & sheet_file, const bool flat); extern "C" SEXP _readODS_splice_sheet_(SEXP original_xml, SEXP sheet_file, SEXP flat) { BEGIN_CPP11 - return cpp11::as_sexp(splice_sheet_(cpp11::as_cpp>(original_xml), cpp11::as_cpp>(sheet_file), cpp11::as_cpp>(flat))); + return cpp11::as_sexp(splice_sheet_(cpp11::as_cpp>(original_xml), cpp11::as_cpp>(sheet_file), cpp11::as_cpp>(flat))); END_CPP11 } // splice.cpp -std::string update_sheet_(const std::string original_xml, const std::string sheet_file, const bool flat, const int sheet_index); +std::string update_sheet_(const std::string & 
original_xml, const std::string & sheet_file, const bool flat, const int sheet_index); extern "C" SEXP _readODS_update_sheet_(SEXP original_xml, SEXP sheet_file, SEXP flat, SEXP sheet_index) { BEGIN_CPP11 - return cpp11::as_sexp(update_sheet_(cpp11::as_cpp>(original_xml), cpp11::as_cpp>(sheet_file), cpp11::as_cpp>(flat), cpp11::as_cpp>(sheet_index))); + return cpp11::as_sexp(update_sheet_(cpp11::as_cpp>(original_xml), cpp11::as_cpp>(sheet_file), cpp11::as_cpp>(flat), cpp11::as_cpp>(sheet_index))); END_CPP11 } // write_sheet_file_.cpp -cpp11::r_string write_sheet_file_(const std::string& filename, const cpp11::data_frame& x, const std::string& sheet_name, const bool row_names, const bool col_names, const bool na_as_string, const bool padding, const std::string& header, const std::string& footer); +cpp11::r_string write_sheet_file_(const std::string & filename, const cpp11::data_frame & x, const std::string & sheet_name, const bool row_names, const bool col_names, const bool na_as_string, const bool padding, const std::string & header, const std::string & footer); extern "C" SEXP _readODS_write_sheet_file_(SEXP filename, SEXP x, SEXP sheet_name, SEXP row_names, SEXP col_names, SEXP na_as_string, SEXP padding, SEXP header, SEXP footer) { BEGIN_CPP11 - return cpp11::as_sexp(write_sheet_file_(cpp11::as_cpp>(filename), cpp11::as_cpp>(x), cpp11::as_cpp>(sheet_name), cpp11::as_cpp>(row_names), cpp11::as_cpp>(col_names), cpp11::as_cpp>(na_as_string), cpp11::as_cpp>(padding), cpp11::as_cpp>(header), cpp11::as_cpp>(footer))); + return cpp11::as_sexp(write_sheet_file_(cpp11::as_cpp>(filename), cpp11::as_cpp>(x), cpp11::as_cpp>(sheet_name), cpp11::as_cpp>(row_names), cpp11::as_cpp>(col_names), cpp11::as_cpp>(na_as_string), cpp11::as_cpp>(padding), cpp11::as_cpp>(header), cpp11::as_cpp>(footer))); END_CPP11 } // write_sheet_file_.cpp -cpp11::r_string write_sheet_file_list_(const std::string& filename, const cpp11::list_of& x, const std::string& sheet_name, const bool row_names, 
const bool col_names, const bool na_as_string, const bool padding, const std::string& header, const std::string& footer); +cpp11::r_string write_sheet_file_list_(const std::string & filename, const cpp11::list_of & x, const std::string & sheet_name, const bool row_names, const bool col_names, const bool na_as_string, const bool padding, const std::string & header, const std::string & footer); extern "C" SEXP _readODS_write_sheet_file_list_(SEXP filename, SEXP x, SEXP sheet_name, SEXP row_names, SEXP col_names, SEXP na_as_string, SEXP padding, SEXP header, SEXP footer) { BEGIN_CPP11 - return cpp11::as_sexp(write_sheet_file_list_(cpp11::as_cpp>(filename), cpp11::as_cpp&>>(x), cpp11::as_cpp>(sheet_name), cpp11::as_cpp>(row_names), cpp11::as_cpp>(col_names), cpp11::as_cpp>(na_as_string), cpp11::as_cpp>(padding), cpp11::as_cpp>(header), cpp11::as_cpp>(footer))); + return cpp11::as_sexp(write_sheet_file_list_(cpp11::as_cpp>(filename), cpp11::as_cpp &>>(x), cpp11::as_cpp>(sheet_name), cpp11::as_cpp>(row_names), cpp11::as_cpp>(col_names), cpp11::as_cpp>(na_as_string), cpp11::as_cpp>(padding), cpp11::as_cpp>(header), cpp11::as_cpp>(footer))); END_CPP11 } diff --git a/src/get_sheet_names.cpp b/src/get_sheet_names.cpp index 96c6420..5ed5958 100644 --- a/src/get_sheet_names.cpp +++ b/src/get_sheet_names.cpp @@ -1,84 +1,146 @@ -#include "readxl/zip.cpp" #include "is_ods.h" #include "read_ods_internals.h" +#include "readxl/zip.cpp" -cpp11::strings get_sheet_names_from_content (rapidxml::xml_node<>* rootNode, const bool include_external_data){ - - cpp11::writable::strings sheetNames(1); +cpp11::strings get_sheet_names_from_content(rapidxml::xml_node<> *rootNode, + const bool include_external_data) { - int i = 0; - int n = 1; + // Cache string literals to avoid repeated comparisons + static const char *table_table = "table:table"; + static const char *table_table_source = "table:table-source"; + static const char *table_name = "table:name"; - for (rapidxml::xml_node<>* sheetData = 
rootNode->first_node("table:table"); - sheetData; - sheetData = sheetData->next_sibling("table:table")){ + // First pass: count sheets to avoid multiple reallocations + int sheet_count = 0; + for (rapidxml::xml_node<> *sheetData = rootNode->first_node(table_table); + sheetData; sheetData = sheetData->next_sibling(table_table)) { + if (!include_external_data && sheetData->first_node(table_table_source)) { + continue; + } + sheet_count++; + } + // Pre-allocate with exact size + cpp11::writable::strings sheetNames(sheet_count); - if (!include_external_data && sheetData->first_node("table:table-source")){ - continue; - } - if (i >= n) { - n *= 2; - sheetNames = Rf_lengthgets(sheetNames, n); - } - rapidxml::xml_attribute<>* name = sheetData->first_attribute("table:name"); - sheetNames[i] = (name != NULL) ? Rf_mkCharCE(name->value(), CE_UTF8) : NA_STRING; - i++; - } + // Second pass: collect sheet names + int i = 0; + for (rapidxml::xml_node<> *sheetData = rootNode->first_node(table_table); + sheetData && i < sheet_count; + sheetData = sheetData->next_sibling(table_table)) { - if (i != n) { - sheetNames = Rf_lengthgets(sheetNames, i); - n = i; + if (!include_external_data && sheetData->first_node(table_table_source)) { + continue; } - return sheetNames; + rapidxml::xml_attribute<> *name = sheetData->first_attribute(table_name); + sheetNames[i] = + (name != NULL) ? 
Rf_mkCharCE(name->value(), CE_UTF8) : NA_STRING; + i++; + } + return sheetNames; } - - [[cpp11::register]] -cpp11::strings get_sheet_names_(const std::string file, const bool include_external_data){ - if (!is_ods(file)){ - throw std::invalid_argument(file + " is not a correct ODS file"); - } - std::string xmlFile = zip_buffer(file, "content.xml"); - - rapidxml::xml_document<> spreadsheet; +cpp11::strings get_sheet_names_(const std::string &file, + const bool include_external_data) { + if (!is_ods(file)) { + throw std::invalid_argument(file + " is not a correct ODS file"); + } + + std::string xmlFile = zip_buffer(file, "content.xml"); + if (xmlFile.empty()) { + throw std::invalid_argument("Could not extract content.xml from " + file); + } + + rapidxml::xml_document<> spreadsheet; + try { spreadsheet.parse<0>(&xmlFile[0]); - rapidxml::xml_node<>* rootNode; - - rootNode = spreadsheet.first_node()->first_node("office:body")-> - first_node("office:spreadsheet"); - return (get_sheet_names_from_content(rootNode, include_external_data)); - + } catch (const rapidxml::parse_error &e) { + throw std::invalid_argument("XML parsing error: " + std::string(e.what())); + } + + // Navigate with null checks + rapidxml::xml_node<> *doc_node = spreadsheet.first_node(); + if (!doc_node) { + throw std::invalid_argument("Invalid ODS structure: missing document root"); + } + + rapidxml::xml_node<> *body_node = doc_node->first_node("office:body"); + if (!body_node) { + throw std::invalid_argument("Invalid ODS structure: missing office:body"); + } + + rapidxml::xml_node<> *rootNode = body_node->first_node("office:spreadsheet"); + if (!rootNode) { + throw std::invalid_argument( + "Invalid ODS structure: missing office:spreadsheet"); + } + + return get_sheet_names_from_content(rootNode, include_external_data); } [[cpp11::register]] -cpp11::strings get_flat_sheet_names_(const std::string file, const bool include_external_data){ - if (!is_flat_ods(file)){ - throw std::invalid_argument(file + " 
is not a correct FODS file"); - } - std::string xmlFile; - - std::ifstream in(file, std::ios::in | std::ios::binary); - if (in) { - in.seekg(0, std::ios::end); - xmlFile.resize(in.tellg()); - in.seekg(0, std::ios::beg); - in.read(&xmlFile[0], xmlFile.size()); - in.close(); - } else{ - throw std::invalid_argument("No such file"); - } - rapidxml::xml_document<> spreadsheet; - - xmlFile.push_back('\0'); +cpp11::strings get_flat_sheet_names_(const std::string &file, + const bool include_external_data) { + if (!is_flat_ods(file)) { + throw std::invalid_argument(file + " is not a correct FODS file"); + } + + // More efficient file reading + std::ifstream in(file, std::ios::in | std::ios::binary); + if (!in) { + throw std::invalid_argument("No such file: " + file); + } + + // Get file size efficiently + in.seekg(0, std::ios::end); + std::streamsize file_size = in.tellg(); + if (file_size <= 0) { + throw std::invalid_argument("Empty or invalid file: " + file); + } + + in.seekg(0, std::ios::beg); + + // Reserve memory with extra space for null terminator + std::string xmlFile; + xmlFile.reserve(static_cast(file_size) + 1); + xmlFile.resize(static_cast(file_size)); + + // Read file in one operation + if (!in.read(&xmlFile[0], file_size)) { + throw std::invalid_argument("Error reading file: " + file); + } + in.close(); + + // Add null terminator for RapidXML + xmlFile.push_back('\0'); + + rapidxml::xml_document<> spreadsheet; + try { spreadsheet.parse<0>(&xmlFile[0]); - - rapidxml::xml_node<>* rootNode; - rootNode = spreadsheet.first_node("office:document")->first_node("office:body")-> - first_node("office:spreadsheet"); - - return (get_sheet_names_from_content(rootNode, include_external_data)); + } catch (const rapidxml::parse_error &e) { + throw std::invalid_argument("XML parsing error: " + std::string(e.what())); + } + + // Navigate to root node with null checks + rapidxml::xml_node<> *doc_node = spreadsheet.first_node("office:document"); + if (!doc_node) { + throw 
std::invalid_argument( + "Invalid ODS structure: missing office:document"); + } + + rapidxml::xml_node<> *body_node = doc_node->first_node("office:body"); + if (!body_node) { + throw std::invalid_argument("Invalid ODS structure: missing office:body"); + } + + rapidxml::xml_node<> *rootNode = body_node->first_node("office:spreadsheet"); + if (!rootNode) { + throw std::invalid_argument( + "Invalid ODS structure: missing office:spreadsheet"); + } + + return get_sheet_names_from_content(rootNode, include_external_data); } diff --git a/src/is_ods.cpp b/src/is_ods.cpp index cca7021..5e28c5f 100644 --- a/src/is_ods.cpp +++ b/src/is_ods.cpp @@ -5,92 +5,132 @@ #include #include -bool is_ods(const std::string file){ - /*Checks that file conforms to some of the spec at - https://docs.oasis-open.org/office/OpenDocument/v1.3/. - - It's not all of them, but if it passes all of these and isn't a spreadsheet - something is very wrong. - - We don't care about the file extension*/ - /*Check that it contains the proper files*/ - if (!zip_has_file(file, "content.xml")){ - /*Strictly speaking this isn't required in the spec, but - we're only interested in files with content.*/ - return false; - } +bool is_ods(const std::string &file) { + /*Checks that file conforms to some of the spec at + https://docs.oasis-open.org/office/OpenDocument/v1.3/. 
- rapidxml::xml_document<> workbook; - rapidxml::xml_node<>* rootNode; - std:: string xmlFile = zip_buffer(file, "content.xml"); - try { - workbook.parse<0>(&xmlFile[0]); - } catch (const rapidxml::parse_error& e) { - if (strcmp(e.what(), "expected <")){ - throw std::invalid_argument(file + " does not contain a valid content.xml"); - } else { - throw std::invalid_argument("XML parse error"); - } - } - rootNode = workbook.first_node(); - /*Check Section 2.2.1 B) 2.1 - is this a well formed OpenDocument*/ - if (strcmp(rootNode->name(),"office:document-content") != 0){ - return false; - } - /*Check Section 3.3 C)*/ - if (!(rootNode->first_node("office:body"))){ - return false; - } - /*Check Section 2.2.4 C) - this is a spreadsheet*/ - if (!(rootNode->first_node("office:body")->first_node("office:spreadsheet"))){ - return false; + It's not all of them, but if it passes all of these and isn't a spreadsheet + something is very wrong. + + We don't care about the file extension*/ + + // Fast path: Check that it contains the proper files first + if (!zip_has_file(file, "content.xml")) { + /*Strictly speaking this isn't required in the spec, but + we're only interested in files with content.*/ + return false; + } + + std::string xmlFile = zip_buffer(file, "content.xml"); + if (xmlFile.empty()) { + return false; // Could not extract content + } + + rapidxml::xml_document<> workbook; + try { + workbook.parse<0>(&xmlFile[0]); + } catch (const rapidxml::parse_error &e) { + if (strcmp(e.what(), "expected <")) { + throw std::invalid_argument(file + " does not contain a valid content.xml"); + } else { + throw std::invalid_argument("XML parse error"); } - return true; + } + + rapidxml::xml_node<> *rootNode = workbook.first_node(); + if (!rootNode) { + return false; + } + + // Cache string literals for performance + static const char *office_document_content = "office:document-content"; + static const char *office_body = "office:body"; + static const char *office_spreadsheet = 
"office:spreadsheet"; + + /*Check Section 2.2.1 B) 2.1 - is this a well formed OpenDocument*/ + if (strcmp(rootNode->name(), office_document_content) != 0) { + return false; + } + + /*Check Section 3.3 C)*/ + rapidxml::xml_node<> *body_node = rootNode->first_node(office_body); + if (!body_node) { + return false; + } + + /*Check Section 2.2.4 C) - this is a spreadsheet*/ + if (!body_node->first_node(office_spreadsheet)) { + return false; + } + + return true; } -bool is_flat_ods(const std::string file){ - /*Checks that file conforms to some of the spec at - https://docs.oasis-open.org/office/OpenDocument/v1.3/.*/ - rapidxml::xml_document<> workbook; - rapidxml::xml_node<>* rootNode; - std::string xmlFile; - - std::ifstream in(file, std::ios::in | std::ios::binary); - if (in) { - in.seekg(0, std::ios::end); - xmlFile.resize(in.tellg()); - in.seekg(0, std::ios::beg); - in.read(&xmlFile[0], xmlFile.size()); - in.close(); - } else{ - throw std::invalid_argument("No such file"); - } +bool is_flat_ods(const std::string &file) { + /*Checks that file conforms to some of the spec at + https://docs.oasis-open.org/office/OpenDocument/v1.3/.*/ - xmlFile.push_back('\0'); + // More efficient file reading + std::ifstream in(file, std::ios::in | std::ios::binary); + if (!in) { + return false; // File doesn't exist - not a valid FODS + } - try { - workbook.parse<0>(&xmlFile[0]); - } catch (const rapidxml::parse_error& e) { - if (strcmp(e.what(), "expected <")){ - throw std::invalid_argument(file + " is not a flat XML file"); - } else { - throw std::invalid_argument("XML parse error"); - } - } - // Section 2.2.1C) - rootNode = workbook.first_node("office:document"); - if (rootNode == 0){ - return false; - } + // Get file size efficiently + in.seekg(0, std::ios::end); + std::streamsize file_size = in.tellg(); + if (file_size <= 0) { + return false; // Empty file can't be FODS + } - /*Check Section 3.3 C)*/ - if (!(rootNode->first_node("office:body"))){ - return false; - } - /*Check 
Section 2.2.4 C) - this is a spreadsheet*/ - if (!(rootNode->first_node("office:body")->first_node("office:spreadsheet"))){ - return false; + in.seekg(0, std::ios::beg); + + // Reserve memory with extra space for null terminator + std::string xmlFile; + xmlFile.reserve(static_cast(file_size) + 1); + xmlFile.resize(static_cast(file_size)); + + // Read file in one operation + if (!in.read(&xmlFile[0], file_size)) { + return false; // Read error + } + in.close(); + + // Add null terminator for RapidXML + xmlFile.push_back('\0'); + + rapidxml::xml_document<> workbook; + try { + workbook.parse<0>(&xmlFile[0]); + } catch (const rapidxml::parse_error &e) { + if (strcmp(e.what(), "expected <")) { + throw std::invalid_argument(file + " is not a flat XML file"); + } else { + throw std::invalid_argument("XML parse error"); } + } + + // Cache string literals for performance + static const char *office_document = "office:document"; + static const char *office_body = "office:body"; + static const char *office_spreadsheet = "office:spreadsheet"; + + // Section 2.2.1C) + rapidxml::xml_node<> *rootNode = workbook.first_node(office_document); + if (!rootNode) { + return false; + } + + /*Check Section 3.3 C)*/ + rapidxml::xml_node<> *body_node = rootNode->first_node(office_body); + if (!body_node) { + return false; + } + + /*Check Section 2.2.4 C) - this is a spreadsheet*/ + if (!body_node->first_node(office_spreadsheet)) { + return false; + } - return true; + return true; } diff --git a/src/is_ods.h b/src/is_ods.h index 69c562e..4051e64 100644 --- a/src/is_ods.h +++ b/src/is_ods.h @@ -1,7 +1,7 @@ #pragma once -#include #include "readxl/zip.h" +#include -bool is_ods(const std::string file); -bool is_flat_ods(const std::string file); \ No newline at end of file +bool is_ods(const std::string &file); +bool is_flat_ods(const std::string &file); diff --git a/src/rapidxml/rapidxml.hpp b/src/rapidxml/rapidxml.hpp index ae91e08..6699d58 100644 --- a/src/rapidxml/rapidxml.hpp +++ 
b/src/rapidxml/rapidxml.hpp @@ -6,103 +6,95 @@ // Revision $DateTime: 2009/05/13 01:46:17 $ //! \file rapidxml.hpp This file contains rapidxml parser and DOM implementation -// If standard library is disabled, user must provide implementations of required functions and typedefs +// If standard library is disabled, user must provide implementations of +// required functions and typedefs #if !defined(RAPIDXML_NO_STDLIB) - #include // For std::size_t - #include // For assert - #include // For placement new +#include // For assert +#include // For std::size_t +#include // For placement new #endif -// On MSVC, disable "conditional expression is constant" warning (level 4). -// This warning is almost impossible to avoid with certain types of templated code +// On MSVC, disable "conditional expression is constant" warning (level 4). +// This warning is almost impossible to avoid with certain types of templated +// code #ifdef _MSC_VER - #pragma warning(push) - #pragma warning(disable:4127) // Conditional expression is constant +#pragma warning(push) +#pragma warning(disable : 4127) // Conditional expression is constant #endif /////////////////////////////////////////////////////////////////////////// // RAPIDXML_PARSE_ERROR - + #if defined(RAPIDXML_NO_EXCEPTIONS) -#define RAPIDXML_PARSE_ERROR(what, where) { parse_error_handler(what, where); assert(0); } - -namespace rapidxml -{ - //! When exceptions are disabled by defining RAPIDXML_NO_EXCEPTIONS, - //! this function is called to notify user about the error. - //! It must be defined by the user. - //!

- //! This function cannot return. If it does, the results are undefined. - //!

- //! A very simple definition might look like that: - //!
-    //! void %rapidxml::%parse_error_handler(const char *what, void *where)
-    //! {
-    //!     std::cout << "Parse error: " << what << "\n";
-    //!     std::abort();
-    //! }
-    //! 
- //! \param what Human readable description of the error. - //! \param where Pointer to character data where error was detected. - void parse_error_handler(const char *what, void *where); -} +#define RAPIDXML_PARSE_ERROR(what, where) \ + { \ + parse_error_handler(what, where); \ + assert(0); \ + } + +namespace rapidxml { +//! When exceptions are disabled by defining RAPIDXML_NO_EXCEPTIONS, +//! this function is called to notify user about the error. +//! It must be defined by the user. +//!

+//! This function cannot return. If it does, the results are undefined. +//!

+//! A very simple definition might look like that: +//!
+//! void %rapidxml::%parse_error_handler(const char *what, void *where)
+//! {
+//!     std::cout << "Parse error: " << what << "\n";
+//!     std::abort();
+//! }
+//! 
+//! \param what Human readable description of the error. +//! \param where Pointer to character data where error was detected. +void parse_error_handler(const char *what, void *where); +} // namespace rapidxml #else - -#include // For std::exception - -#define RAPIDXML_PARSE_ERROR(what, where) throw parse_error(what, where) - -namespace rapidxml -{ - - //! Parse error exception. - //! This exception is thrown by the parser when an error occurs. - //! Use what() function to get human-readable error message. - //! Use where() function to get a pointer to position within source text where error was detected. - //!

- //! If throwing exceptions by the parser is undesirable, - //! it can be disabled by defining RAPIDXML_NO_EXCEPTIONS macro before rapidxml.hpp is included. - //! This will cause the parser to call rapidxml::parse_error_handler() function instead of throwing an exception. - //! This function must be defined by the user. - //!

- //! This class derives from std::exception class. - class parse_error: public std::exception - { - - public: - - //! Constructs parse error - parse_error(const char *what, void *where) - : m_what(what) - , m_where(where) - { - } - //! Gets human readable description of error. - //! \return Pointer to null terminated description of the error. - virtual const char *what() const throw() - { - return m_what; - } - - //! Gets pointer to character data where error happened. - //! Ch should be the same as char type of xml_document that produced the error. - //! \return Pointer to location within the parsed string where error occured. - template - Ch *where() const - { - return reinterpret_cast(m_where); - } +#include // For std::exception - private: - - const char *m_what; - void *m_where; +#define RAPIDXML_PARSE_ERROR(what, where) throw parse_error(what, where) - }; -} +namespace rapidxml { + +//! Parse error exception. +//! This exception is thrown by the parser when an error occurs. +//! Use what() function to get human-readable error message. +//! Use where() function to get a pointer to position within source text where +//! error was detected.

If throwing exceptions by the parser is +//! undesirable, it can be disabled by defining RAPIDXML_NO_EXCEPTIONS macro +//! before rapidxml.hpp is included. This will cause the parser to call +//! rapidxml::parse_error_handler() function instead of throwing an exception. +//! This function must be defined by the user. +//!

+//! This class derives from std::exception class. +class parse_error : public std::exception { + +public: + //! Constructs parse error + parse_error(const char *what, void *where) : m_what(what), m_where(where) {} + + //! Gets human readable description of error. + //! \return Pointer to null terminated description of the error. + virtual const char *what() const throw() { return m_what; } + + //! Gets pointer to character data where error happened. + //! Ch should be the same as char type of xml_document that produced the + //! error. + //! \return Pointer to location within the parsed string where error occured. + template Ch *where() const { + return reinterpret_cast(m_where); + } + +private: + const char *m_what; + void *m_where; +}; +} // namespace rapidxml #endif @@ -110,2487 +102,2400 @@ namespace rapidxml // Pool sizes #ifndef RAPIDXML_STATIC_POOL_SIZE - // Size of static memory block of memory_pool. - // Define RAPIDXML_STATIC_POOL_SIZE before including rapidxml.hpp if you want to override the default value. - // No dynamic memory allocations are performed by memory_pool until static memory is exhausted. - #define RAPIDXML_STATIC_POOL_SIZE (64 * 1024) + // Size of static memory block of memory_pool. +// Define RAPIDXML_STATIC_POOL_SIZE before including rapidxml.hpp if you want to +// override the default value. No dynamic memory allocations are performed by +// memory_pool until static memory is exhausted. +#define RAPIDXML_STATIC_POOL_SIZE (64 * 1024) #endif #ifndef RAPIDXML_DYNAMIC_POOL_SIZE - // Size of dynamic memory block of memory_pool. - // Define RAPIDXML_DYNAMIC_POOL_SIZE before including rapidxml.hpp if you want to override the default value. - // After the static block is exhausted, dynamic blocks with approximately this size are allocated by memory_pool. - #define RAPIDXML_DYNAMIC_POOL_SIZE (64 * 1024) + // Size of dynamic memory block of memory_pool. 
+// Define RAPIDXML_DYNAMIC_POOL_SIZE before including rapidxml.hpp if you want +// to override the default value. After the static block is exhausted, dynamic +// blocks with approximately this size are allocated by memory_pool. +#define RAPIDXML_DYNAMIC_POOL_SIZE (64 * 1024) #endif #ifndef RAPIDXML_ALIGNMENT - // Memory allocation alignment. - // Define RAPIDXML_ALIGNMENT before including rapidxml.hpp if you want to override the default value, which is the size of pointer. - // All memory allocations for nodes, attributes and strings will be aligned to this value. - // This must be a power of 2 and at least 1, otherwise memory_pool will not work. - #define RAPIDXML_ALIGNMENT sizeof(void *) + // Memory allocation alignment. +// Define RAPIDXML_ALIGNMENT before including rapidxml.hpp if you want to +// override the default value, which is the size of pointer. All memory +// allocations for nodes, attributes and strings will be aligned to this value. +// This must be a power of 2 and at least 1, otherwise memory_pool will not +// work. +#define RAPIDXML_ALIGNMENT sizeof(void *) #endif -namespace rapidxml -{ - // Forward declarations - template class xml_node; - template class xml_attribute; - template class xml_document; - - //! Enumeration listing all node types produced by the parser. - //! Use xml_node::type() function to query node type. - enum node_type - { - node_document, //!< A document node. Name and value are empty. - node_element, //!< An element node. Name contains element name. Value contains text of first data node. - node_data, //!< A data node. Name is empty. Value contains data text. - node_cdata, //!< A CDATA node. Name is empty. Value contains data text. - node_comment, //!< A comment node. Name is empty. Value contains comment text. - node_declaration, //!< A declaration node. Name and value are empty. Declaration parameters (version, encoding and standalone) are in node attributes. - node_doctype, //!< A DOCTYPE node. Name is empty. 
Value contains DOCTYPE text. - node_pi //!< A PI node. Name contains target. Value contains instructions. - }; - - /////////////////////////////////////////////////////////////////////// - // Parsing flags - - //! Parse flag instructing the parser to not create data nodes. - //! Text of first data node will still be placed in value of parent element, unless rapidxml::parse_no_element_values flag is also specified. - //! Can be combined with other flags by use of | operator. - //!

- //! See xml_document::parse() function. - const int parse_no_data_nodes = 0x1; - - //! Parse flag instructing the parser to not use text of first data node as a value of parent element. - //! Can be combined with other flags by use of | operator. - //! Note that child data nodes of element node take precendence over its value when printing. - //! That is, if element has one or more child data nodes and a value, the value will be ignored. - //! Use rapidxml::parse_no_data_nodes flag to prevent creation of data nodes if you want to manipulate data using values of elements. - //!

- //! See xml_document::parse() function. - const int parse_no_element_values = 0x2; - - //! Parse flag instructing the parser to not place zero terminators after strings in the source text. - //! By default zero terminators are placed, modifying source text. - //! Can be combined with other flags by use of | operator. - //!

- //! See xml_document::parse() function. - const int parse_no_string_terminators = 0x4; - - //! Parse flag instructing the parser to not translate entities in the source text. - //! By default entities are translated, modifying source text. - //! Can be combined with other flags by use of | operator. - //!

- //! See xml_document::parse() function. - const int parse_no_entity_translation = 0x8; - - //! Parse flag instructing the parser to disable UTF-8 handling and assume plain 8 bit characters. - //! By default, UTF-8 handling is enabled. - //! Can be combined with other flags by use of | operator. - //!

- //! See xml_document::parse() function. - const int parse_no_utf8 = 0x10; - - //! Parse flag instructing the parser to create XML declaration node. - //! By default, declaration node is not created. - //! Can be combined with other flags by use of | operator. - //!

- //! See xml_document::parse() function. - const int parse_declaration_node = 0x20; - - //! Parse flag instructing the parser to create comments nodes. - //! By default, comment nodes are not created. - //! Can be combined with other flags by use of | operator. - //!

- //! See xml_document::parse() function. - const int parse_comment_nodes = 0x40; - - //! Parse flag instructing the parser to create DOCTYPE node. - //! By default, doctype node is not created. - //! Although W3C specification allows at most one DOCTYPE node, RapidXml will silently accept documents with more than one. - //! Can be combined with other flags by use of | operator. - //!

- //! See xml_document::parse() function. - const int parse_doctype_node = 0x80; - - //! Parse flag instructing the parser to create PI nodes. - //! By default, PI nodes are not created. - //! Can be combined with other flags by use of | operator. - //!

- //! See xml_document::parse() function. - const int parse_pi_nodes = 0x100; - - //! Parse flag instructing the parser to validate closing tag names. - //! If not set, name inside closing tag is irrelevant to the parser. - //! By default, closing tags are not validated. - //! Can be combined with other flags by use of | operator. - //!

- //! See xml_document::parse() function. - const int parse_validate_closing_tags = 0x200; - - //! Parse flag instructing the parser to trim all leading and trailing whitespace of data nodes. - //! By default, whitespace is not trimmed. - //! This flag does not cause the parser to modify source text. - //! Can be combined with other flags by use of | operator. - //!

- //! See xml_document::parse() function. - const int parse_trim_whitespace = 0x400; - - //! Parse flag instructing the parser to condense all whitespace runs of data nodes to a single space character. - //! Trimming of leading and trailing whitespace of data is controlled by rapidxml::parse_trim_whitespace flag. - //! By default, whitespace is not normalized. - //! If this flag is specified, source text will be modified. - //! Can be combined with other flags by use of | operator. - //!

- //! See xml_document::parse() function. - const int parse_normalize_whitespace = 0x800; - - // Compound flags - - //! Parse flags which represent default behaviour of the parser. - //! This is always equal to 0, so that all other flags can be simply ored together. - //! Normally there is no need to inconveniently disable flags by anding with their negated (~) values. - //! This also means that meaning of each flag is a negation of the default setting. - //! For example, if flag name is rapidxml::parse_no_utf8, it means that utf-8 is enabled by default, - //! and using the flag will disable it. - //!

- //! See xml_document::parse() function. - const int parse_default = 0; - - //! A combination of parse flags that forbids any modifications of the source text. - //! This also results in faster parsing. However, note that the following will occur: - //!
    - //!
  • names and values of nodes will not be zero terminated, you have to use xml_base::name_size() and xml_base::value_size() functions to determine where name and value ends
  • - //!
  • entities will not be translated
  • - //!
  • whitespace will not be normalized
  • - //!
- //! See xml_document::parse() function. - const int parse_non_destructive = parse_no_string_terminators | parse_no_entity_translation; - - //! A combination of parse flags resulting in fastest possible parsing, without sacrificing important data. - //!

- //! See xml_document::parse() function. - const int parse_fastest = parse_non_destructive | parse_no_data_nodes; - - //! A combination of parse flags resulting in largest amount of data being extracted. - //! This usually results in slowest parsing. - //!

- //! See xml_document::parse() function. - const int parse_full = parse_declaration_node | parse_comment_nodes | parse_doctype_node | parse_pi_nodes | parse_validate_closing_tags; - - /////////////////////////////////////////////////////////////////////// - // Internals - - //! \cond internal - namespace internal - { - - // Struct that contains lookup tables for the parser - // It must be a template to allow correct linking (because it has static data members, which are defined in a header file). - template - struct lookup_tables - { - static const unsigned char lookup_whitespace[256]; // Whitespace table - static const unsigned char lookup_node_name[256]; // Node name table - static const unsigned char lookup_text[256]; // Text table - static const unsigned char lookup_text_pure_no_ws[256]; // Text table - static const unsigned char lookup_text_pure_with_ws[256]; // Text table - static const unsigned char lookup_attribute_name[256]; // Attribute name table - static const unsigned char lookup_attribute_data_1[256]; // Attribute data table with single quote - static const unsigned char lookup_attribute_data_1_pure[256]; // Attribute data table with single quote - static const unsigned char lookup_attribute_data_2[256]; // Attribute data table with double quotes - static const unsigned char lookup_attribute_data_2_pure[256]; // Attribute data table with double quotes - static const unsigned char lookup_digits[256]; // Digits - static const unsigned char lookup_upcase[256]; // To uppercase conversion table for ASCII characters - }; - - // Find length of the string - template - inline std::size_t measure(const Ch *p) - { - const Ch *tmp = p; - while (*tmp) - ++tmp; - return tmp - p; - } +namespace rapidxml { +// Forward declarations +template class xml_node; +template class xml_attribute; +template class xml_document; + +//! Enumeration listing all node types produced by the parser. +//! Use xml_node::type() function to query node type. 
+enum node_type { + node_document, //!< A document node. Name and value are empty. + node_element, //!< An element node. Name contains element name. Value contains + //!< text of first data node. + node_data, //!< A data node. Name is empty. Value contains data text. + node_cdata, //!< A CDATA node. Name is empty. Value contains data text. + node_comment, //!< A comment node. Name is empty. Value contains comment text. + node_declaration, //!< A declaration node. Name and value are empty. + //!< Declaration parameters (version, encoding and + //!< standalone) are in node attributes. + node_doctype, //!< A DOCTYPE node. Name is empty. Value contains DOCTYPE text. + node_pi //!< A PI node. Name contains target. Value contains instructions. +}; + +/////////////////////////////////////////////////////////////////////// +// Parsing flags + +//! Parse flag instructing the parser to not create data nodes. +//! Text of first data node will still be placed in value of parent element, +//! unless rapidxml::parse_no_element_values flag is also specified. Can be +//! combined with other flags by use of | operator.

See +//! xml_document::parse() function. +const int parse_no_data_nodes = 0x1; + +//! Parse flag instructing the parser to not use text of first data node as a +//! value of parent element. Can be combined with other flags by use of | +//! operator. Note that child data nodes of element node take precendence over +//! its value when printing. That is, if element has one or more child data +//! nodes and a value, the value will be ignored. Use +//! rapidxml::parse_no_data_nodes flag to prevent creation of data nodes if you +//! want to manipulate data using values of elements.

See +//! xml_document::parse() function. +const int parse_no_element_values = 0x2; + +//! Parse flag instructing the parser to not place zero terminators after +//! strings in the source text. By default zero terminators are placed, +//! modifying source text. Can be combined with other flags by use of | +//! operator.

See xml_document::parse() function. +const int parse_no_string_terminators = 0x4; + +//! Parse flag instructing the parser to not translate entities in the source +//! text. By default entities are translated, modifying source text. Can be +//! combined with other flags by use of | operator.

See +//! xml_document::parse() function. +const int parse_no_entity_translation = 0x8; + +//! Parse flag instructing the parser to disable UTF-8 handling and assume plain +//! 8 bit characters. By default, UTF-8 handling is enabled. Can be combined +//! with other flags by use of | operator.

See xml_document::parse() +//! function. +const int parse_no_utf8 = 0x10; + +//! Parse flag instructing the parser to create XML declaration node. +//! By default, declaration node is not created. +//! Can be combined with other flags by use of | operator. +//!

+//! See xml_document::parse() function. +const int parse_declaration_node = 0x20; + +//! Parse flag instructing the parser to create comments nodes. +//! By default, comment nodes are not created. +//! Can be combined with other flags by use of | operator. +//!

+//! See xml_document::parse() function. +const int parse_comment_nodes = 0x40; + +//! Parse flag instructing the parser to create DOCTYPE node. +//! By default, doctype node is not created. +//! Although W3C specification allows at most one DOCTYPE node, RapidXml will +//! silently accept documents with more than one. Can be combined with other +//! flags by use of | operator.

See xml_document::parse() function. +const int parse_doctype_node = 0x80; + +//! Parse flag instructing the parser to create PI nodes. +//! By default, PI nodes are not created. +//! Can be combined with other flags by use of | operator. +//!

+//! See xml_document::parse() function. +const int parse_pi_nodes = 0x100; + +//! Parse flag instructing the parser to validate closing tag names. +//! If not set, name inside closing tag is irrelevant to the parser. +//! By default, closing tags are not validated. +//! Can be combined with other flags by use of | operator. +//!

+//! See xml_document::parse() function. +const int parse_validate_closing_tags = 0x200; + +//! Parse flag instructing the parser to trim all leading and trailing +//! whitespace of data nodes. By default, whitespace is not trimmed. This flag +//! does not cause the parser to modify source text. Can be combined with other +//! flags by use of | operator.

See xml_document::parse() function. +const int parse_trim_whitespace = 0x400; + +//! Parse flag instructing the parser to condense all whitespace runs of data +//! nodes to a single space character. Trimming of leading and trailing +//! whitespace of data is controlled by rapidxml::parse_trim_whitespace flag. By +//! default, whitespace is not normalized. If this flag is specified, source +//! text will be modified. Can be combined with other flags by use of | +//! operator.

See xml_document::parse() function. +const int parse_normalize_whitespace = 0x800; + +// Compound flags + +//! Parse flags which represent default behaviour of the parser. +//! This is always equal to 0, so that all other flags can be simply ored +//! together. Normally there is no need to inconveniently disable flags by +//! anding with their negated (~) values. This also means that meaning of each +//! flag is a negation of the default setting. For example, if flag name +//! is rapidxml::parse_no_utf8, it means that utf-8 is enabled by +//! default, and using the flag will disable it.

See +//! xml_document::parse() function. +const int parse_default = 0; + +//! A combination of parse flags that forbids any modifications of the source +//! text. This also results in faster parsing. However, note that the following +//! will occur:
  • names and values of nodes will not be zero terminated, +//! you have to use xml_base::name_size() and xml_base::value_size() functions +//! to determine where name and value ends
  • entities will not be +//! translated
  • whitespace will not be normalized
  • +//!
+//! See xml_document::parse() function. +const int parse_non_destructive = + parse_no_string_terminators | parse_no_entity_translation; + +//! A combination of parse flags resulting in fastest possible parsing, without +//! sacrificing important data.

See xml_document::parse() function. +const int parse_fastest = parse_non_destructive | parse_no_data_nodes; + +//! A combination of parse flags resulting in largest amount of data being +//! extracted. This usually results in slowest parsing.

See +//! xml_document::parse() function. +const int parse_full = parse_declaration_node | parse_comment_nodes | + parse_doctype_node | parse_pi_nodes | + parse_validate_closing_tags; + +/////////////////////////////////////////////////////////////////////// +// Internals + +//! \cond internal +namespace internal { + +// Struct that contains lookup tables for the parser +// It must be a template to allow correct linking (because it has static data +// members, which are defined in a header file). +template struct lookup_tables { + static const unsigned char lookup_whitespace[256]; // Whitespace table + static const unsigned char lookup_node_name[256]; // Node name table + static const unsigned char lookup_text[256]; // Text table + static const unsigned char lookup_text_pure_no_ws[256]; // Text table + static const unsigned char lookup_text_pure_with_ws[256]; // Text table + static const unsigned char lookup_attribute_name[256]; // Attribute name table + static const unsigned char + lookup_attribute_data_1[256]; // Attribute data table with single quote + static const unsigned char + lookup_attribute_data_1_pure[256]; // Attribute data table with single + // quote + static const unsigned char + lookup_attribute_data_2[256]; // Attribute data table with double quotes + static const unsigned char + lookup_attribute_data_2_pure[256]; // Attribute data table with double + // quotes + static const unsigned char lookup_digits[256]; // Digits + static const unsigned char + lookup_upcase[256]; // To uppercase conversion table for ASCII characters +}; + +// Find length of the string +template inline std::size_t measure(const Ch *p) { + const Ch *tmp = p; + while (*tmp) + ++tmp; + return tmp - p; +} - // Compare strings for equality - template - inline bool compare(const Ch *p1, std::size_t size1, const Ch *p2, std::size_t size2, bool case_sensitive) - { - if (size1 != size2) - return false; - if (case_sensitive) - { - for (const Ch *end = p1 + size1; p1 < end; ++p1, ++p2) - 
if (*p1 != *p2) - return false; - } - else - { - for (const Ch *end = p1 + size1; p1 < end; ++p1, ++p2) - if (lookup_tables<0>::lookup_upcase[static_cast(*p1)] != lookup_tables<0>::lookup_upcase[static_cast(*p2)]) - return false; - } - return true; - } +// Compare strings for equality +template +inline bool compare(const Ch *p1, std::size_t size1, const Ch *p2, + std::size_t size2, bool case_sensitive) { + if (size1 != size2) + return false; + if (case_sensitive) { + for (const Ch *end = p1 + size1; p1 < end; ++p1, ++p2) + if (*p1 != *p2) + return false; + } else { + for (const Ch *end = p1 + size1; p1 < end; ++p1, ++p2) + if (lookup_tables<0>::lookup_upcase[static_cast(*p1)] != + lookup_tables<0>::lookup_upcase[static_cast(*p2)]) + return false; + } + return true; +} +} // namespace internal +//! \endcond + +/////////////////////////////////////////////////////////////////////// +// Memory pool + +//! This class is used by the parser to create new nodes and attributes, without +//! overheads of dynamic memory allocation. In most cases, you will not need to +//! use this class directly. However, if you need to create nodes manually or +//! modify names/values of nodes, you are encouraged to use memory_pool of +//! relevant xml_document to allocate the memory. Not only is this faster than +//! allocating them by using new operator, but also their lifetime +//! will be tied to the lifetime of document, possibly simplyfing memory +//! management.

Call allocate_node() or allocate_attribute() functions +//! to obtain new nodes or attributes from the pool. You can also call +//! allocate_string() function to allocate strings. Such strings can then be +//! used as names or values of nodes without worrying about their lifetime. Note +//! that there is no free() function -- all allocations are freed +//! at once when clear() function is called, or when the pool is destroyed. +//!

+//! It is also possible to create a standalone memory_pool, and use it +//! to allocate nodes, whose lifetime will not be tied to any document. +//!

+//! Pool maintains RAPIDXML_STATIC_POOL_SIZE bytes of statically +//! allocated memory. Until static memory is exhausted, no dynamic memory +//! allocations are done. When static memory is exhausted, pool allocates +//! additional blocks of memory of size RAPIDXML_DYNAMIC_POOL_SIZE +//! each, by using global new[] and delete[] +//! operators. This behaviour can be changed by setting custom allocation +//! routines. Use set_allocator() function to set them.

Allocations for +//! nodes, attributes and strings are aligned at RAPIDXML_ALIGNMENT +//! bytes. This value defaults to the size of pointer on target architecture. +//!

+//! To obtain absolutely top performance from the parser, +//! it is important that all nodes are allocated from a single, contiguous block +//! of memory. Otherwise, cache misses when jumping between two (or more) +//! disjoint blocks of memory can slow down parsing quite considerably. If +//! required, you can tweak RAPIDXML_STATIC_POOL_SIZE, +//! RAPIDXML_DYNAMIC_POOL_SIZE and RAPIDXML_ALIGNMENT +//! to obtain best wasted memory to performance compromise. +//! To do it, define their values before rapidxml.hpp file is included. +//! \param Ch Character type of created nodes. +template class memory_pool { + +public: + //! \cond internal + typedef void *(alloc_func)(std::size_t); // Type of user-defined function used + // to allocate memory + typedef void(free_func)( + void *); // Type of user-defined function used to free memory + //! \endcond + + //! Constructs empty pool with default allocator functions. + memory_pool() : m_alloc_func(0), m_free_func(0) { init(); } + + //! Destroys pool and frees all the memory. + //! This causes memory occupied by nodes allocated by the pool to be freed. + //! Nodes allocated from the pool are no longer valid. + ~memory_pool() { clear(); } + + //! Allocates a new node from the pool, and optionally assigns name and value + //! to it. If the allocation request cannot be accomodated, this function will + //! throw std::bad_alloc. If exceptions are disabled by defining + //! RAPIDXML_NO_EXCEPTIONS, this function will call + //! rapidxml::parse_error_handler() function. + //! \param type Type of node to create. + //! \param name Name to assign to the node, or 0 to assign no name. + //! \param value Value to assign to the node, or 0 to assign no value. + //! \param name_size Size of name to assign, or 0 to automatically calculate + //! size from name string. + //! \param value_size Size of value to assign, or 0 to automatically calculate + //! size from value string. + //! \return Pointer to allocated node. 
This pointer will never be NULL. + xml_node *allocate_node(node_type type, const Ch *name = 0, + const Ch *value = 0, std::size_t name_size = 0, + std::size_t value_size = 0) { + void *memory = allocate_aligned(sizeof(xml_node)); + xml_node *node = new (memory) xml_node(type); + if (name) { + if (name_size > 0) + node->name(name, name_size); + else + node->name(name); + } + if (value) { + if (value_size > 0) + node->value(value, value_size); + else + node->value(value); } - //! \endcond - - /////////////////////////////////////////////////////////////////////// - // Memory pool - - //! This class is used by the parser to create new nodes and attributes, without overheads of dynamic memory allocation. - //! In most cases, you will not need to use this class directly. - //! However, if you need to create nodes manually or modify names/values of nodes, - //! you are encouraged to use memory_pool of relevant xml_document to allocate the memory. - //! Not only is this faster than allocating them by using new operator, - //! but also their lifetime will be tied to the lifetime of document, - //! possibly simplyfing memory management. - //!

- //! Call allocate_node() or allocate_attribute() functions to obtain new nodes or attributes from the pool. - //! You can also call allocate_string() function to allocate strings. - //! Such strings can then be used as names or values of nodes without worrying about their lifetime. - //! Note that there is no free() function -- all allocations are freed at once when clear() function is called, - //! or when the pool is destroyed. - //!

- //! It is also possible to create a standalone memory_pool, and use it - //! to allocate nodes, whose lifetime will not be tied to any document. - //!

- //! Pool maintains RAPIDXML_STATIC_POOL_SIZE bytes of statically allocated memory. - //! Until static memory is exhausted, no dynamic memory allocations are done. - //! When static memory is exhausted, pool allocates additional blocks of memory of size RAPIDXML_DYNAMIC_POOL_SIZE each, - //! by using global new[] and delete[] operators. - //! This behaviour can be changed by setting custom allocation routines. - //! Use set_allocator() function to set them. - //!

- //! Allocations for nodes, attributes and strings are aligned at RAPIDXML_ALIGNMENT bytes. - //! This value defaults to the size of pointer on target architecture. - //!

- //! To obtain absolutely top performance from the parser, - //! it is important that all nodes are allocated from a single, contiguous block of memory. - //! Otherwise, cache misses when jumping between two (or more) disjoint blocks of memory can slow down parsing quite considerably. - //! If required, you can tweak RAPIDXML_STATIC_POOL_SIZE, RAPIDXML_DYNAMIC_POOL_SIZE and RAPIDXML_ALIGNMENT - //! to obtain best wasted memory to performance compromise. - //! To do it, define their values before rapidxml.hpp file is included. - //! \param Ch Character type of created nodes. - template - class memory_pool + return node; + } + + //! Allocates a new attribute from the pool, and optionally assigns name and + //! value to it. If the allocation request cannot be accomodated, this + //! function will throw std::bad_alloc. If exceptions are + //! disabled by defining RAPIDXML_NO_EXCEPTIONS, this function will call + //! rapidxml::parse_error_handler() function. + //! \param name Name to assign to the attribute, or 0 to assign no name. + //! \param value Value to assign to the attribute, or 0 to assign no value. + //! \param name_size Size of name to assign, or 0 to automatically calculate + //! size from name string. + //! \param value_size Size of value to assign, or 0 to automatically calculate + //! size from value string. + //! \return Pointer to allocated attribute. This pointer will never be NULL. + xml_attribute *allocate_attribute(const Ch *name = 0, const Ch *value = 0, + std::size_t name_size = 0, + std::size_t value_size = 0) { + void *memory = allocate_aligned(sizeof(xml_attribute)); + xml_attribute *attribute = new (memory) xml_attribute; + if (name) { + if (name_size > 0) + attribute->name(name, name_size); + else + attribute->name(name); + } + if (value) { + if (value_size > 0) + attribute->value(value, value_size); + else + attribute->value(value); + } + return attribute; + } + + //! 
Allocates a char array of given size from the pool, and optionally copies + //! a given string to it. If the allocation request cannot be accomodated, + //! this function will throw std::bad_alloc. If exceptions are + //! disabled by defining RAPIDXML_NO_EXCEPTIONS, this function will call + //! rapidxml::parse_error_handler() function. + //! \param source String to initialize the allocated memory with, or 0 to not + //! initialize it. + //! \param size Number of characters to allocate, or zero to calculate it + //! automatically from source string length; if size is 0, source string must + //! be specified and null terminated. + //! \return Pointer to allocated char array. This pointer will never be NULL. + Ch *allocate_string(const Ch *source = 0, std::size_t size = 0) { + assert(source || size); // Either source or size (or both) must be specified + if (size == 0) + size = internal::measure(source) + 1; + Ch *result = static_cast(allocate_aligned(size * sizeof(Ch))); + if (source) + for (std::size_t i = 0; i < size; ++i) + result[i] = source[i]; + return result; + } + + //! Clones an xml_node and its hierarchy of child nodes and attributes. + //! Nodes and attributes are allocated from this memory pool. + //! Names and values are not cloned, they are shared between the clone and the + //! source. Result node can be optionally specified as a second parameter, in + //! which case its contents will be replaced with cloned source node. This is + //! useful when you want to clone entire document. + //! \param source Node to clone. + //! \param result Node to put results in, or 0 to automatically allocate + //! result node + //! \return Pointer to cloned node. This pointer will never be NULL. 
+ xml_node *clone_node(const xml_node *source, + xml_node *result = 0) { + // Prepare result node + if (result) { + result->remove_all_attributes(); + result->remove_all_nodes(); + result->type(source->type()); + } else + result = allocate_node(source->type()); + + // Clone name and value + result->name(source->name(), source->name_size()); + result->value(source->value(), source->value_size()); + + // Clone child nodes and attributes + for (xml_node *child = source->first_node(); child; + child = child->next_sibling()) + result->append_node(clone_node(child)); + for (xml_attribute *attr = source->first_attribute(); attr; + attr = attr->next_attribute()) + result->append_attribute(allocate_attribute( + attr->name(), attr->value(), attr->name_size(), attr->value_size())); + + return result; + } + + //! Clears the pool. + //! This causes memory occupied by nodes allocated by the pool to be freed. + //! Any nodes or strings allocated from the pool will no longer be valid. + void clear() { + while (m_begin != m_static_memory) { + char *previous_begin = + reinterpret_cast
(align(m_begin))->previous_begin; + if (m_free_func) + m_free_func(m_begin); + else + delete[] m_begin; + m_begin = previous_begin; + } + init(); + } + + //! Sets or resets the user-defined memory allocation functions for the pool. + //! This can only be called when no memory is allocated from the pool yet, + //! otherwise results are undefined. Allocation function must not return + //! invalid pointer on failure. It should either throw, stop the program, or + //! use longjmp() function to pass control to other place of + //! program. If it returns invalid pointer, results are undefined.

+ //! User defined allocation functions must have the following forms: + //!
+ //!
void *allocate(std::size_t size); + //!
void free(void *pointer); + //!

+ //! \param af Allocation function, or 0 to restore default function + //! \param ff Free function, or 0 to restore default function + void set_allocator(alloc_func *af, free_func *ff) { + assert(m_begin == m_static_memory && + m_ptr == align(m_begin)); // Verify that no memory is allocated yet + m_alloc_func = af; + m_free_func = ff; + } + +private: + struct header { + char *previous_begin; + }; + + void init() { + m_begin = m_static_memory; + m_ptr = align(m_begin); + m_end = m_static_memory + sizeof(m_static_memory); + } + + char *align(char *ptr) { + std::size_t alignment = + ((RAPIDXML_ALIGNMENT - (std::size_t(ptr) & (RAPIDXML_ALIGNMENT - 1))) & + (RAPIDXML_ALIGNMENT - 1)); + return ptr + alignment; + } + + char *allocate_raw(std::size_t size) { + // Allocate + void *memory; + if (m_alloc_func) // Allocate memory using either user-specified allocation + // function or global operator new[] { - - public: - - //! \cond internal - typedef void *(alloc_func)(std::size_t); // Type of user-defined function used to allocate memory - typedef void (free_func)(void *); // Type of user-defined function used to free memory - //! \endcond - - //! Constructs empty pool with default allocator functions. - memory_pool() - : m_alloc_func(0) - , m_free_func(0) - { - init(); - } - - //! Destroys pool and frees all the memory. - //! This causes memory occupied by nodes allocated by the pool to be freed. - //! Nodes allocated from the pool are no longer valid. - ~memory_pool() - { - clear(); - } - - //! Allocates a new node from the pool, and optionally assigns name and value to it. - //! If the allocation request cannot be accomodated, this function will throw std::bad_alloc. - //! If exceptions are disabled by defining RAPIDXML_NO_EXCEPTIONS, this function - //! will call rapidxml::parse_error_handler() function. - //! \param type Type of node to create. - //! \param name Name to assign to the node, or 0 to assign no name. - //! 
\param value Value to assign to the node, or 0 to assign no value. - //! \param name_size Size of name to assign, or 0 to automatically calculate size from name string. - //! \param value_size Size of value to assign, or 0 to automatically calculate size from value string. - //! \return Pointer to allocated node. This pointer will never be NULL. - xml_node *allocate_node(node_type type, - const Ch *name = 0, const Ch *value = 0, - std::size_t name_size = 0, std::size_t value_size = 0) - { - void *memory = allocate_aligned(sizeof(xml_node)); - xml_node *node = new(memory) xml_node(type); - if (name) - { - if (name_size > 0) - node->name(name, name_size); - else - node->name(name); - } - if (value) - { - if (value_size > 0) - node->value(value, value_size); - else - node->value(value); - } - return node; - } - - //! Allocates a new attribute from the pool, and optionally assigns name and value to it. - //! If the allocation request cannot be accomodated, this function will throw std::bad_alloc. - //! If exceptions are disabled by defining RAPIDXML_NO_EXCEPTIONS, this function - //! will call rapidxml::parse_error_handler() function. - //! \param name Name to assign to the attribute, or 0 to assign no name. - //! \param value Value to assign to the attribute, or 0 to assign no value. - //! \param name_size Size of name to assign, or 0 to automatically calculate size from name string. - //! \param value_size Size of value to assign, or 0 to automatically calculate size from value string. - //! \return Pointer to allocated attribute. This pointer will never be NULL. 
- xml_attribute *allocate_attribute(const Ch *name = 0, const Ch *value = 0, - std::size_t name_size = 0, std::size_t value_size = 0) - { - void *memory = allocate_aligned(sizeof(xml_attribute)); - xml_attribute *attribute = new(memory) xml_attribute; - if (name) - { - if (name_size > 0) - attribute->name(name, name_size); - else - attribute->name(name); - } - if (value) - { - if (value_size > 0) - attribute->value(value, value_size); - else - attribute->value(value); - } - return attribute; - } - - //! Allocates a char array of given size from the pool, and optionally copies a given string to it. - //! If the allocation request cannot be accomodated, this function will throw std::bad_alloc. - //! If exceptions are disabled by defining RAPIDXML_NO_EXCEPTIONS, this function - //! will call rapidxml::parse_error_handler() function. - //! \param source String to initialize the allocated memory with, or 0 to not initialize it. - //! \param size Number of characters to allocate, or zero to calculate it automatically from source string length; if size is 0, source string must be specified and null terminated. - //! \return Pointer to allocated char array. This pointer will never be NULL. - Ch *allocate_string(const Ch *source = 0, std::size_t size = 0) - { - assert(source || size); // Either source or size (or both) must be specified - if (size == 0) - size = internal::measure(source) + 1; - Ch *result = static_cast(allocate_aligned(size * sizeof(Ch))); - if (source) - for (std::size_t i = 0; i < size; ++i) - result[i] = source[i]; - return result; - } - - //! Clones an xml_node and its hierarchy of child nodes and attributes. - //! Nodes and attributes are allocated from this memory pool. - //! Names and values are not cloned, they are shared between the clone and the source. - //! Result node can be optionally specified as a second parameter, - //! in which case its contents will be replaced with cloned source node. - //! 
This is useful when you want to clone entire document. - //! \param source Node to clone. - //! \param result Node to put results in, or 0 to automatically allocate result node - //! \return Pointer to cloned node. This pointer will never be NULL. - xml_node *clone_node(const xml_node *source, xml_node *result = 0) - { - // Prepare result node - if (result) - { - result->remove_all_attributes(); - result->remove_all_nodes(); - result->type(source->type()); - } - else - result = allocate_node(source->type()); - - // Clone name and value - result->name(source->name(), source->name_size()); - result->value(source->value(), source->value_size()); - - // Clone child nodes and attributes - for (xml_node *child = source->first_node(); child; child = child->next_sibling()) - result->append_node(clone_node(child)); - for (xml_attribute *attr = source->first_attribute(); attr; attr = attr->next_attribute()) - result->append_attribute(allocate_attribute(attr->name(), attr->value(), attr->name_size(), attr->value_size())); - - return result; - } - - //! Clears the pool. - //! This causes memory occupied by nodes allocated by the pool to be freed. - //! Any nodes or strings allocated from the pool will no longer be valid. - void clear() - { - while (m_begin != m_static_memory) - { - char *previous_begin = reinterpret_cast
(align(m_begin))->previous_begin; - if (m_free_func) - m_free_func(m_begin); - else - delete[] m_begin; - m_begin = previous_begin; - } - init(); - } - - //! Sets or resets the user-defined memory allocation functions for the pool. - //! This can only be called when no memory is allocated from the pool yet, otherwise results are undefined. - //! Allocation function must not return invalid pointer on failure. It should either throw, - //! stop the program, or use longjmp() function to pass control to other place of program. - //! If it returns invalid pointer, results are undefined. - //!

- //! User defined allocation functions must have the following forms: - //!
- //!
void *allocate(std::size_t size); - //!
void free(void *pointer); - //!

- //! \param af Allocation function, or 0 to restore default function - //! \param ff Free function, or 0 to restore default function - void set_allocator(alloc_func *af, free_func *ff) - { - assert(m_begin == m_static_memory && m_ptr == align(m_begin)); // Verify that no memory is allocated yet - m_alloc_func = af; - m_free_func = ff; - } - - private: - - struct header - { - char *previous_begin; - }; - - void init() - { - m_begin = m_static_memory; - m_ptr = align(m_begin); - m_end = m_static_memory + sizeof(m_static_memory); - } - - char *align(char *ptr) - { - std::size_t alignment = ((RAPIDXML_ALIGNMENT - (std::size_t(ptr) & (RAPIDXML_ALIGNMENT - 1))) & (RAPIDXML_ALIGNMENT - 1)); - return ptr + alignment; - } - - char *allocate_raw(std::size_t size) - { - // Allocate - void *memory; - if (m_alloc_func) // Allocate memory using either user-specified allocation function or global operator new[] - { - memory = m_alloc_func(size); - assert(memory); // Allocator is not allowed to return 0, on failure it must either throw, stop the program or use longjmp - } - else - { - memory = new char[size]; + memory = m_alloc_func(size); + assert(memory); // Allocator is not allowed to return 0, on failure it + // must either throw, stop the program or use longjmp + } else { + memory = new char[size]; #ifdef RAPIDXML_NO_EXCEPTIONS - if (!memory) // If exceptions are disabled, verify memory allocation, because new will not be able to throw bad_alloc - RAPIDXML_PARSE_ERROR("out of memory", 0); + if (!memory) // If exceptions are disabled, verify memory allocation, + // because new will not be able to throw bad_alloc + RAPIDXML_PARSE_ERROR("out of memory", 0); #endif - } - return static_cast(memory); - } - - void *allocate_aligned(std::size_t size) - { - // Calculate aligned pointer - char *result = align(m_ptr); - - // If not enough memory left in current pool, allocate a new pool - if (result + size > m_end) - { - // Calculate required pool size (may be bigger than 
RAPIDXML_DYNAMIC_POOL_SIZE) - std::size_t pool_size = RAPIDXML_DYNAMIC_POOL_SIZE; - if (pool_size < size) - pool_size = size; - - // Allocate - std::size_t alloc_size = sizeof(header) + (2 * RAPIDXML_ALIGNMENT - 2) + pool_size; // 2 alignments required in worst case: one for header, one for actual allocation - char *raw_memory = allocate_raw(alloc_size); - - // Setup new pool in allocated memory - char *pool = align(raw_memory); - header *new_header = reinterpret_cast
(pool); - new_header->previous_begin = m_begin; - m_begin = raw_memory; - m_ptr = pool + sizeof(header); - m_end = raw_memory + alloc_size; - - // Calculate aligned pointer again using new pool - result = align(m_ptr); - } - - // Update pool and return aligned pointer - m_ptr = result + size; - return result; - } - - char *m_begin; // Start of raw memory making up current pool - char *m_ptr; // First free byte in current pool - char *m_end; // One past last available byte in current pool - char m_static_memory[RAPIDXML_STATIC_POOL_SIZE]; // Static raw memory - alloc_func *m_alloc_func; // Allocator function, or 0 if default is to be used - free_func *m_free_func; // Free function, or 0 if default is to be used - }; - - /////////////////////////////////////////////////////////////////////////// - // XML base - - //! Base class for xml_node and xml_attribute implementing common functions: - //! name(), name_size(), value(), value_size() and parent(). - //! \param Ch Character type to use - template - class xml_base - { - - public: - - /////////////////////////////////////////////////////////////////////////// - // Construction & destruction - - // Construct a base with empty name, value and parent - xml_base() - : m_name(0) - , m_value(0) - , m_parent(0) - { - } - - /////////////////////////////////////////////////////////////////////////// - // Node data access - - //! Gets name of the node. - //! Interpretation of name depends on type of node. - //! Note that name will not be zero-terminated if rapidxml::parse_no_string_terminators option was selected during parse. - //!

- //! Use name_size() function to determine length of the name. - //! \return Name of node, or empty string if node has no name. - Ch *name() const - { - return m_name ? m_name : nullstr(); - } - - //! Gets size of node name, not including terminator character. - //! This function works correctly irrespective of whether name is or is not zero terminated. - //! \return Size of node name, in characters. - std::size_t name_size() const - { - return m_name ? m_name_size : 0; - } - - //! Gets value of node. - //! Interpretation of value depends on type of node. - //! Note that value will not be zero-terminated if rapidxml::parse_no_string_terminators option was selected during parse. - //!

- //! Use value_size() function to determine length of the value. - //! \return Value of node, or empty string if node has no value. - Ch *value() const - { - return m_value ? m_value : nullstr(); - } - - //! Gets size of node value, not including terminator character. - //! This function works correctly irrespective of whether value is or is not zero terminated. - //! \return Size of node value, in characters. - std::size_t value_size() const - { - return m_value ? m_value_size : 0; - } - - /////////////////////////////////////////////////////////////////////////// - // Node modification - - //! Sets name of node to a non zero-terminated string. - //! See \ref ownership_of_strings. - //!

- //! Note that node does not own its name or value, it only stores a pointer to it. - //! It will not delete or otherwise free the pointer on destruction. - //! It is reponsibility of the user to properly manage lifetime of the string. - //! The easiest way to achieve it is to use memory_pool of the document to allocate the string - - //! on destruction of the document the string will be automatically freed. - //!

- //! Size of name must be specified separately, because name does not have to be zero terminated. - //! Use name(const Ch *) function to have the length automatically calculated (string must be zero terminated). - //! \param name Name of node to set. Does not have to be zero terminated. - //! \param size Size of name, in characters. This does not include zero terminator, if one is present. - void name(const Ch *name, std::size_t size) - { - m_name = const_cast(name); - m_name_size = size; - } - - //! Sets name of node to a zero-terminated string. - //! See also \ref ownership_of_strings and xml_node::name(const Ch *, std::size_t). - //! \param name Name of node to set. Must be zero terminated. - void name(const Ch *name) - { - this->name(name, internal::measure(name)); - } - - //! Sets value of node to a non zero-terminated string. - //! See \ref ownership_of_strings. - //!

- //! Note that node does not own its name or value, it only stores a pointer to it. - //! It will not delete or otherwise free the pointer on destruction. - //! It is reponsibility of the user to properly manage lifetime of the string. - //! The easiest way to achieve it is to use memory_pool of the document to allocate the string - - //! on destruction of the document the string will be automatically freed. - //!

- //! Size of value must be specified separately, because it does not have to be zero terminated. - //! Use value(const Ch *) function to have the length automatically calculated (string must be zero terminated). - //!

- //! If an element has a child node of type node_data, it will take precedence over element value when printing. - //! If you want to manipulate data of elements using values, use parser flag rapidxml::parse_no_data_nodes to prevent creation of data nodes by the parser. - //! \param value value of node to set. Does not have to be zero terminated. - //! \param size Size of value, in characters. This does not include zero terminator, if one is present. - void value(const Ch *value, std::size_t size) - { - m_value = const_cast(value); - m_value_size = size; - } - - //! Sets value of node to a zero-terminated string. - //! See also \ref ownership_of_strings and xml_node::value(const Ch *, std::size_t). - //! \param value Vame of node to set. Must be zero terminated. - void value(const Ch *value) - { - this->value(value, internal::measure(value)); - } - - /////////////////////////////////////////////////////////////////////////// - // Related nodes access - - //! Gets node parent. - //! \return Pointer to parent node, or 0 if there is no parent. - xml_node *parent() const - { - return m_parent; - } - - protected: - - // Return empty string - static Ch *nullstr() - { - static Ch zero = Ch('\0'); - return &zero; - } - - Ch *m_name; // Name of node, or 0 if no name - Ch *m_value; // Value of node, or 0 if no value - std::size_t m_name_size; // Length of node name, or undefined of no name - std::size_t m_value_size; // Length of node value, or undefined if no value - xml_node *m_parent; // Pointer to parent node, or 0 if none - - }; - - //! Class representing attribute node of XML document. - //! Each attribute has name and value strings, which are available through name() and value() functions (inherited from xml_base). - //! Note that after parse, both name and value of attribute will point to interior of source text used for parsing. - //! Thus, this text must persist in memory for the lifetime of attribute. - //! \param Ch Character type to use. 
- template - class xml_attribute: public xml_base - { - - friend class xml_node; - - public: - - /////////////////////////////////////////////////////////////////////////// - // Construction & destruction - - //! Constructs an empty attribute with the specified type. - //! Consider using memory_pool of appropriate xml_document if allocating attributes manually. - xml_attribute() - { - } - - /////////////////////////////////////////////////////////////////////////// - // Related nodes access - - //! Gets document of which attribute is a child. - //! \return Pointer to document that contains this attribute, or 0 if there is no parent document. - xml_document *document() const - { - if (xml_node *node = this->parent()) - { - while (node->parent()) - node = node->parent(); - return node->type() == node_document ? static_cast *>(node) : 0; - } - else - return 0; - } - - //! Gets previous attribute, optionally matching attribute name. - //! \param name Name of attribute to find, or 0 to return previous attribute regardless of its name; this string doesn't have to be zero-terminated if name_size is non-zero - //! \param name_size Size of name, in characters, or 0 to have size calculated automatically from string - //! \param case_sensitive Should name comparison be case-sensitive; non case-sensitive comparison works properly only for ASCII characters - //! \return Pointer to found attribute, or 0 if not found. - xml_attribute *previous_attribute(const Ch *name = 0, std::size_t name_size = 0, bool case_sensitive = true) const - { - if (name) - { - if (name_size == 0) - name_size = internal::measure(name); - for (xml_attribute *attribute = m_prev_attribute; attribute; attribute = attribute->m_prev_attribute) - if (internal::compare(attribute->name(), attribute->name_size(), name, name_size, case_sensitive)) - return attribute; - return 0; - } - else - return this->m_parent ? m_prev_attribute : 0; - } - - //! Gets next attribute, optionally matching attribute name. - //! 
\param name Name of attribute to find, or 0 to return next attribute regardless of its name; this string doesn't have to be zero-terminated if name_size is non-zero - //! \param name_size Size of name, in characters, or 0 to have size calculated automatically from string - //! \param case_sensitive Should name comparison be case-sensitive; non case-sensitive comparison works properly only for ASCII characters - //! \return Pointer to found attribute, or 0 if not found. - xml_attribute *next_attribute(const Ch *name = 0, std::size_t name_size = 0, bool case_sensitive = true) const - { - if (name) - { - if (name_size == 0) - name_size = internal::measure(name); - for (xml_attribute *attribute = m_next_attribute; attribute; attribute = attribute->m_next_attribute) - if (internal::compare(attribute->name(), attribute->name_size(), name, name_size, case_sensitive)) - return attribute; - return 0; - } - else - return this->m_parent ? m_next_attribute : 0; - } - - private: - - xml_attribute *m_prev_attribute; // Pointer to previous sibling of attribute, or 0 if none; only valid if parent is non-zero - xml_attribute *m_next_attribute; // Pointer to next sibling of attribute, or 0 if none; only valid if parent is non-zero - - }; - - /////////////////////////////////////////////////////////////////////////// - // XML node - - //! Class representing a node of XML document. - //! Each node may have associated name and value strings, which are available through name() and value() functions. - //! Interpretation of name and value depends on type of the node. - //! Type of node can be determined by using type() function. - //!

- //! Note that after parse, both name and value of node, if any, will point interior of source text used for parsing. - //! Thus, this text must persist in the memory for the lifetime of node. - //! \param Ch Character type to use. - template - class xml_node: public xml_base - { - - public: - - /////////////////////////////////////////////////////////////////////////// - // Construction & destruction - - //! Constructs an empty node with the specified type. - //! Consider using memory_pool of appropriate document to allocate nodes manually. - //! \param type Type of node to construct. - xml_node(node_type type) - : m_type(type) - , m_first_node(0) - , m_first_attribute(0) - { - } - - /////////////////////////////////////////////////////////////////////////// - // Node data access - - //! Gets type of node. - //! \return Type of node. - node_type type() const - { - return m_type; - } - - /////////////////////////////////////////////////////////////////////////// - // Related nodes access - - //! Gets document of which node is a child. - //! \return Pointer to document that contains this node, or 0 if there is no parent document. - xml_document *document() const - { - xml_node *node = const_cast *>(this); - while (node->parent()) - node = node->parent(); - return node->type() == node_document ? static_cast *>(node) : 0; - } - - //! Gets first child node, optionally matching node name. - //! \param name Name of child to find, or 0 to return first child regardless of its name; this string doesn't have to be zero-terminated if name_size is non-zero - //! \param name_size Size of name, in characters, or 0 to have size calculated automatically from string - //! \param case_sensitive Should name comparison be case-sensitive; non case-sensitive comparison works properly only for ASCII characters - //! \return Pointer to found child, or 0 if not found. 
- xml_node *first_node(const Ch *name = 0, std::size_t name_size = 0, bool case_sensitive = true) const - { - if (name) - { - if (name_size == 0) - name_size = internal::measure(name); - for (xml_node *child = m_first_node; child; child = child->next_sibling()) - if (internal::compare(child->name(), child->name_size(), name, name_size, case_sensitive)) - return child; - return 0; - } - else - return m_first_node; - } - - //! Gets last child node, optionally matching node name. - //! Behaviour is undefined if node has no children. - //! Use first_node() to test if node has children. - //! \param name Name of child to find, or 0 to return last child regardless of its name; this string doesn't have to be zero-terminated if name_size is non-zero - //! \param name_size Size of name, in characters, or 0 to have size calculated automatically from string - //! \param case_sensitive Should name comparison be case-sensitive; non case-sensitive comparison works properly only for ASCII characters - //! \return Pointer to found child, or 0 if not found. - xml_node *last_node(const Ch *name = 0, std::size_t name_size = 0, bool case_sensitive = true) const - { - assert(m_first_node); // Cannot query for last child if node has no children - if (name) - { - if (name_size == 0) - name_size = internal::measure(name); - for (xml_node *child = m_last_node; child; child = child->previous_sibling()) - if (internal::compare(child->name(), child->name_size(), name, name_size, case_sensitive)) - return child; - return 0; - } - else - return m_last_node; - } - - //! Gets previous sibling node, optionally matching node name. - //! Behaviour is undefined if node has no parent. - //! Use parent() to test if node has a parent. - //! \param name Name of sibling to find, or 0 to return previous sibling regardless of its name; this string doesn't have to be zero-terminated if name_size is non-zero - //! 
\param name_size Size of name, in characters, or 0 to have size calculated automatically from string - //! \param case_sensitive Should name comparison be case-sensitive; non case-sensitive comparison works properly only for ASCII characters - //! \return Pointer to found sibling, or 0 if not found. - xml_node *previous_sibling(const Ch *name = 0, std::size_t name_size = 0, bool case_sensitive = true) const - { - assert(this->m_parent); // Cannot query for siblings if node has no parent - if (name) - { - if (name_size == 0) - name_size = internal::measure(name); - for (xml_node *sibling = m_prev_sibling; sibling; sibling = sibling->m_prev_sibling) - if (internal::compare(sibling->name(), sibling->name_size(), name, name_size, case_sensitive)) - return sibling; - return 0; - } - else - return m_prev_sibling; - } - - //! Gets next sibling node, optionally matching node name. - //! Behaviour is undefined if node has no parent. - //! Use parent() to test if node has a parent. - //! \param name Name of sibling to find, or 0 to return next sibling regardless of its name; this string doesn't have to be zero-terminated if name_size is non-zero - //! \param name_size Size of name, in characters, or 0 to have size calculated automatically from string - //! \param case_sensitive Should name comparison be case-sensitive; non case-sensitive comparison works properly only for ASCII characters - //! \return Pointer to found sibling, or 0 if not found. - xml_node *next_sibling(const Ch *name = 0, std::size_t name_size = 0, bool case_sensitive = true) const - { - assert(this->m_parent); // Cannot query for siblings if node has no parent - if (name) - { - if (name_size == 0) - name_size = internal::measure(name); - for (xml_node *sibling = m_next_sibling; sibling; sibling = sibling->m_next_sibling) - if (internal::compare(sibling->name(), sibling->name_size(), name, name_size, case_sensitive)) - return sibling; - return 0; - } - else - return m_next_sibling; - } - - //! 
Gets first attribute of node, optionally matching attribute name. - //! \param name Name of attribute to find, or 0 to return first attribute regardless of its name; this string doesn't have to be zero-terminated if name_size is non-zero - //! \param name_size Size of name, in characters, or 0 to have size calculated automatically from string - //! \param case_sensitive Should name comparison be case-sensitive; non case-sensitive comparison works properly only for ASCII characters - //! \return Pointer to found attribute, or 0 if not found. - xml_attribute *first_attribute(const Ch *name = 0, std::size_t name_size = 0, bool case_sensitive = true) const - { - if (name) - { - if (name_size == 0) - name_size = internal::measure(name); - for (xml_attribute *attribute = m_first_attribute; attribute; attribute = attribute->m_next_attribute) - if (internal::compare(attribute->name(), attribute->name_size(), name, name_size, case_sensitive)) - return attribute; - return 0; - } - else - return m_first_attribute; - } - - //! Gets last attribute of node, optionally matching attribute name. - //! \param name Name of attribute to find, or 0 to return last attribute regardless of its name; this string doesn't have to be zero-terminated if name_size is non-zero - //! \param name_size Size of name, in characters, or 0 to have size calculated automatically from string - //! \param case_sensitive Should name comparison be case-sensitive; non case-sensitive comparison works properly only for ASCII characters - //! \return Pointer to found attribute, or 0 if not found. 
- xml_attribute *last_attribute(const Ch *name = 0, std::size_t name_size = 0, bool case_sensitive = true) const - { - if (name) - { - if (name_size == 0) - name_size = internal::measure(name); - for (xml_attribute *attribute = m_last_attribute; attribute; attribute = attribute->m_prev_attribute) - if (internal::compare(attribute->name(), attribute->name_size(), name, name_size, case_sensitive)) - return attribute; - return 0; - } - else - return m_first_attribute ? m_last_attribute : 0; - } - - /////////////////////////////////////////////////////////////////////////// - // Node modification - - //! Sets type of node. - //! \param type Type of node to set. - void type(node_type type) - { - m_type = type; - } - - /////////////////////////////////////////////////////////////////////////// - // Node manipulation - - //! Prepends a new child node. - //! The prepended child becomes the first child, and all existing children are moved one position back. - //! \param child Node to prepend. - void prepend_node(xml_node *child) - { - assert(child && !child->parent() && child->type() != node_document); - if (first_node()) - { - child->m_next_sibling = m_first_node; - m_first_node->m_prev_sibling = child; - } - else - { - child->m_next_sibling = 0; - m_last_node = child; - } - m_first_node = child; - child->m_parent = this; - child->m_prev_sibling = 0; - } - - //! Appends a new child node. - //! The appended child becomes the last child. - //! \param child Node to append. - void append_node(xml_node *child) - { - assert(child && !child->parent() && child->type() != node_document); - if (first_node()) - { - child->m_prev_sibling = m_last_node; - m_last_node->m_next_sibling = child; - } - else - { - child->m_prev_sibling = 0; - m_first_node = child; - } - m_last_node = child; - child->m_parent = this; - child->m_next_sibling = 0; - } - - //! Inserts a new child node at specified place inside the node. - //! 
All children after and including the specified node are moved one position back. - //! \param where Place where to insert the child, or 0 to insert at the back. - //! \param child Node to insert. - void insert_node(xml_node *where, xml_node *child) - { - assert(!where || where->parent() == this); - assert(child && !child->parent() && child->type() != node_document); - if (where == m_first_node) - prepend_node(child); - else if (where == 0) - append_node(child); - else - { - child->m_prev_sibling = where->m_prev_sibling; - child->m_next_sibling = where; - where->m_prev_sibling->m_next_sibling = child; - where->m_prev_sibling = child; - child->m_parent = this; - } - } - - //! Removes first child node. - //! If node has no children, behaviour is undefined. - //! Use first_node() to test if node has children. - void remove_first_node() - { - assert(first_node()); - xml_node *child = m_first_node; - m_first_node = child->m_next_sibling; - if (child->m_next_sibling) - child->m_next_sibling->m_prev_sibling = 0; - else - m_last_node = 0; - child->m_parent = 0; - } - - //! Removes last child of the node. - //! If node has no children, behaviour is undefined. - //! Use first_node() to test if node has children. - void remove_last_node() - { - assert(first_node()); - xml_node *child = m_last_node; - if (child->m_prev_sibling) - { - m_last_node = child->m_prev_sibling; - child->m_prev_sibling->m_next_sibling = 0; - } - else - m_first_node = 0; - child->m_parent = 0; - } - - //! Removes specified child from the node - // \param where Pointer to child to be removed. - void remove_node(xml_node *where) - { - assert(where && where->parent() == this); - assert(first_node()); - if (where == m_first_node) - remove_first_node(); - else if (where == m_last_node) - remove_last_node(); - else - { - where->m_prev_sibling->m_next_sibling = where->m_next_sibling; - where->m_next_sibling->m_prev_sibling = where->m_prev_sibling; - where->m_parent = 0; - } - } - - //! 
Removes all child nodes (but not attributes). - void remove_all_nodes() - { - for (xml_node *node = first_node(); node; node = node->m_next_sibling) - node->m_parent = 0; - m_first_node = 0; - } + } + return static_cast(memory); + } + + void *allocate_aligned(std::size_t size) { + // Calculate aligned pointer + char *result = align(m_ptr); + + // If not enough memory left in current pool, allocate a new pool + if (result + size > m_end) { + // Calculate required pool size (may be bigger than + // RAPIDXML_DYNAMIC_POOL_SIZE) + std::size_t pool_size = RAPIDXML_DYNAMIC_POOL_SIZE; + if (pool_size < size) + pool_size = size; + + // Allocate + std::size_t alloc_size = + sizeof(header) + (2 * RAPIDXML_ALIGNMENT - 2) + + pool_size; // 2 alignments required in worst case: one for header, one + // for actual allocation + char *raw_memory = allocate_raw(alloc_size); + + // Setup new pool in allocated memory + char *pool = align(raw_memory); + header *new_header = reinterpret_cast
(pool); + new_header->previous_begin = m_begin; + m_begin = raw_memory; + m_ptr = pool + sizeof(header); + m_end = raw_memory + alloc_size; + + // Calculate aligned pointer again using new pool + result = align(m_ptr); + } - //! Prepends a new attribute to the node. - //! \param attribute Attribute to prepend. - void prepend_attribute(xml_attribute *attribute) - { - assert(attribute && !attribute->parent()); - if (first_attribute()) - { - attribute->m_next_attribute = m_first_attribute; - m_first_attribute->m_prev_attribute = attribute; - } - else - { - attribute->m_next_attribute = 0; - m_last_attribute = attribute; - } - m_first_attribute = attribute; - attribute->m_parent = this; - attribute->m_prev_attribute = 0; - } + // Update pool and return aligned pointer + m_ptr = result + size; + return result; + } - //! Appends a new attribute to the node. - //! \param attribute Attribute to append. - void append_attribute(xml_attribute *attribute) - { - assert(attribute && !attribute->parent()); - if (first_attribute()) - { - attribute->m_prev_attribute = m_last_attribute; - m_last_attribute->m_next_attribute = attribute; - } - else - { - attribute->m_prev_attribute = 0; - m_first_attribute = attribute; - } - m_last_attribute = attribute; - attribute->m_parent = this; - attribute->m_next_attribute = 0; - } + char *m_begin; // Start of raw memory making up current pool + char *m_ptr; // First free byte in current pool + char *m_end; // One past last available byte in current pool + char m_static_memory[RAPIDXML_STATIC_POOL_SIZE]; // Static raw memory + alloc_func *m_alloc_func; // Allocator function, or 0 if default is to be used + free_func *m_free_func; // Free function, or 0 if default is to be used +}; - //! Inserts a new attribute at specified place inside the node. - //! All attributes after and including the specified attribute are moved one position back. - //! \param where Place where to insert the attribute, or 0 to insert at the back. - //! 
\param attribute Attribute to insert. - void insert_attribute(xml_attribute *where, xml_attribute *attribute) - { - assert(!where || where->parent() == this); - assert(attribute && !attribute->parent()); - if (where == m_first_attribute) - prepend_attribute(attribute); - else if (where == 0) - append_attribute(attribute); - else - { - attribute->m_prev_attribute = where->m_prev_attribute; - attribute->m_next_attribute = where; - where->m_prev_attribute->m_next_attribute = attribute; - where->m_prev_attribute = attribute; - attribute->m_parent = this; - } - } +/////////////////////////////////////////////////////////////////////////// +// XML base + +//! Base class for xml_node and xml_attribute implementing common functions: +//! name(), name_size(), value(), value_size() and parent(). +//! \param Ch Character type to use +template class xml_base { + +public: + /////////////////////////////////////////////////////////////////////////// + // Construction & destruction + + // Construct a base with empty name, value and parent + xml_base() : m_name(0), m_value(0), m_parent(0) {} + + /////////////////////////////////////////////////////////////////////////// + // Node data access + + //! Gets name of the node. + //! Interpretation of name depends on type of node. + //! Note that name will not be zero-terminated if + //! rapidxml::parse_no_string_terminators option was selected during parse. + //!

+ //! Use name_size() function to determine length of the name. + //! \return Name of node, or empty string if node has no name. + Ch *name() const { return m_name ? m_name : nullstr(); } + + //! Gets size of node name, not including terminator character. + //! This function works correctly irrespective of whether name is or is not + //! zero terminated. + //! \return Size of node name, in characters. + std::size_t name_size() const { return m_name ? m_name_size : 0; } + + //! Gets value of node. + //! Interpretation of value depends on type of node. + //! Note that value will not be zero-terminated if + //! rapidxml::parse_no_string_terminators option was selected during parse. + //!

+ //! Use value_size() function to determine length of the value. + //! \return Value of node, or empty string if node has no value. + Ch *value() const { return m_value ? m_value : nullstr(); } + + //! Gets size of node value, not including terminator character. + //! This function works correctly irrespective of whether value is or is not + //! zero terminated. + //! \return Size of node value, in characters. + std::size_t value_size() const { return m_value ? m_value_size : 0; } + + /////////////////////////////////////////////////////////////////////////// + // Node modification + + //! Sets name of node to a non zero-terminated string. + //! See \ref ownership_of_strings. + //!

+ //! Note that node does not own its name or value, it only stores a pointer to + //! it. It will not delete or otherwise free the pointer on destruction. It is + //! reponsibility of the user to properly manage lifetime of the string. The + //! easiest way to achieve it is to use memory_pool of the document to + //! allocate the string - on destruction of the document the string will be + //! automatically freed.

Size of name must be specified separately, + //! because name does not have to be zero terminated. Use name(const Ch *) + //! function to have the length automatically calculated (string must be zero + //! terminated). + //! \param name Name of node to set. Does not have to be zero terminated. + //! \param size Size of name, in characters. This does not include zero + //! terminator, if one is present. + void name(const Ch *name, std::size_t size) { + m_name = const_cast(name); + m_name_size = size; + } + + //! Sets name of node to a zero-terminated string. + //! See also \ref ownership_of_strings and xml_node::name(const Ch *, + //! std::size_t). + //! \param name Name of node to set. Must be zero terminated. + void name(const Ch *name) { this->name(name, internal::measure(name)); } + + //! Sets value of node to a non zero-terminated string. + //! See \ref ownership_of_strings. + //!

+ //! Note that node does not own its name or value, it only stores a pointer to + //! it. It will not delete or otherwise free the pointer on destruction. It is + //! reponsibility of the user to properly manage lifetime of the string. The + //! easiest way to achieve it is to use memory_pool of the document to + //! allocate the string - on destruction of the document the string will be + //! automatically freed.

Size of value must be specified separately, + //! because it does not have to be zero terminated. Use value(const Ch *) + //! function to have the length automatically calculated (string must be zero + //! terminated).

If an element has a child node of type node_data, it + //! will take precedence over element value when printing. If you want to + //! manipulate data of elements using values, use parser flag + //! rapidxml::parse_no_data_nodes to prevent creation of data nodes by the + //! parser. + //! \param value value of node to set. Does not have to be zero terminated. + //! \param size Size of value, in characters. This does not include zero + //! terminator, if one is present. + void value(const Ch *value, std::size_t size) { + m_value = const_cast(value); + m_value_size = size; + } + + //! Sets value of node to a zero-terminated string. + //! See also \ref ownership_of_strings and xml_node::value(const Ch *, + //! std::size_t). + //! \param value Vame of node to set. Must be zero terminated. + void value(const Ch *value) { this->value(value, internal::measure(value)); } + + /////////////////////////////////////////////////////////////////////////// + // Related nodes access + + //! Gets node parent. + //! \return Pointer to parent node, or 0 if there is no parent. + xml_node *parent() const { return m_parent; } + +protected: + // Return empty string + static Ch *nullstr() { + static Ch zero = Ch('\0'); + return &zero; + } + + Ch *m_name; // Name of node, or 0 if no name + Ch *m_value; // Value of node, or 0 if no value + std::size_t m_name_size; // Length of node name, or undefined of no name + std::size_t m_value_size; // Length of node value, or undefined if no value + xml_node *m_parent; // Pointer to parent node, or 0 if none +}; + +//! Class representing attribute node of XML document. +//! Each attribute has name and value strings, which are available through +//! name() and value() functions (inherited from xml_base). Note that after +//! parse, both name and value of attribute will point to interior of source +//! text used for parsing. Thus, this text must persist in memory for the +//! lifetime of attribute. +//! \param Ch Character type to use. 
+template class xml_attribute : public xml_base { + + friend class xml_node; + +public: + /////////////////////////////////////////////////////////////////////////// + // Construction & destruction + + //! Constructs an empty attribute with the specified type. + //! Consider using memory_pool of appropriate xml_document if allocating + //! attributes manually. + xml_attribute() {} + + /////////////////////////////////////////////////////////////////////////// + // Related nodes access + + //! Gets document of which attribute is a child. + //! \return Pointer to document that contains this attribute, or 0 if there is + //! no parent document. + xml_document *document() const { + if (xml_node *node = this->parent()) { + while (node->parent()) + node = node->parent(); + return node->type() == node_document + ? static_cast *>(node) + : 0; + } else + return 0; + } + + //! Gets previous attribute, optionally matching attribute name. + //! \param name Name of attribute to find, or 0 to return previous attribute + //! regardless of its name; this string doesn't have to be zero-terminated if + //! name_size is non-zero + //! \param name_size Size of name, in characters, or 0 to have size calculated + //! automatically from string + //! \param case_sensitive Should name comparison be case-sensitive; non + //! case-sensitive comparison works properly only for ASCII characters + //! \return Pointer to found attribute, or 0 if not found. + xml_attribute *previous_attribute(const Ch *name = 0, + std::size_t name_size = 0, + bool case_sensitive = true) const { + if (name) { + if (name_size == 0) + name_size = internal::measure(name); + for (xml_attribute *attribute = m_prev_attribute; attribute; + attribute = attribute->m_prev_attribute) + if (internal::compare(attribute->name(), attribute->name_size(), name, + name_size, case_sensitive)) + return attribute; + return 0; + } else + return this->m_parent ? m_prev_attribute : 0; + } + + //! 
Gets next attribute, optionally matching attribute name. + //! \param name Name of attribute to find, or 0 to return next attribute + //! regardless of its name; this string doesn't have to be zero-terminated if + //! name_size is non-zero + //! \param name_size Size of name, in characters, or 0 to have size calculated + //! automatically from string + //! \param case_sensitive Should name comparison be case-sensitive; non + //! case-sensitive comparison works properly only for ASCII characters + //! \return Pointer to found attribute, or 0 if not found. + xml_attribute *next_attribute(const Ch *name = 0, + std::size_t name_size = 0, + bool case_sensitive = true) const { + if (name) { + if (name_size == 0) + name_size = internal::measure(name); + for (xml_attribute *attribute = m_next_attribute; attribute; + attribute = attribute->m_next_attribute) + if (internal::compare(attribute->name(), attribute->name_size(), name, + name_size, case_sensitive)) + return attribute; + return 0; + } else + return this->m_parent ? m_next_attribute : 0; + } + +private: + xml_attribute + *m_prev_attribute; // Pointer to previous sibling of attribute, or 0 if + // none; only valid if parent is non-zero + xml_attribute + *m_next_attribute; // Pointer to next sibling of attribute, or 0 if none; + // only valid if parent is non-zero +}; - //! Removes first attribute of the node. - //! If node has no attributes, behaviour is undefined. - //! Use first_attribute() to test if node has attributes. - void remove_first_attribute() - { - assert(first_attribute()); - xml_attribute *attribute = m_first_attribute; - if (attribute->m_next_attribute) - { - attribute->m_next_attribute->m_prev_attribute = 0; - } - else - m_last_attribute = 0; - attribute->m_parent = 0; - m_first_attribute = attribute->m_next_attribute; - } +/////////////////////////////////////////////////////////////////////////// +// XML node + +//! Class representing a node of XML document. +//! 
Each node may have associated name and value strings, which are available +//! through name() and value() functions. Interpretation of name and value +//! depends on type of the node. Type of node can be determined by using type() +//! function.

Note that after parse, both name and value of node, if +//! any, will point interior of source text used for parsing. Thus, this text +//! must persist in the memory for the lifetime of node. +//! \param Ch Character type to use. +template class xml_node : public xml_base { + +public: + /////////////////////////////////////////////////////////////////////////// + // Construction & destruction + + //! Constructs an empty node with the specified type. + //! Consider using memory_pool of appropriate document to allocate nodes + //! manually. + //! \param type Type of node to construct. + xml_node(node_type type) + : m_type(type), m_first_node(0), m_first_attribute(0) {} + + /////////////////////////////////////////////////////////////////////////// + // Node data access + + //! Gets type of node. + //! \return Type of node. + node_type type() const { return m_type; } + + /////////////////////////////////////////////////////////////////////////// + // Related nodes access + + //! Gets document of which node is a child. + //! \return Pointer to document that contains this node, or 0 if there is no + //! parent document. + xml_document *document() const { + xml_node *node = const_cast *>(this); + while (node->parent()) + node = node->parent(); + return node->type() == node_document ? static_cast *>(node) + : 0; + } + + //! Gets first child node, optionally matching node name. + //! \param name Name of child to find, or 0 to return first child regardless + //! of its name; this string doesn't have to be zero-terminated if name_size + //! is non-zero + //! \param name_size Size of name, in characters, or 0 to have size calculated + //! automatically from string + //! \param case_sensitive Should name comparison be case-sensitive; non + //! case-sensitive comparison works properly only for ASCII characters + //! \return Pointer to found child, or 0 if not found. 
+ xml_node *first_node(const Ch *name = 0, std::size_t name_size = 0, + bool case_sensitive = true) const { + if (name) { + if (name_size == 0) + name_size = internal::measure(name); + for (xml_node *child = m_first_node; child; + child = child->next_sibling()) + if (internal::compare(child->name(), child->name_size(), name, + name_size, case_sensitive)) + return child; + return 0; + } else + return m_first_node; + } + + //! Gets last child node, optionally matching node name. + //! Behaviour is undefined if node has no children. + //! Use first_node() to test if node has children. + //! \param name Name of child to find, or 0 to return last child regardless of + //! its name; this string doesn't have to be zero-terminated if name_size is + //! non-zero + //! \param name_size Size of name, in characters, or 0 to have size calculated + //! automatically from string + //! \param case_sensitive Should name comparison be case-sensitive; non + //! case-sensitive comparison works properly only for ASCII characters + //! \return Pointer to found child, or 0 if not found. + xml_node *last_node(const Ch *name = 0, std::size_t name_size = 0, + bool case_sensitive = true) const { + assert(m_first_node); // Cannot query for last child if node has no children + if (name) { + if (name_size == 0) + name_size = internal::measure(name); + for (xml_node *child = m_last_node; child; + child = child->previous_sibling()) + if (internal::compare(child->name(), child->name_size(), name, + name_size, case_sensitive)) + return child; + return 0; + } else + return m_last_node; + } + + //! Gets previous sibling node, optionally matching node name. + //! Behaviour is undefined if node has no parent. + //! Use parent() to test if node has a parent. + //! \param name Name of sibling to find, or 0 to return previous sibling + //! regardless of its name; this string doesn't have to be zero-terminated if + //! name_size is non-zero + //! 
\param name_size Size of name, in characters, or 0 to have size calculated + //! automatically from string + //! \param case_sensitive Should name comparison be case-sensitive; non + //! case-sensitive comparison works properly only for ASCII characters + //! \return Pointer to found sibling, or 0 if not found. + xml_node *previous_sibling(const Ch *name = 0, std::size_t name_size = 0, + bool case_sensitive = true) const { + assert(this->m_parent); // Cannot query for siblings if node has no parent + if (name) { + if (name_size == 0) + name_size = internal::measure(name); + for (xml_node *sibling = m_prev_sibling; sibling; + sibling = sibling->m_prev_sibling) + if (internal::compare(sibling->name(), sibling->name_size(), name, + name_size, case_sensitive)) + return sibling; + return 0; + } else + return m_prev_sibling; + } + + //! Gets next sibling node, optionally matching node name. + //! Behaviour is undefined if node has no parent. + //! Use parent() to test if node has a parent. + //! \param name Name of sibling to find, or 0 to return next sibling + //! regardless of its name; this string doesn't have to be zero-terminated if + //! name_size is non-zero + //! \param name_size Size of name, in characters, or 0 to have size calculated + //! automatically from string + //! \param case_sensitive Should name comparison be case-sensitive; non + //! case-sensitive comparison works properly only for ASCII characters + //! \return Pointer to found sibling, or 0 if not found. 
+ xml_node *next_sibling(const Ch *name = 0, std::size_t name_size = 0, + bool case_sensitive = true) const { + assert(this->m_parent); // Cannot query for siblings if node has no parent + if (name) { + if (name_size == 0) + name_size = internal::measure(name); + for (xml_node *sibling = m_next_sibling; sibling; + sibling = sibling->m_next_sibling) + if (internal::compare(sibling->name(), sibling->name_size(), name, + name_size, case_sensitive)) + return sibling; + return 0; + } else + return m_next_sibling; + } + + //! Gets first attribute of node, optionally matching attribute name. + //! \param name Name of attribute to find, or 0 to return first attribute + //! regardless of its name; this string doesn't have to be zero-terminated if + //! name_size is non-zero + //! \param name_size Size of name, in characters, or 0 to have size calculated + //! automatically from string + //! \param case_sensitive Should name comparison be case-sensitive; non + //! case-sensitive comparison works properly only for ASCII characters + //! \return Pointer to found attribute, or 0 if not found. + xml_attribute *first_attribute(const Ch *name = 0, + std::size_t name_size = 0, + bool case_sensitive = true) const { + if (name) { + if (name_size == 0) + name_size = internal::measure(name); + for (xml_attribute *attribute = m_first_attribute; attribute; + attribute = attribute->m_next_attribute) + if (internal::compare(attribute->name(), attribute->name_size(), name, + name_size, case_sensitive)) + return attribute; + return 0; + } else + return m_first_attribute; + } + + //! Gets last attribute of node, optionally matching attribute name. + //! \param name Name of attribute to find, or 0 to return last attribute + //! regardless of its name; this string doesn't have to be zero-terminated if + //! name_size is non-zero + //! \param name_size Size of name, in characters, or 0 to have size calculated + //! automatically from string + //! 
\param case_sensitive Should name comparison be case-sensitive; non + //! case-sensitive comparison works properly only for ASCII characters + //! \return Pointer to found attribute, or 0 if not found. + xml_attribute *last_attribute(const Ch *name = 0, + std::size_t name_size = 0, + bool case_sensitive = true) const { + if (name) { + if (name_size == 0) + name_size = internal::measure(name); + for (xml_attribute *attribute = m_last_attribute; attribute; + attribute = attribute->m_prev_attribute) + if (internal::compare(attribute->name(), attribute->name_size(), name, + name_size, case_sensitive)) + return attribute; + return 0; + } else + return m_first_attribute ? m_last_attribute : 0; + } + + /////////////////////////////////////////////////////////////////////////// + // Node modification + + //! Sets type of node. + //! \param type Type of node to set. + void type(node_type type) { m_type = type; } + + /////////////////////////////////////////////////////////////////////////// + // Node manipulation + + //! Prepends a new child node. + //! The prepended child becomes the first child, and all existing children are + //! moved one position back. + //! \param child Node to prepend. + void prepend_node(xml_node *child) { + assert(child && !child->parent() && child->type() != node_document); + if (first_node()) { + child->m_next_sibling = m_first_node; + m_first_node->m_prev_sibling = child; + } else { + child->m_next_sibling = 0; + m_last_node = child; + } + m_first_node = child; + child->m_parent = this; + child->m_prev_sibling = 0; + } + + //! Appends a new child node. + //! The appended child becomes the last child. + //! \param child Node to append. 
+ void append_node(xml_node *child) { + assert(child && !child->parent() && child->type() != node_document); + if (first_node()) { + child->m_prev_sibling = m_last_node; + m_last_node->m_next_sibling = child; + } else { + child->m_prev_sibling = 0; + m_first_node = child; + } + m_last_node = child; + child->m_parent = this; + child->m_next_sibling = 0; + } + + //! Inserts a new child node at specified place inside the node. + //! All children after and including the specified node are moved one position + //! back. + //! \param where Place where to insert the child, or 0 to insert at the back. + //! \param child Node to insert. + void insert_node(xml_node *where, xml_node *child) { + assert(!where || where->parent() == this); + assert(child && !child->parent() && child->type() != node_document); + if (where == m_first_node) + prepend_node(child); + else if (where == 0) + append_node(child); + else { + child->m_prev_sibling = where->m_prev_sibling; + child->m_next_sibling = where; + where->m_prev_sibling->m_next_sibling = child; + where->m_prev_sibling = child; + child->m_parent = this; + } + } + + //! Removes first child node. + //! If node has no children, behaviour is undefined. + //! Use first_node() to test if node has children. + void remove_first_node() { + assert(first_node()); + xml_node *child = m_first_node; + m_first_node = child->m_next_sibling; + if (child->m_next_sibling) + child->m_next_sibling->m_prev_sibling = 0; + else + m_last_node = 0; + child->m_parent = 0; + } + + //! Removes last child of the node. + //! If node has no children, behaviour is undefined. + //! Use first_node() to test if node has children. + void remove_last_node() { + assert(first_node()); + xml_node *child = m_last_node; + if (child->m_prev_sibling) { + m_last_node = child->m_prev_sibling; + child->m_prev_sibling->m_next_sibling = 0; + } else + m_first_node = 0; + child->m_parent = 0; + } + + //! 
Removes specified child from the node + // \param where Pointer to child to be removed. + void remove_node(xml_node *where) { + assert(where && where->parent() == this); + assert(first_node()); + if (where == m_first_node) + remove_first_node(); + else if (where == m_last_node) + remove_last_node(); + else { + where->m_prev_sibling->m_next_sibling = where->m_next_sibling; + where->m_next_sibling->m_prev_sibling = where->m_prev_sibling; + where->m_parent = 0; + } + } + + //! Removes all child nodes (but not attributes). + void remove_all_nodes() { + for (xml_node *node = first_node(); node; node = node->m_next_sibling) + node->m_parent = 0; + m_first_node = 0; + } + + //! Prepends a new attribute to the node. + //! \param attribute Attribute to prepend. + void prepend_attribute(xml_attribute *attribute) { + assert(attribute && !attribute->parent()); + if (first_attribute()) { + attribute->m_next_attribute = m_first_attribute; + m_first_attribute->m_prev_attribute = attribute; + } else { + attribute->m_next_attribute = 0; + m_last_attribute = attribute; + } + m_first_attribute = attribute; + attribute->m_parent = this; + attribute->m_prev_attribute = 0; + } + + //! Appends a new attribute to the node. + //! \param attribute Attribute to append. + void append_attribute(xml_attribute *attribute) { + assert(attribute && !attribute->parent()); + if (first_attribute()) { + attribute->m_prev_attribute = m_last_attribute; + m_last_attribute->m_next_attribute = attribute; + } else { + attribute->m_prev_attribute = 0; + m_first_attribute = attribute; + } + m_last_attribute = attribute; + attribute->m_parent = this; + attribute->m_next_attribute = 0; + } + + //! Inserts a new attribute at specified place inside the node. + //! All attributes after and including the specified attribute are moved one + //! position back. + //! \param where Place where to insert the attribute, or 0 to insert at the + //! back. + //! \param attribute Attribute to insert. 
+ void insert_attribute(xml_attribute *where, + xml_attribute *attribute) { + assert(!where || where->parent() == this); + assert(attribute && !attribute->parent()); + if (where == m_first_attribute) + prepend_attribute(attribute); + else if (where == 0) + append_attribute(attribute); + else { + attribute->m_prev_attribute = where->m_prev_attribute; + attribute->m_next_attribute = where; + where->m_prev_attribute->m_next_attribute = attribute; + where->m_prev_attribute = attribute; + attribute->m_parent = this; + } + } + + //! Removes first attribute of the node. + //! If node has no attributes, behaviour is undefined. + //! Use first_attribute() to test if node has attributes. + void remove_first_attribute() { + assert(first_attribute()); + xml_attribute *attribute = m_first_attribute; + if (attribute->m_next_attribute) { + attribute->m_next_attribute->m_prev_attribute = 0; + } else + m_last_attribute = 0; + attribute->m_parent = 0; + m_first_attribute = attribute->m_next_attribute; + } + + //! Removes last attribute of the node. + //! If node has no attributes, behaviour is undefined. + //! Use first_attribute() to test if node has attributes. + void remove_last_attribute() { + assert(first_attribute()); + xml_attribute *attribute = m_last_attribute; + if (attribute->m_prev_attribute) { + attribute->m_prev_attribute->m_next_attribute = 0; + m_last_attribute = attribute->m_prev_attribute; + } else + m_first_attribute = 0; + attribute->m_parent = 0; + } + + //! Removes specified attribute from node. + //! \param where Pointer to attribute to be removed. 
+ void remove_attribute(xml_attribute *where) { + assert(first_attribute() && where->parent() == this); + if (where == m_first_attribute) + remove_first_attribute(); + else if (where == m_last_attribute) + remove_last_attribute(); + else { + where->m_prev_attribute->m_next_attribute = where->m_next_attribute; + where->m_next_attribute->m_prev_attribute = where->m_prev_attribute; + where->m_parent = 0; + } + } + + //! Removes all attributes of node. + void remove_all_attributes() { + for (xml_attribute *attribute = first_attribute(); attribute; + attribute = attribute->m_next_attribute) + attribute->m_parent = 0; + m_first_attribute = 0; + } + +private: + /////////////////////////////////////////////////////////////////////////// + // Restrictions + + // No copying + xml_node(const xml_node &); + void operator=(const xml_node &); + + /////////////////////////////////////////////////////////////////////////// + // Data members + + // Note that some of the pointers below have UNDEFINED values if certain other + // pointers are 0. This is required for maximum performance, as it allows the + // parser to omit initialization of unneded/redundant values. + // + // The rules are as follows: + // 1. first_node and first_attribute contain valid pointers, or 0 if node has + // no children/attributes respectively + // 2. last_node and last_attribute are valid only if node has at least one + // child/attribute respectively, otherwise they contain garbage + // 3. 
prev_sibling and next_sibling are valid only if node has a parent, + // otherwise they contain garbage + + node_type m_type; // Type of node; always valid + xml_node + *m_first_node; // Pointer to first child node, or 0 if none; always valid + xml_node *m_last_node; // Pointer to last child node, or 0 if none; this + // value is only valid if m_first_node is non-zero + xml_attribute *m_first_attribute; // Pointer to first attribute of node, + // or 0 if none; always valid + xml_attribute * + m_last_attribute; // Pointer to last attribute of node, or 0 if none; this + // value is only valid if m_first_attribute is non-zero + xml_node + *m_prev_sibling; // Pointer to previous sibling of node, or 0 if none; + // this value is only valid if m_parent is non-zero + xml_node + *m_next_sibling; // Pointer to next sibling of node, or 0 if none; this + // value is only valid if m_parent is non-zero +}; - //! Removes last attribute of the node. - //! If node has no attributes, behaviour is undefined. - //! Use first_attribute() to test if node has attributes. - void remove_last_attribute() - { - assert(first_attribute()); - xml_attribute *attribute = m_last_attribute; - if (attribute->m_prev_attribute) - { - attribute->m_prev_attribute->m_next_attribute = 0; - m_last_attribute = attribute->m_prev_attribute; - } - else - m_first_attribute = 0; - attribute->m_parent = 0; - } +/////////////////////////////////////////////////////////////////////////// +// XML document + +//! This class represents root of the DOM hierarchy. +//! It is also an xml_node and a memory_pool through public inheritance. +//! Use parse() function to build a DOM tree from a zero-terminated XML text +//! string. parse() function allocates memory for nodes and attributes by using +//! functions of xml_document, which are inherited from memory_pool. To access +//! root node of the document, use the document itself, as if it was an +//! xml_node. +//! \param Ch Character type to use. 
+template +class xml_document : public xml_node, public memory_pool { + +public: + //! Constructs empty XML document + xml_document() : xml_node(node_document) {} + + //! Parses zero-terminated XML string according to given flags. + //! Passed string will be modified by the parser, unless + //! rapidxml::parse_non_destructive flag is used. The string must persist for + //! the lifetime of the document. In case of error, rapidxml::parse_error + //! exception will be thrown.

If you want to parse contents of a + //! file, you must first load the file into the memory, and pass pointer to + //! its beginning. Make sure that data is zero-terminated.

Document + //! can be parsed into multiple times. Each new call to parse removes previous + //! nodes and attributes (if any), but does not clear memory pool. + //! \param text XML data to parse; pointer is non-const to denote fact that + //! this data may be modified by the parser. + template void parse(Ch *text) { + assert(text); + + // Remove current contents + this->remove_all_nodes(); + this->remove_all_attributes(); + + // Parse BOM, if any + parse_bom(text); + + // Parse children + while (1) { + // Skip whitespace before node + skip(text); + if (*text == 0) + break; + + // Parse and append new child + if (*text == Ch('<')) { + ++text; // Skip '<' + if (xml_node *node = parse_node(text)) + this->append_node(node); + } else + RAPIDXML_PARSE_ERROR("expected <", text); + } + } + + //! Clears the document by deleting all nodes and clearing the memory pool. + //! All nodes owned by document pool are destroyed. + void clear() { + this->remove_all_nodes(); + this->remove_all_attributes(); + memory_pool::clear(); + } + +private: + /////////////////////////////////////////////////////////////////////// + // Internal character utility functions + + // Detect whitespace character + struct whitespace_pred { + static unsigned char test(Ch ch) { + return internal::lookup_tables< + 0>::lookup_whitespace[static_cast(ch)]; + } + }; - //! Removes specified attribute from node. - //! \param where Pointer to attribute to be removed. 
- void remove_attribute(xml_attribute *where) - { - assert(first_attribute() && where->parent() == this); - if (where == m_first_attribute) - remove_first_attribute(); - else if (where == m_last_attribute) - remove_last_attribute(); - else - { - where->m_prev_attribute->m_next_attribute = where->m_next_attribute; - where->m_next_attribute->m_prev_attribute = where->m_prev_attribute; - where->m_parent = 0; - } - } + // Detect node name character + struct node_name_pred { + static unsigned char test(Ch ch) { + return internal::lookup_tables< + 0>::lookup_node_name[static_cast(ch)]; + } + }; - //! Removes all attributes of node. - void remove_all_attributes() - { - for (xml_attribute *attribute = first_attribute(); attribute; attribute = attribute->m_next_attribute) - attribute->m_parent = 0; - m_first_attribute = 0; - } - - private: - - /////////////////////////////////////////////////////////////////////////// - // Restrictions - - // No copying - xml_node(const xml_node &); - void operator =(const xml_node &); - - /////////////////////////////////////////////////////////////////////////// - // Data members - - // Note that some of the pointers below have UNDEFINED values if certain other pointers are 0. - // This is required for maximum performance, as it allows the parser to omit initialization of - // unneded/redundant values. - // - // The rules are as follows: - // 1. first_node and first_attribute contain valid pointers, or 0 if node has no children/attributes respectively - // 2. last_node and last_attribute are valid only if node has at least one child/attribute respectively, otherwise they contain garbage - // 3. 
prev_sibling and next_sibling are valid only if node has a parent, otherwise they contain garbage - - node_type m_type; // Type of node; always valid - xml_node *m_first_node; // Pointer to first child node, or 0 if none; always valid - xml_node *m_last_node; // Pointer to last child node, or 0 if none; this value is only valid if m_first_node is non-zero - xml_attribute *m_first_attribute; // Pointer to first attribute of node, or 0 if none; always valid - xml_attribute *m_last_attribute; // Pointer to last attribute of node, or 0 if none; this value is only valid if m_first_attribute is non-zero - xml_node *m_prev_sibling; // Pointer to previous sibling of node, or 0 if none; this value is only valid if m_parent is non-zero - xml_node *m_next_sibling; // Pointer to next sibling of node, or 0 if none; this value is only valid if m_parent is non-zero - - }; - - /////////////////////////////////////////////////////////////////////////// - // XML document - - //! This class represents root of the DOM hierarchy. - //! It is also an xml_node and a memory_pool through public inheritance. - //! Use parse() function to build a DOM tree from a zero-terminated XML text string. - //! parse() function allocates memory for nodes and attributes by using functions of xml_document, - //! which are inherited from memory_pool. - //! To access root node of the document, use the document itself, as if it was an xml_node. - //! \param Ch Character type to use. - template - class xml_document: public xml_node, public memory_pool - { - - public: + // Detect attribute name character + struct attribute_name_pred { + static unsigned char test(Ch ch) { + return internal::lookup_tables< + 0>::lookup_attribute_name[static_cast(ch)]; + } + }; - //! 
Constructs empty XML document - xml_document() - : xml_node(node_document) - { - } + // Detect text character (PCDATA) + struct text_pred { + static unsigned char test(Ch ch) { + return internal::lookup_tables<0>::lookup_text[static_cast( + ch)]; + } + }; - //! Parses zero-terminated XML string according to given flags. - //! Passed string will be modified by the parser, unless rapidxml::parse_non_destructive flag is used. - //! The string must persist for the lifetime of the document. - //! In case of error, rapidxml::parse_error exception will be thrown. - //!

- //! If you want to parse contents of a file, you must first load the file into the memory, and pass pointer to its beginning. - //! Make sure that data is zero-terminated. - //!

- //! Document can be parsed into multiple times. - //! Each new call to parse removes previous nodes and attributes (if any), but does not clear memory pool. - //! \param text XML data to parse; pointer is non-const to denote fact that this data may be modified by the parser. - template - void parse(Ch *text) - { - assert(text); - - // Remove current contents - this->remove_all_nodes(); - this->remove_all_attributes(); - - // Parse BOM, if any - parse_bom(text); - - // Parse children - while (1) - { - // Skip whitespace before node - skip(text); - if (*text == 0) - break; - - // Parse and append new child - if (*text == Ch('<')) - { - ++text; // Skip '<' - if (xml_node *node = parse_node(text)) - this->append_node(node); - } - else - RAPIDXML_PARSE_ERROR("expected <", text); - } + // Detect text character (PCDATA) that does not require processing + struct text_pure_no_ws_pred { + static unsigned char test(Ch ch) { + return internal::lookup_tables< + 0>::lookup_text_pure_no_ws[static_cast(ch)]; + } + }; - } + // Detect text character (PCDATA) that does not require processing + struct text_pure_with_ws_pred { + static unsigned char test(Ch ch) { + return internal::lookup_tables< + 0>::lookup_text_pure_with_ws[static_cast(ch)]; + } + }; + + // Detect attribute value character + template struct attribute_value_pred { + static unsigned char test(Ch ch) { + if (Quote == Ch('\'')) + return internal::lookup_tables< + 0>::lookup_attribute_data_1[static_cast(ch)]; + if (Quote == Ch('\"')) + return internal::lookup_tables< + 0>::lookup_attribute_data_2[static_cast(ch)]; + return 0; // Should never be executed, to avoid warnings on Comeau + } + }; + + // Detect attribute value character + template struct attribute_value_pure_pred { + static unsigned char test(Ch ch) { + if (Quote == Ch('\'')) + return internal::lookup_tables< + 0>::lookup_attribute_data_1_pure[static_cast(ch)]; + if (Quote == Ch('\"')) + return internal::lookup_tables< + 
0>::lookup_attribute_data_2_pure[static_cast(ch)]; + return 0; // Should never be executed, to avoid warnings on Comeau + } + }; + + // Insert coded character, using UTF8 or 8-bit ASCII + template + static void insert_coded_character(Ch *&text, unsigned long code) { + if (Flags & parse_no_utf8) { + // Insert 8-bit ASCII character + // Todo: possibly verify that code is less than 256 and use replacement + // char otherwise? + text[0] = static_cast(code); + text += 1; + } else { + // Insert UTF8 sequence + if (code < 0x80) // 1 byte sequence + { + text[0] = static_cast(code); + text += 1; + } else if (code < 0x800) // 2 byte sequence + { + text[1] = static_cast((code | 0x80) & 0xBF); + code >>= 6; + text[0] = static_cast(code | 0xC0); + text += 2; + } else if (code < 0x10000) // 3 byte sequence + { + text[2] = static_cast((code | 0x80) & 0xBF); + code >>= 6; + text[1] = static_cast((code | 0x80) & 0xBF); + code >>= 6; + text[0] = static_cast(code | 0xE0); + text += 3; + } else if (code < 0x110000) // 4 byte sequence + { + text[3] = static_cast((code | 0x80) & 0xBF); + code >>= 6; + text[2] = static_cast((code | 0x80) & 0xBF); + code >>= 6; + text[1] = static_cast((code | 0x80) & 0xBF); + code >>= 6; + text[0] = static_cast(code | 0xF0); + text += 4; + } else // Invalid, only codes up to 0x10FFFF are allowed in Unicode + { + RAPIDXML_PARSE_ERROR("invalid numeric character entity", text); + } + } + } + + // Skip characters until predicate evaluates to true + template static void skip(Ch *&text) { + Ch *tmp = text; + while (StopPred::test(*tmp)) + ++tmp; + text = tmp; + } + + // Skip characters until predicate evaluates to true while doing the + // following: + // - replacing XML character entity references with proper characters (' + // & " < > &#...;) + // - condensing whitespace sequences to single space character + template + static Ch *skip_and_expand_character_refs(Ch *&text) { + // If entity translation, whitespace condense and whitespace trimming is + // 
disabled, use plain skip + if (Flags & parse_no_entity_translation && + !(Flags & parse_normalize_whitespace) && + !(Flags & parse_trim_whitespace)) { + skip(text); + return text; + } - //! Clears the document by deleting all nodes and clearing the memory pool. - //! All nodes owned by document pool are destroyed. - void clear() - { - this->remove_all_nodes(); - this->remove_all_attributes(); - memory_pool::clear(); - } - - private: - - /////////////////////////////////////////////////////////////////////// - // Internal character utility functions - - // Detect whitespace character - struct whitespace_pred - { - static unsigned char test(Ch ch) - { - return internal::lookup_tables<0>::lookup_whitespace[static_cast(ch)]; - } - }; - - // Detect node name character - struct node_name_pred - { - static unsigned char test(Ch ch) - { - return internal::lookup_tables<0>::lookup_node_name[static_cast(ch)]; - } - }; - - // Detect attribute name character - struct attribute_name_pred - { - static unsigned char test(Ch ch) - { - return internal::lookup_tables<0>::lookup_attribute_name[static_cast(ch)]; - } - }; - - // Detect text character (PCDATA) - struct text_pred - { - static unsigned char test(Ch ch) - { - return internal::lookup_tables<0>::lookup_text[static_cast(ch)]; - } - }; - - // Detect text character (PCDATA) that does not require processing - struct text_pure_no_ws_pred - { - static unsigned char test(Ch ch) - { - return internal::lookup_tables<0>::lookup_text_pure_no_ws[static_cast(ch)]; - } - }; - - // Detect text character (PCDATA) that does not require processing - struct text_pure_with_ws_pred - { - static unsigned char test(Ch ch) - { - return internal::lookup_tables<0>::lookup_text_pure_with_ws[static_cast(ch)]; - } - }; - - // Detect attribute value character - template - struct attribute_value_pred - { - static unsigned char test(Ch ch) - { - if (Quote == Ch('\'')) - return internal::lookup_tables<0>::lookup_attribute_data_1[static_cast(ch)]; - if 
(Quote == Ch('\"')) - return internal::lookup_tables<0>::lookup_attribute_data_2[static_cast(ch)]; - return 0; // Should never be executed, to avoid warnings on Comeau - } - }; - - // Detect attribute value character - template - struct attribute_value_pure_pred - { - static unsigned char test(Ch ch) - { - if (Quote == Ch('\'')) - return internal::lookup_tables<0>::lookup_attribute_data_1_pure[static_cast(ch)]; - if (Quote == Ch('\"')) - return internal::lookup_tables<0>::lookup_attribute_data_2_pure[static_cast(ch)]; - return 0; // Should never be executed, to avoid warnings on Comeau - } - }; - - // Insert coded character, using UTF8 or 8-bit ASCII - template - static void insert_coded_character(Ch *&text, unsigned long code) - { - if (Flags & parse_no_utf8) - { - // Insert 8-bit ASCII character - // Todo: possibly verify that code is less than 256 and use replacement char otherwise? - text[0] = static_cast(code); - text += 1; - } + // Use simple skip until first modification is detected + skip(text); + + // Use translation skip + Ch *src = text; + Ch *dest = src; + while (StopPred::test(*src)) { + // If entity translation is enabled + if (!(Flags & parse_no_entity_translation)) { + // Test if replacement is needed + if (src[0] == Ch('&')) { + switch (src[1]) { + + // & ' + case Ch('a'): + if (src[2] == Ch('m') && src[3] == Ch('p') && src[4] == Ch(';')) { + *dest = Ch('&'); + ++dest; + src += 5; + continue; + } + if (src[2] == Ch('p') && src[3] == Ch('o') && src[4] == Ch('s') && + src[5] == Ch(';')) { + *dest = Ch('\''); + ++dest; + src += 6; + continue; + } + break; + + // " + case Ch('q'): + if (src[2] == Ch('u') && src[3] == Ch('o') && src[4] == Ch('t') && + src[5] == Ch(';')) { + *dest = Ch('"'); + ++dest; + src += 6; + continue; + } + break; + + // > + case Ch('g'): + if (src[2] == Ch('t') && src[3] == Ch(';')) { + *dest = Ch('>'); + ++dest; + src += 4; + continue; + } + break; + + // < + case Ch('l'): + if (src[2] == Ch('t') && src[3] == Ch(';')) { + *dest 
= Ch('<'); + ++dest; + src += 4; + continue; + } + break; + + // &#...; - assumes ASCII + case Ch('#'): + if (src[2] == Ch('x')) { + unsigned long code = 0; + src += 3; // Skip &#x + while (1) { + unsigned char digit = internal::lookup_tables< + 0>::lookup_digits[static_cast(*src)]; + if (digit == 0xFF) + break; + code = code * 16 + digit; + ++src; + } + insert_coded_character(dest, + code); // Put character in output + } else { + unsigned long code = 0; + src += 2; // Skip &# + while (1) { + unsigned char digit = internal::lookup_tables< + 0>::lookup_digits[static_cast(*src)]; + if (digit == 0xFF) + break; + code = code * 10 + digit; + ++src; + } + insert_coded_character(dest, + code); // Put character in output + } + if (*src == Ch(';')) + ++src; else - { - // Insert UTF8 sequence - if (code < 0x80) // 1 byte sequence - { - text[0] = static_cast(code); - text += 1; - } - else if (code < 0x800) // 2 byte sequence - { - text[1] = static_cast((code | 0x80) & 0xBF); code >>= 6; - text[0] = static_cast(code | 0xC0); - text += 2; - } - else if (code < 0x10000) // 3 byte sequence - { - text[2] = static_cast((code | 0x80) & 0xBF); code >>= 6; - text[1] = static_cast((code | 0x80) & 0xBF); code >>= 6; - text[0] = static_cast(code | 0xE0); - text += 3; - } - else if (code < 0x110000) // 4 byte sequence - { - text[3] = static_cast((code | 0x80) & 0xBF); code >>= 6; - text[2] = static_cast((code | 0x80) & 0xBF); code >>= 6; - text[1] = static_cast((code | 0x80) & 0xBF); code >>= 6; - text[0] = static_cast(code | 0xF0); - text += 4; - } - else // Invalid, only codes up to 0x10FFFF are allowed in Unicode - { - RAPIDXML_PARSE_ERROR("invalid numeric character entity", text); - } - } - } - - // Skip characters until predicate evaluates to true - template - static void skip(Ch *&text) - { - Ch *tmp = text; - while (StopPred::test(*tmp)) - ++tmp; - text = tmp; - } - - // Skip characters until predicate evaluates to true while doing the following: - // - replacing XML character 
entity references with proper characters (' & " < > &#...;) - // - condensing whitespace sequences to single space character - template - static Ch *skip_and_expand_character_refs(Ch *&text) - { - // If entity translation, whitespace condense and whitespace trimming is disabled, use plain skip - if (Flags & parse_no_entity_translation && - !(Flags & parse_normalize_whitespace) && - !(Flags & parse_trim_whitespace)) - { - skip(text); - return text; - } - - // Use simple skip until first modification is detected - skip(text); - - // Use translation skip - Ch *src = text; - Ch *dest = src; - while (StopPred::test(*src)) - { - // If entity translation is enabled - if (!(Flags & parse_no_entity_translation)) - { - // Test if replacement is needed - if (src[0] == Ch('&')) - { - switch (src[1]) - { - - // & ' - case Ch('a'): - if (src[2] == Ch('m') && src[3] == Ch('p') && src[4] == Ch(';')) - { - *dest = Ch('&'); - ++dest; - src += 5; - continue; - } - if (src[2] == Ch('p') && src[3] == Ch('o') && src[4] == Ch('s') && src[5] == Ch(';')) - { - *dest = Ch('\''); - ++dest; - src += 6; - continue; - } - break; - - // " - case Ch('q'): - if (src[2] == Ch('u') && src[3] == Ch('o') && src[4] == Ch('t') && src[5] == Ch(';')) - { - *dest = Ch('"'); - ++dest; - src += 6; - continue; - } - break; - - // > - case Ch('g'): - if (src[2] == Ch('t') && src[3] == Ch(';')) - { - *dest = Ch('>'); - ++dest; - src += 4; - continue; - } - break; - - // < - case Ch('l'): - if (src[2] == Ch('t') && src[3] == Ch(';')) - { - *dest = Ch('<'); - ++dest; - src += 4; - continue; - } - break; - - // &#...; - assumes ASCII - case Ch('#'): - if (src[2] == Ch('x')) - { - unsigned long code = 0; - src += 3; // Skip &#x - while (1) - { - unsigned char digit = internal::lookup_tables<0>::lookup_digits[static_cast(*src)]; - if (digit == 0xFF) - break; - code = code * 16 + digit; - ++src; - } - insert_coded_character(dest, code); // Put character in output - } - else - { - unsigned long code = 0; - src += 2; 
// Skip &# - while (1) - { - unsigned char digit = internal::lookup_tables<0>::lookup_digits[static_cast(*src)]; - if (digit == 0xFF) - break; - code = code * 10 + digit; - ++src; - } - insert_coded_character(dest, code); // Put character in output - } - if (*src == Ch(';')) - ++src; - else - RAPIDXML_PARSE_ERROR("expected ;", src); - continue; - - // Something else - default: - // Ignore, just copy '&' verbatim - break; - - } - } - } - - // If whitespace condensing is enabled - if (Flags & parse_normalize_whitespace) - { - // Test if condensing is needed - if (whitespace_pred::test(*src)) - { - *dest = Ch(' '); ++dest; // Put single space in dest - ++src; // Skip first whitespace char - // Skip remaining whitespace chars - while (whitespace_pred::test(*src)) - ++src; - continue; - } - } - - // No replacement, only copy character - *dest++ = *src++; - - } - - // Return new end - text = src; - return dest; + RAPIDXML_PARSE_ERROR("expected ;", src); + continue; + + // Something else + default: + // Ignore, just copy '&' verbatim + break; + } + } + } + + // If whitespace condensing is enabled + if (Flags & parse_normalize_whitespace) { + // Test if condensing is needed + if (whitespace_pred::test(*src)) { + *dest = Ch(' '); + ++dest; // Put single space in dest + ++src; // Skip first whitespace char + // Skip remaining whitespace chars + while (whitespace_pred::test(*src)) + ++src; + continue; + } + } + + // No replacement, only copy character + *dest++ = *src++; + } - } + // Return new end + text = src; + return dest; + } + + /////////////////////////////////////////////////////////////////////// + // Internal parsing functions + + // Parse BOM, if any + template void parse_bom(Ch *&text) { + // UTF-8? 
+ if (static_cast(text[0]) == 0xEF && + static_cast(text[1]) == 0xBB && + static_cast(text[2]) == 0xBF) { + text += 3; // Skup utf-8 bom + } + } + + // Parse XML declaration ( xml_node *parse_xml_declaration(Ch *&text) { + // If parsing of declaration is disabled + if (!(Flags & parse_declaration_node)) { + // Skip until end of declaration + while (text[0] != Ch('?') || text[1] != Ch('>')) { + if (!text[0]) + RAPIDXML_PARSE_ERROR("unexpected end of data", text); + ++text; + } + text += 2; // Skip '?>' + return 0; + } - /////////////////////////////////////////////////////////////////////// - // Internal parsing functions - - // Parse BOM, if any - template - void parse_bom(Ch *&text) - { - // UTF-8? - if (static_cast(text[0]) == 0xEF && - static_cast(text[1]) == 0xBB && - static_cast(text[2]) == 0xBF) - { - text += 3; // Skup utf-8 bom - } - } + // Create declaration + xml_node *declaration = this->allocate_node(node_declaration); + + // Skip whitespace before attributes or ?> + skip(text); + + // Parse declaration attributes + parse_node_attributes(text, declaration); + + // Skip ?> + if (text[0] != Ch('?') || text[1] != Ch('>')) + RAPIDXML_PARSE_ERROR("expected ?>", text); + text += 2; + + return declaration; + } + + // Parse XML comment (' + return 0; // Do not produce comment node + } - // Parse XML declaration ( - xml_node *parse_xml_declaration(Ch *&text) - { - // If parsing of declaration is disabled - if (!(Flags & parse_declaration_node)) - { - // Skip until end of declaration - while (text[0] != Ch('?') || text[1] != Ch('>')) - { - if (!text[0]) - RAPIDXML_PARSE_ERROR("unexpected end of data", text); - ++text; - } - text += 2; // Skip '?>' - return 0; - } + // Remember value start + Ch *value = text; - // Create declaration - xml_node *declaration = this->allocate_node(node_declaration); - - // Skip whitespace before attributes or ?> - skip(text); - - // Parse declaration attributes - parse_node_attributes(text, declaration); - - // Skip ?> - if (text[0] 
!= Ch('?') || text[1] != Ch('>')) - RAPIDXML_PARSE_ERROR("expected ?>", text); - text += 2; - - return declaration; - } + // Skip until end of comment + while (text[0] != Ch('-') || text[1] != Ch('-') || text[2] != Ch('>')) { + if (!text[0]) + RAPIDXML_PARSE_ERROR("unexpected end of data", text); + ++text; + } - // Parse XML comment (' - return 0; // Do not produce comment node - } + // Create comment node + xml_node *comment = this->allocate_node(node_comment); + comment->value(value, text - value); + + // Place zero terminator after comment value + if (!(Flags & parse_no_string_terminators)) + *text = Ch('\0'); + + text += 3; // Skip '-->' + return comment; + } + + // Parse DOCTYPE + template xml_node *parse_doctype(Ch *&text) { + // Remember value start + Ch *value = text; + + // Skip to > + while (*text != Ch('>')) { + // Determine character type + switch (*text) { + + // If '[' encountered, scan for matching ending ']' using naive algorithm + // with depth This works for all W3C test files except for 2 most wicked + case Ch('['): { + ++text; // Skip '[' + int depth = 1; + while (depth > 0) { + switch (*text) { + case Ch('['): + ++depth; + break; + case Ch(']'): + --depth; + break; + case 0: + RAPIDXML_PARSE_ERROR("unexpected end of data", text); + } + ++text; + } + break; + } + + // Error on end of text + case Ch('\0'): + RAPIDXML_PARSE_ERROR("unexpected end of data", text); + + // Other character, skip it + default: + ++text; + } + } - // Remember value start - Ch *value = text; + // If DOCTYPE nodes enabled + if (Flags & parse_doctype_node) { + // Create a new doctype node + xml_node *doctype = this->allocate_node(node_doctype); + doctype->value(value, text - value); + + // Place zero terminator after value + if (!(Flags & parse_no_string_terminators)) + *text = Ch('\0'); + + text += 1; // skip '>' + return doctype; + } else { + text += 1; // skip '>' + return 0; + } + } + + // Parse PI + template xml_node *parse_pi(Ch *&text) { + // If creation of PI nodes 
is enabled + if (Flags & parse_pi_nodes) { + // Create pi node + xml_node *pi = this->allocate_node(node_pi); + + // Extract PI target name + Ch *name = text; + skip(text); + if (text == name) + RAPIDXML_PARSE_ERROR("expected PI target", text); + pi->name(name, text - name); + + // Skip whitespace between pi target and pi + skip(text); + + // Remember start of pi + Ch *value = text; + + // Skip to '?>' + while (text[0] != Ch('?') || text[1] != Ch('>')) { + if (*text == Ch('\0')) + RAPIDXML_PARSE_ERROR("unexpected end of data", text); + ++text; + } + + // Set pi value (verbatim, no entity expansion or whitespace + // normalization) + pi->value(value, text - value); + + // Place zero terminator after name and value + if (!(Flags & parse_no_string_terminators)) { + pi->name()[pi->name_size()] = Ch('\0'); + pi->value()[pi->value_size()] = Ch('\0'); + } + + text += 2; // Skip '?>' + return pi; + } else { + // Skip to '?>' + while (text[0] != Ch('?') || text[1] != Ch('>')) { + if (*text == Ch('\0')) + RAPIDXML_PARSE_ERROR("unexpected end of data", text); + ++text; + } + text += 2; // Skip '?>' + return 0; + } + } + + // Parse and append data + // Return character that ends data. 
+ // This is necessary because this character might have been overwritten by a + // terminating 0 + template + Ch parse_and_append_data(xml_node *node, Ch *&text, Ch *contents_start) { + // Backup to contents start if whitespace trimming is disabled + if (!(Flags & parse_trim_whitespace)) + text = contents_start; + + // Skip until end of data + Ch *value = text, *end; + if (Flags & parse_normalize_whitespace) + end = skip_and_expand_character_refs(text); + else + end = skip_and_expand_character_refs(text); + + // Trim trailing whitespace if flag is set; leading was already trimmed by + // whitespace skip after > + if (Flags & parse_trim_whitespace) { + if (Flags & parse_normalize_whitespace) { + // Whitespace is already condensed to single space characters by + // skipping function, so just trim 1 char off the end + if (*(end - 1) == Ch(' ')) + --end; + } else { + // Backup until non-whitespace character is found + while (whitespace_pred::test(*(end - 1))) + --end; + } + } - // Skip until end of comment - while (text[0] != Ch('-') || text[1] != Ch('-') || text[2] != Ch('>')) - { - if (!text[0]) - RAPIDXML_PARSE_ERROR("unexpected end of data", text); - ++text; - } + // If characters are still left between end and value (this test is only + // necessary if normalization is enabled) Create new data node + if (!(Flags & parse_no_data_nodes)) { + xml_node *data = this->allocate_node(node_data); + data->value(value, end - value); + node->append_node(data); + } - // Create comment node - xml_node *comment = this->allocate_node(node_comment); - comment->value(value, text - value); - - // Place zero terminator after comment value - if (!(Flags & parse_no_string_terminators)) - *text = Ch('\0'); - - text += 3; // Skip '-->' - return comment; - } + // Add data to parent node if no data exists yet + if (!(Flags & parse_no_element_values)) + if (*node->value() == Ch('\0')) + node->value(value, end - value); + + // Place zero terminator after value + if (!(Flags & 
parse_no_string_terminators)) { + Ch ch = *text; + *end = Ch('\0'); + return ch; // Return character that ends data; this is required because + // zero terminator overwritten it + } - // Parse DOCTYPE - template - xml_node *parse_doctype(Ch *&text) - { - // Remember value start - Ch *value = text; - - // Skip to > - while (*text != Ch('>')) - { - // Determine character type - switch (*text) - { - - // If '[' encountered, scan for matching ending ']' using naive algorithm with depth - // This works for all W3C test files except for 2 most wicked - case Ch('['): - { - ++text; // Skip '[' - int depth = 1; - while (depth > 0) - { - switch (*text) - { - case Ch('['): ++depth; break; - case Ch(']'): --depth; break; - case 0: RAPIDXML_PARSE_ERROR("unexpected end of data", text); - } - ++text; - } - break; - } - - // Error on end of text - case Ch('\0'): - RAPIDXML_PARSE_ERROR("unexpected end of data", text); - - // Other character, skip it - default: - ++text; - - } - } - - // If DOCTYPE nodes enabled - if (Flags & parse_doctype_node) - { - // Create a new doctype node - xml_node *doctype = this->allocate_node(node_doctype); - doctype->value(value, text - value); - - // Place zero terminator after value - if (!(Flags & parse_no_string_terminators)) - *text = Ch('\0'); - - text += 1; // skip '>' - return doctype; - } - else - { - text += 1; // skip '>' - return 0; - } + // Return character that ends data + return *text; + } + + // Parse CDATA + template xml_node *parse_cdata(Ch *&text) { + // If CDATA is disabled + if (Flags & parse_no_data_nodes) { + // Skip until end of cdata + while (text[0] != Ch(']') || text[1] != Ch(']') || text[2] != Ch('>')) { + if (!text[0]) + RAPIDXML_PARSE_ERROR("unexpected end of data", text); + ++text; + } + text += 3; // Skip ]]> + return 0; // Do not produce CDATA node + } - } + // Skip until end of cdata + Ch *value = text; + while (text[0] != Ch(']') || text[1] != Ch(']') || text[2] != Ch('>')) { + if (!text[0]) + 
RAPIDXML_PARSE_ERROR("unexpected end of data", text); + ++text; + } + // Create new cdata node + xml_node *cdata = this->allocate_node(node_cdata); + cdata->value(value, text - value); + + // Place zero terminator after value + if (!(Flags & parse_no_string_terminators)) + *text = Ch('\0'); + + text += 3; // Skip ]]> + return cdata; + } + + // Parse element node + template xml_node *parse_element(Ch *&text) { + // Create element node + xml_node *element = this->allocate_node(node_element); + + // Extract element name + Ch *name = text; + skip(text); + if (text == name) + RAPIDXML_PARSE_ERROR("expected element name", text); + element->name(name, text - name); + + // Skip whitespace between element name and attributes or > + skip(text); + + // Parse attributes, if any + parse_node_attributes(text, element); + + // Determine ending type + if (*text == Ch('>')) { + ++text; + parse_node_contents(text, element); + } else if (*text == Ch('/')) { + ++text; + if (*text != Ch('>')) + RAPIDXML_PARSE_ERROR("expected >", text); + ++text; + } else + RAPIDXML_PARSE_ERROR("expected >", text); + + // Place zero terminator after name + if (!(Flags & parse_no_string_terminators)) + element->name()[element->name_size()] = Ch('\0'); + + // Return parsed element + return element; + } + + // Determine node type, and parse it + template xml_node *parse_node(Ch *&text) { + // Parse proper node type + switch (text[0]) { + + // <... 
+ default: + // Parse and append element node + return parse_element(text); + + // (text); + } else { // Parse PI - template - xml_node *parse_pi(Ch *&text) - { - // If creation of PI nodes is enabled - if (Flags & parse_pi_nodes) - { - // Create pi node - xml_node *pi = this->allocate_node(node_pi); - - // Extract PI target name - Ch *name = text; - skip(text); - if (text == name) - RAPIDXML_PARSE_ERROR("expected PI target", text); - pi->name(name, text - name); - - // Skip whitespace between pi target and pi - skip(text); - - // Remember start of pi - Ch *value = text; - - // Skip to '?>' - while (text[0] != Ch('?') || text[1] != Ch('>')) - { - if (*text == Ch('\0')) - RAPIDXML_PARSE_ERROR("unexpected end of data", text); - ++text; - } - - // Set pi value (verbatim, no entity expansion or whitespace normalization) - pi->value(value, text - value); - - // Place zero terminator after name and value - if (!(Flags & parse_no_string_terminators)) - { - pi->name()[pi->name_size()] = Ch('\0'); - pi->value()[pi->value_size()] = Ch('\0'); - } - - text += 2; // Skip '?>' - return pi; - } - else - { - // Skip to '?>' - while (text[0] != Ch('?') || text[1] != Ch('>')) - { - if (*text == Ch('\0')) - RAPIDXML_PARSE_ERROR("unexpected end of data", text); - ++text; - } - text += 2; // Skip '?>' - return 0; - } - } - - // Parse and append data - // Return character that ends data. 
- // This is necessary because this character might have been overwritten by a terminating 0 - template - Ch parse_and_append_data(xml_node *node, Ch *&text, Ch *contents_start) - { - // Backup to contents start if whitespace trimming is disabled - if (!(Flags & parse_trim_whitespace)) - text = contents_start; - - // Skip until end of data - Ch *value = text, *end; - if (Flags & parse_normalize_whitespace) - end = skip_and_expand_character_refs(text); - else - end = skip_and_expand_character_refs(text); - - // Trim trailing whitespace if flag is set; leading was already trimmed by whitespace skip after > - if (Flags & parse_trim_whitespace) - { - if (Flags & parse_normalize_whitespace) - { - // Whitespace is already condensed to single space characters by skipping function, so just trim 1 char off the end - if (*(end - 1) == Ch(' ')) - --end; - } - else - { - // Backup until non-whitespace character is found - while (whitespace_pred::test(*(end - 1))) - --end; - } - } - - // If characters are still left between end and value (this test is only necessary if normalization is enabled) - // Create new data node - if (!(Flags & parse_no_data_nodes)) - { - xml_node *data = this->allocate_node(node_data); - data->value(value, end - value); - node->append_node(data); - } - - // Add data to parent node if no data exists yet - if (!(Flags & parse_no_element_values)) - if (*node->value() == Ch('\0')) - node->value(value, end - value); - - // Place zero terminator after value - if (!(Flags & parse_no_string_terminators)) - { - Ch ch = *text; - *end = Ch('\0'); - return ch; // Return character that ends data; this is required because zero terminator overwritten it - } - - // Return character that ends data - return *text; - } - - // Parse CDATA - template - xml_node *parse_cdata(Ch *&text) - { - // If CDATA is disabled - if (Flags & parse_no_data_nodes) - { - // Skip until end of cdata - while (text[0] != Ch(']') || text[1] != Ch(']') || text[2] != Ch('>')) - { - if (!text[0]) 
- RAPIDXML_PARSE_ERROR("unexpected end of data", text); - ++text; - } - text += 3; // Skip ]]> - return 0; // Do not produce CDATA node - } - - // Skip until end of cdata - Ch *value = text; - while (text[0] != Ch(']') || text[1] != Ch(']') || text[2] != Ch('>')) - { - if (!text[0]) - RAPIDXML_PARSE_ERROR("unexpected end of data", text); - ++text; - } - - // Create new cdata node - xml_node *cdata = this->allocate_node(node_cdata); - cdata->value(value, text - value); - - // Place zero terminator after value - if (!(Flags & parse_no_string_terminators)) - *text = Ch('\0'); - - text += 3; // Skip ]]> - return cdata; - } - - // Parse element node - template - xml_node *parse_element(Ch *&text) - { - // Create element node - xml_node *element = this->allocate_node(node_element); - - // Extract element name - Ch *name = text; + return parse_pi(text); + } + + // (text); + } + break; + + // (text); + } + break; + + // (text); + } + + } // switch + + // Attempt to skip other, unrecognized node types starting with ')) { + if (*text == 0) + RAPIDXML_PARSE_ERROR("unexpected end of data", text); + ++text; + } + ++text; // Skip '>' + return 0; // No node recognized + } + } + + // Parse contents of the node - children, data etc. + template void parse_node_contents(Ch *&text, xml_node *node) { + // For all children and text + while (1) { + // Skip whitespace between > and node contents + Ch *contents_start = + text; // Store start of node contents before whitespace is skipped + skip(text); + Ch next_char = *text; + + // After data nodes, instead of continuing the loop, control jumps here. + // This is because zero termination inside parse_and_append_data() function + // would wreak havoc with the above code. + // Also, skipping whitespace after data nodes is unnecessary. + after_data_node: + + // Determine what comes next: node closing, child node, data node, or 0? 
+ switch (next_char) { + + // Node closing or child node + case Ch('<'): + if (text[1] == Ch('/')) { + // Node closing + text += 2; // Skip '(text); - if (text == name) - RAPIDXML_PARSE_ERROR("expected element name", text); - element->name(name, text - name); - - // Skip whitespace between element name and attributes or > - skip(text); - - // Parse attributes, if any - parse_node_attributes(text, element); - - // Determine ending type - if (*text == Ch('>')) - { - ++text; - parse_node_contents(text, element); - } - else if (*text == Ch('/')) - { - ++text; - if (*text != Ch('>')) - RAPIDXML_PARSE_ERROR("expected >", text); - ++text; - } - else - RAPIDXML_PARSE_ERROR("expected >", text); - - // Place zero terminator after name - if (!(Flags & parse_no_string_terminators)) - element->name()[element->name_size()] = Ch('\0'); - - // Return parsed element - return element; - } - - // Determine node type, and parse it - template - xml_node *parse_node(Ch *&text) - { - // Parse proper node type - switch (text[0]) - { - - // <... - default: - // Parse and append element node - return parse_element(text); - - // (text); - } - else - { - // Parse PI - return parse_pi(text); - } - - // (text); - } - break; - - // (text); - } - break; - - // (text); - } - - } // switch - - // Attempt to skip other, unrecognized node types starting with ')) - { - if (*text == 0) - RAPIDXML_PARSE_ERROR("unexpected end of data", text); - ++text; - } - ++text; // Skip '>' - return 0; // No node recognized - - } - } - - // Parse contents of the node - children, data etc. - template - void parse_node_contents(Ch *&text, xml_node *node) - { - // For all children and text - while (1) - { - // Skip whitespace between > and node contents - Ch *contents_start = text; // Store start of node contents before whitespace is skipped - skip(text); - Ch next_char = *text; - - // After data nodes, instead of continuing the loop, control jumps here. 
- // This is because zero termination inside parse_and_append_data() function - // would wreak havoc with the above code. - // Also, skipping whitespace after data nodes is unnecessary. - after_data_node: - - // Determine what comes next: node closing, child node, data node, or 0? - switch (next_char) - { - - // Node closing or child node - case Ch('<'): - if (text[1] == Ch('/')) - { - // Node closing - text += 2; // Skip '(text); - if (!internal::compare(node->name(), node->name_size(), closing_name, text - closing_name, true)) - RAPIDXML_PARSE_ERROR("invalid closing tag name", text); - } - else - { - // No validation, just skip name - skip(text); - } - // Skip remaining whitespace after node name - skip(text); - if (*text != Ch('>')) - RAPIDXML_PARSE_ERROR("expected >", text); - ++text; // Skip '>' - return; // Node closed, finished parsing contents - } - else - { - // Child node - ++text; // Skip '<' - if (xml_node *child = parse_node(text)) - node->append_node(child); - } - break; - - // End of data - error - case Ch('\0'): - RAPIDXML_PARSE_ERROR("unexpected end of data", text); - - // Data node - default: - next_char = parse_and_append_data(node, text, contents_start); - goto after_data_node; // Bypass regular processing after data nodes - - } - } - } - - // Parse XML attributes of the node - template - void parse_node_attributes(Ch *&text, xml_node *node) - { - // For all attributes - while (attribute_name_pred::test(*text)) - { - // Extract attribute name - Ch *name = text; - ++text; // Skip first character of attribute name - skip(text); - if (text == name) - RAPIDXML_PARSE_ERROR("expected attribute name", name); - - // Create new attribute - xml_attribute *attribute = this->allocate_attribute(); - attribute->name(name, text - name); - node->append_attribute(attribute); - - // Skip whitespace after attribute name - skip(text); - - // Skip = - if (*text != Ch('=')) - RAPIDXML_PARSE_ERROR("expected =", text); - ++text; - - // Add terminating zero after name - 
if (!(Flags & parse_no_string_terminators)) - attribute->name()[attribute->name_size()] = 0; - - // Skip whitespace after = - skip(text); - - // Skip quote and remember if it was ' or " - Ch quote = *text; - if (quote != Ch('\'') && quote != Ch('"')) - RAPIDXML_PARSE_ERROR("expected ' or \"", text); - ++text; - - // Extract attribute value and expand char refs in it - Ch *value = text, *end; - const int AttFlags = Flags & ~parse_normalize_whitespace; // No whitespace normalization in attributes - if (quote == Ch('\'')) - end = skip_and_expand_character_refs, attribute_value_pure_pred, AttFlags>(text); - else - end = skip_and_expand_character_refs, attribute_value_pure_pred, AttFlags>(text); - - // Set attribute value - attribute->value(value, end - value); - - // Make sure that end quote is present - if (*text != quote) - RAPIDXML_PARSE_ERROR("expected ' or \"", text); - ++text; // Skip quote - - // Add terminating zero after value - if (!(Flags & parse_no_string_terminators)) - attribute->value()[attribute->value_size()] = 0; - - // Skip whitespace after attribute value - skip(text); - } - } - - }; - - //! 
\cond internal - namespace internal - { - - // Whitespace (space \n \r \t) - template - const unsigned char lookup_tables::lookup_whitespace[256] = - { - // 0 1 2 3 4 5 6 7 8 9 A B C D E F - 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, // 0 - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1 - 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 2 - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 3 - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 4 - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 5 - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 6 - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 7 - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 8 - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 9 - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // A - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // B - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // C - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // D - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // E - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 // F - }; - - // Node name (anything but space \n \r \t / > ? 
\0) - template - const unsigned char lookup_tables::lookup_node_name[256] = - { - // 0 1 2 3 4 5 6 7 8 9 A B C D E F - 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, // 0 - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 1 - 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, // 2 - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, // 3 - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 4 - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 5 - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 6 - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 7 - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 8 - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 9 - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // A - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // B - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // C - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // D - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // E - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 // F - }; - - // Text (i.e. 
PCDATA) (anything but < \0) - template - const unsigned char lookup_tables::lookup_text[256] = - { - // 0 1 2 3 4 5 6 7 8 9 A B C D E F - 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0 - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 1 - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 2 - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, // 3 - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 4 - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 5 - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 6 - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 7 - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 8 - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 9 - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // A - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // B - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // C - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // D - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // E - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 // F - }; - - // Text (i.e. 
PCDATA) that does not require processing when ws normalization is disabled - // (anything but < \0 &) - template - const unsigned char lookup_tables::lookup_text_pure_no_ws[256] = - { - // 0 1 2 3 4 5 6 7 8 9 A B C D E F - 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0 - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 1 - 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 2 - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, // 3 - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 4 - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 5 - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 6 - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 7 - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 8 - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 9 - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // A - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // B - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // C - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // D - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // E - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 // F - }; - - // Text (i.e. 
PCDATA) that does not require processing when ws normalizationis is enabled - // (anything but < \0 & space \n \r \t) - template - const unsigned char lookup_tables::lookup_text_pure_with_ws[256] = - { - // 0 1 2 3 4 5 6 7 8 9 A B C D E F - 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, // 0 - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 1 - 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 2 - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, // 3 - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 4 - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 5 - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 6 - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 7 - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 8 - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 9 - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // A - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // B - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // C - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // D - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // E - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 // F - }; - - // Attribute name (anything but space \n \r \t / < > = ? ! 
\0) - template - const unsigned char lookup_tables::lookup_attribute_name[256] = - { - // 0 1 2 3 4 5 6 7 8 9 A B C D E F - 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, // 0 - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 1 - 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, // 2 - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, // 3 - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 4 - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 5 - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 6 - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 7 - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 8 - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 9 - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // A - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // B - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // C - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // D - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // E - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 // F - }; - - // Attribute data with single quote (anything but ' \0) - template - const unsigned char lookup_tables::lookup_attribute_data_1[256] = - { - // 0 1 2 3 4 5 6 7 8 9 A B C D E F - 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0 - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 1 - 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, // 2 - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 3 - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 4 - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 5 - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 6 - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 7 - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 8 - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 9 - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // A - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // B - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // C - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // D - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
1, 1, // E - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 // F - }; - - // Attribute data with single quote that does not require processing (anything but ' \0 &) - template - const unsigned char lookup_tables::lookup_attribute_data_1_pure[256] = - { - // 0 1 2 3 4 5 6 7 8 9 A B C D E F - 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0 - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 1 - 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, // 2 - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 3 - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 4 - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 5 - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 6 - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 7 - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 8 - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 9 - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // A - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // B - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // C - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // D - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // E - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 // F - }; - - // Attribute data with double quote (anything but " \0) - template - const unsigned char lookup_tables::lookup_attribute_data_2[256] = - { - // 0 1 2 3 4 5 6 7 8 9 A B C D E F - 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0 - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 1 - 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 2 - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 3 - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 4 - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 5 - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 6 - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 7 - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 8 - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 9 - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // A - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
1, 1, 1, // B - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // C - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // D - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // E - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 // F - }; - - // Attribute data with double quote that does not require processing (anything but " \0 &) - template - const unsigned char lookup_tables::lookup_attribute_data_2_pure[256] = - { - // 0 1 2 3 4 5 6 7 8 9 A B C D E F - 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0 - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 1 - 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 2 - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 3 - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 4 - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 5 - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 6 - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 7 - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 8 - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 9 - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // A - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // B - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // C - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // D - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // E - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 // F - }; - - // Digits (dec and hex, 255 denotes end of numeric character reference) - template - const unsigned char lookup_tables::lookup_digits[256] = - { - // 0 1 2 3 4 5 6 7 8 9 A B C D E F - 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, // 0 - 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, // 1 - 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, // 2 - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,255,255,255,255,255,255, // 3 - 255, 10, 11, 12, 13, 14, 15,255,255,255,255,255,255,255,255,255, // 4 - 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, // 5 - 255, 10, 11, 12, 13, 14, 
15,255,255,255,255,255,255,255,255,255, // 6 - 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, // 7 - 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, // 8 - 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, // 9 - 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, // A - 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, // B - 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, // C - 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, // D - 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, // E - 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255 // F - }; - - // Upper case conversion - template - const unsigned char lookup_tables::lookup_upcase[256] = - { - // 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, A B C D E F - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, // 0 - 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, // 1 - 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, // 2 - 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, // 3 - 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, // 4 - 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, // 5 - 96, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, // 6 - 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 123,124,125,126,127, // 7 - 128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143, // 8 - 144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159, // 9 - 160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175, // A - 176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191, // B - 192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207, // C - 208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223, // D - 224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239, // E - 240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255 // F - }; + if 
(!internal::compare(node->name(), node->name_size(), + closing_name, text - closing_name, true)) + RAPIDXML_PARSE_ERROR("invalid closing tag name", text); + } else { + // No validation, just skip name + skip(text); + } + // Skip remaining whitespace after node name + skip(text); + if (*text != Ch('>')) + RAPIDXML_PARSE_ERROR("expected >", text); + ++text; // Skip '>' + return; // Node closed, finished parsing contents + } else { + // Child node + ++text; // Skip '<' + if (xml_node *child = parse_node(text)) + node->append_node(child); + } + break; + + // End of data - error + case Ch('\0'): + RAPIDXML_PARSE_ERROR("unexpected end of data", text); + + // Data node + default: + next_char = parse_and_append_data(node, text, contents_start); + goto after_data_node; // Bypass regular processing after data nodes + } } - //! \endcond - -} + } + + // Parse XML attributes of the node + template + void parse_node_attributes(Ch *&text, xml_node *node) { + // For all attributes + while (attribute_name_pred::test(*text)) { + // Extract attribute name + Ch *name = text; + ++text; // Skip first character of attribute name + skip(text); + if (text == name) + RAPIDXML_PARSE_ERROR("expected attribute name", name); + + // Create new attribute + xml_attribute *attribute = this->allocate_attribute(); + attribute->name(name, text - name); + node->append_attribute(attribute); + + // Skip whitespace after attribute name + skip(text); + + // Skip = + if (*text != Ch('=')) + RAPIDXML_PARSE_ERROR("expected =", text); + ++text; + + // Add terminating zero after name + if (!(Flags & parse_no_string_terminators)) + attribute->name()[attribute->name_size()] = 0; + + // Skip whitespace after = + skip(text); + + // Skip quote and remember if it was ' or " + Ch quote = *text; + if (quote != Ch('\'') && quote != Ch('"')) + RAPIDXML_PARSE_ERROR("expected ' or \"", text); + ++text; + + // Extract attribute value and expand char refs in it + Ch *value = text, *end; + const int AttFlags = + Flags & 
~parse_normalize_whitespace; // No whitespace normalization in + // attributes + if (quote == Ch('\'')) + end = + skip_and_expand_character_refs, + attribute_value_pure_pred, + AttFlags>(text); + else + end = skip_and_expand_character_refs, + attribute_value_pure_pred, + AttFlags>(text); + + // Set attribute value + attribute->value(value, end - value); + + // Make sure that end quote is present + if (*text != quote) + RAPIDXML_PARSE_ERROR("expected ' or \"", text); + ++text; // Skip quote + + // Add terminating zero after value + if (!(Flags & parse_no_string_terminators)) + attribute->value()[attribute->value_size()] = 0; + + // Skip whitespace after attribute value + skip(text); + } + } +}; + +//! \cond internal +namespace internal { + +// Whitespace (space \n \r \t) +template +const unsigned char lookup_tables::lookup_whitespace[256] = { + // 0 1 2 3 4 5 6 7 8 9 A B C D E F + 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, // 0 + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1 + 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 2 + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 3 + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 4 + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 5 + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 6 + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 7 + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 8 + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 9 + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // A + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // B + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // C + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // D + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // E + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 // F +}; + +// Node name (anything but space \n \r \t / > ? 
\0) +template +const unsigned char lookup_tables::lookup_node_name[256] = { + // 0 1 2 3 4 5 6 7 8 9 A B C D E F + 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, // 0 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 1 + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, // 2 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, // 3 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 4 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 5 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 6 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 7 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 8 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 9 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // A + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // B + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // C + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // D + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // E + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 // F +}; + +// Text (i.e. 
PCDATA) (anything but < \0) +template +const unsigned char lookup_tables::lookup_text[256] = { + // 0 1 2 3 4 5 6 7 8 9 A B C D E F + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 1 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 2 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, // 3 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 4 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 5 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 6 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 7 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 8 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 9 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // A + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // B + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // C + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // D + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // E + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 // F +}; + +// Text (i.e. 
PCDATA) that does not require processing when ws normalization is +// disabled (anything but < \0 &) +template +const unsigned char lookup_tables::lookup_text_pure_no_ws[256] = { + // 0 1 2 3 4 5 6 7 8 9 A B C D E F + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 1 + 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 2 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, // 3 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 4 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 5 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 6 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 7 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 8 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 9 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // A + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // B + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // C + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // D + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // E + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 // F +}; + +// Text (i.e. 
PCDATA) that does not require processing when ws normalizationis +// is enabled (anything but < \0 & space \n \r \t) +template +const unsigned char lookup_tables::lookup_text_pure_with_ws[256] = { + // 0 1 2 3 4 5 6 7 8 9 A B C D E F + 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, // 0 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 1 + 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 2 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, // 3 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 4 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 5 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 6 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 7 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 8 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 9 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // A + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // B + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // C + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // D + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // E + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 // F +}; + +// Attribute name (anything but space \n \r \t / < > = ? ! 
\0) +template +const unsigned char lookup_tables::lookup_attribute_name[256] = { + // 0 1 2 3 4 5 6 7 8 9 A B C D E F + 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, // 0 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 1 + 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, // 2 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, // 3 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 4 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 5 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 6 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 7 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 8 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 9 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // A + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // B + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // C + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // D + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // E + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 // F +}; + +// Attribute data with single quote (anything but ' \0) +template +const unsigned char lookup_tables::lookup_attribute_data_1[256] = { + // 0 1 2 3 4 5 6 7 8 9 A B C D E F + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 1 + 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, // 2 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 3 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 4 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 5 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 6 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 7 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 8 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 9 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // A + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // B + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // C + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // D + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 
E + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 // F +}; + +// Attribute data with single quote that does not require processing (anything +// but ' \0 &) +template +const unsigned char lookup_tables::lookup_attribute_data_1_pure[256] = { + // 0 1 2 3 4 5 6 7 8 9 A B C D E F + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 1 + 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, // 2 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 3 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 4 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 5 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 6 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 7 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 8 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 9 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // A + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // B + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // C + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // D + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // E + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 // F +}; + +// Attribute data with double quote (anything but " \0) +template +const unsigned char lookup_tables::lookup_attribute_data_2[256] = { + // 0 1 2 3 4 5 6 7 8 9 A B C D E F + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 1 + 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 2 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 3 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 4 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 5 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 6 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 7 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 8 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 9 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // A + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // B + 
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // C + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // D + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // E + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 // F +}; + +// Attribute data with double quote that does not require processing (anything +// but " \0 &) +template +const unsigned char lookup_tables::lookup_attribute_data_2_pure[256] = { + // 0 1 2 3 4 5 6 7 8 9 A B C D E F + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 1 + 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 2 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 3 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 4 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 5 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 6 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 7 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 8 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 9 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // A + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // B + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // C + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // D + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // E + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 // F +}; + +// Digits (dec and hex, 255 denotes end of numeric character reference) +template +const unsigned char lookup_tables::lookup_digits[256] = { + // 0 1 2 3 4 5 6 7 8 9 A B C D E F + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, // 0 + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, // 1 + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, // 2 + 0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 255, 255, 255, 255, 255, 255, // 3 + 255, 10, 11, 12, 13, 14, 15, 255, + 255, 255, 255, 255, 255, 255, 255, 255, // 4 + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 
255, 255, // 5 + 255, 10, 11, 12, 13, 14, 15, 255, + 255, 255, 255, 255, 255, 255, 255, 255, // 6 + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, // 7 + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, // 8 + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, // 9 + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, // A + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, // B + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, // C + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, // D + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, // E + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255 // F +}; + +// Upper case conversion +template +const unsigned char lookup_tables::lookup_upcase[256] = { + // 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, A B C D E F + 0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, // 0 + 16, 17, 18, 19, 20, 21, 22, 23, + 24, 25, 26, 27, 28, 29, 30, 31, // 1 + 32, 33, 34, 35, 36, 37, 38, 39, + 40, 41, 42, 43, 44, 45, 46, 47, // 2 + 48, 49, 50, 51, 52, 53, 54, 55, + 56, 57, 58, 59, 60, 61, 62, 63, // 3 + 64, 65, 66, 67, 68, 69, 70, 71, + 72, 73, 74, 75, 76, 77, 78, 79, // 4 + 80, 81, 82, 83, 84, 85, 86, 87, + 88, 89, 90, 91, 92, 93, 94, 95, // 5 + 96, 65, 66, 67, 68, 69, 70, 71, + 72, 73, 74, 75, 76, 77, 78, 79, // 6 + 80, 81, 82, 83, 84, 85, 86, 87, + 88, 89, 90, 123, 124, 125, 126, 127, // 7 + 128, 129, 130, 131, 132, 133, 134, 135, + 136, 137, 138, 139, 140, 141, 142, 143, // 8 + 144, 145, 146, 147, 148, 149, 150, 151, + 152, 153, 154, 155, 156, 157, 158, 159, // 9 + 160, 161, 162, 163, 164, 165, 166, 167, + 168, 169, 170, 171, 172, 173, 174, 175, // A + 176, 177, 178, 179, 180, 181, 182, 183, + 184, 185, 186, 187, 188, 189, 190, 191, // B + 192, 193, 194, 
195, 196, 197, 198, 199, + 200, 201, 202, 203, 204, 205, 206, 207, // C + 208, 209, 210, 211, 212, 213, 214, 215, + 216, 217, 218, 219, 220, 221, 222, 223, // D + 224, 225, 226, 227, 228, 229, 230, 231, + 232, 233, 234, 235, 236, 237, 238, 239, // E + 240, 241, 242, 243, 244, 245, 246, 247, + 248, 249, 250, 251, 252, 253, 254, 255 // F +}; +} // namespace internal +//! \endcond + +} // namespace rapidxml // Undefine internal macros #undef RAPIDXML_PARSE_ERROR // On MSVC, restore warnings state #ifdef _MSC_VER - #pragma warning(pop) +#pragma warning(pop) #endif #endif diff --git a/src/rapidxml/rapidxml_ext.hpp b/src/rapidxml/rapidxml_ext.hpp index 703e760..5adde68 100644 --- a/src/rapidxml/rapidxml_ext.hpp +++ b/src/rapidxml/rapidxml_ext.hpp @@ -4,34 +4,42 @@ /* Adding declarations to make it compatible with gcc 4.7 and greater */ namespace rapidxml { namespace internal { - template - inline OutIt print_children(OutIt out, const xml_node* node, int flags, int indent); +template +inline OutIt print_children(OutIt out, const xml_node *node, int flags, + int indent); template -inline OutIt print_attributes(OutIt out, const xml_node* node, int flags); +inline OutIt print_attributes(OutIt out, const xml_node *node, int flags); template -inline OutIt print_data_node(OutIt out, const xml_node* node, int flags, int indent); +inline OutIt print_data_node(OutIt out, const xml_node *node, int flags, + int indent); template -inline OutIt print_cdata_node(OutIt out, const xml_node* node, int flags, int indent); +inline OutIt print_cdata_node(OutIt out, const xml_node *node, int flags, + int indent); template -inline OutIt print_element_node(OutIt out, const xml_node* node, int flags, int indent); +inline OutIt print_element_node(OutIt out, const xml_node *node, int flags, + int indent); template -inline OutIt print_declaration_node(OutIt out, const xml_node* node, int flags, int indent); +inline OutIt print_declaration_node(OutIt out, const xml_node *node, + int flags, int 
indent); template -inline OutIt print_comment_node(OutIt out, const xml_node* node, int flags, int indent); +inline OutIt print_comment_node(OutIt out, const xml_node *node, int flags, + int indent); template -inline OutIt print_doctype_node(OutIt out, const xml_node* node, int flags, int indent); +inline OutIt print_doctype_node(OutIt out, const xml_node *node, int flags, + int indent); template -inline OutIt print_pi_node(OutIt out, const xml_node* node, int flags, int indent); -} -} +inline OutIt print_pi_node(OutIt out, const xml_node *node, int flags, + int indent); +} // namespace internal +} // namespace rapidxml #include "rapidxml_print.hpp" #endif /* RAPIDXML_EXT_H_ */ diff --git a/src/rapidxml/rapidxml_iterators.hpp b/src/rapidxml/rapidxml_iterators.hpp index 85c5894..c1d7efb 100644 --- a/src/rapidxml/rapidxml_iterators.hpp +++ b/src/rapidxml/rapidxml_iterators.hpp @@ -8,167 +8,125 @@ #include "rapidxml.hpp" -namespace rapidxml -{ - - //! Iterator of child nodes of xml_node - template - class node_iterator - { - - public: - - typedef typename xml_node value_type; - typedef typename xml_node &reference; - typedef typename xml_node *pointer; - typedef std::ptrdiff_t difference_type; - typedef std::bidirectional_iterator_tag iterator_category; - - node_iterator() - : m_node(0) - { - } - - node_iterator(xml_node *node) - : m_node(node->first_node()) - { - } - - reference operator *() const - { - assert(m_node); - return *m_node; - } - - pointer operator->() const - { - assert(m_node); - return m_node; - } - - node_iterator& operator++() - { - assert(m_node); - m_node = m_node->next_sibling(); - return *this; - } - - node_iterator operator++(int) - { - node_iterator tmp = *this; - ++this; - return tmp; - } - - node_iterator& operator--() - { - assert(m_node && m_node->previous_sibling()); - m_node = m_node->previous_sibling(); - return *this; - } - - node_iterator operator--(int) - { - node_iterator tmp = *this; - ++this; - return tmp; - } - - bool operator 
==(const node_iterator &rhs) - { - return m_node == rhs.m_node; - } - - bool operator !=(const node_iterator &rhs) - { - return m_node != rhs.m_node; - } - - private: - - xml_node *m_node; - - }; - - //! Iterator of child attributes of xml_node - template - class attribute_iterator - { - - public: - - typedef typename xml_attribute value_type; - typedef typename xml_attribute &reference; - typedef typename xml_attribute *pointer; - typedef std::ptrdiff_t difference_type; - typedef std::bidirectional_iterator_tag iterator_category; - - attribute_iterator() - : m_attribute(0) - { - } - - attribute_iterator(xml_node *node) - : m_attribute(node->first_attribute()) - { - } - - reference operator *() const - { - assert(m_attribute); - return *m_attribute; - } - - pointer operator->() const - { - assert(m_attribute); - return m_attribute; - } - - attribute_iterator& operator++() - { - assert(m_attribute); - m_attribute = m_attribute->next_attribute(); - return *this; - } - - attribute_iterator operator++(int) - { - attribute_iterator tmp = *this; - ++this; - return tmp; - } - - attribute_iterator& operator--() - { - assert(m_attribute && m_attribute->previous_attribute()); - m_attribute = m_attribute->previous_attribute(); - return *this; - } - - attribute_iterator operator--(int) - { - attribute_iterator tmp = *this; - ++this; - return tmp; - } - - bool operator ==(const attribute_iterator &rhs) - { - return m_attribute == rhs.m_attribute; - } - - bool operator !=(const attribute_iterator &rhs) - { - return m_attribute != rhs.m_attribute; - } - - private: - - xml_attribute *m_attribute; - - }; - -} +namespace rapidxml { + +//! 
Iterator of child nodes of xml_node
+//!
+//! Bidirectional iterator that is seeded with first_node() of the given
+//! parent and moves with next_sibling() / previous_sibling(). A
+//! default-constructed iterator holds a null node; it is presumably what an
+//! iterator becomes after stepping past the last child (assumes
+//! next_sibling() returns null there -- confirm against rapidxml.hpp).
+//! \param Ch Character type of the underlying xml_node.
+template <class Ch> class node_iterator {
+
+public:
+  // No `typename` here: xml_node<Ch> is not a qualified name, so the
+  // keyword is ill-formed and rejected by conforming compilers.
+  typedef xml_node<Ch> value_type;
+  typedef xml_node<Ch> &reference;
+  typedef xml_node<Ch> *pointer;
+  typedef std::ptrdiff_t difference_type;
+  typedef std::bidirectional_iterator_tag iterator_category;
+
+  //! Constructs an iterator that refers to no node.
+  node_iterator() : m_node(0) {}
+
+  //! Constructs an iterator positioned on the first child of \p node.
+  //! \param node Parent node; must not be null (it is dereferenced).
+  node_iterator(xml_node<Ch> *node) : m_node(node->first_node()) {}
+
+  //! \return Reference to the current node; asserts that one is set.
+  reference operator*() const {
+    assert(m_node);
+    return *m_node;
+  }
+
+  //! \return Pointer to the current node; asserts that one is set.
+  pointer operator->() const {
+    assert(m_node);
+    return m_node;
+  }
+
+  //! Pre-increment: advances to the next sibling.
+  node_iterator &operator++() {
+    assert(m_node);
+    m_node = m_node->next_sibling();
+    return *this;
+  }
+
+  //! Post-increment: advances and returns the previous position.
+  node_iterator operator++(int) {
+    node_iterator tmp = *this;
+    ++*this; // was `++this`; `this` is not a modifiable lvalue
+    return tmp;
+  }
+
+  //! Pre-decrement: moves to the previous sibling, which must exist.
+  node_iterator &operator--() {
+    assert(m_node && m_node->previous_sibling());
+    m_node = m_node->previous_sibling();
+    return *this;
+  }
+
+  //! Post-decrement: moves back and returns the previous position.
+  node_iterator operator--(int) {
+    node_iterator tmp = *this;
+    --*this; // was `++this`: did not compile and pointed the wrong way
+    return tmp;
+  }
+
+  //! Two iterators are equal when they refer to the same node.
+  bool operator==(const node_iterator &rhs) const {
+    return m_node == rhs.m_node;
+  }
+
+  bool operator!=(const node_iterator &rhs) const {
+    return m_node != rhs.m_node;
+  }
+
+private:
+  xml_node<Ch> *m_node; // current node, null when not positioned on a node
+};
+
+//! 
Iterator of child attributes of xml_node
+//!
+//! Bidirectional iterator that is seeded with first_attribute() of the given
+//! node and moves with next_attribute() / previous_attribute(). A
+//! default-constructed iterator holds a null attribute; it is presumably what
+//! an iterator becomes after stepping past the last attribute (assumes
+//! next_attribute() returns null there -- confirm against rapidxml.hpp).
+//! \param Ch Character type of the underlying xml_attribute.
+template <class Ch> class attribute_iterator {
+
+public:
+  // No `typename` here: xml_attribute<Ch> is not a qualified name, so the
+  // keyword is ill-formed and rejected by conforming compilers.
+  typedef xml_attribute<Ch> value_type;
+  typedef xml_attribute<Ch> &reference;
+  typedef xml_attribute<Ch> *pointer;
+  typedef std::ptrdiff_t difference_type;
+  typedef std::bidirectional_iterator_tag iterator_category;
+
+  //! Constructs an iterator that refers to no attribute.
+  attribute_iterator() : m_attribute(0) {}
+
+  //! Constructs an iterator positioned on the first attribute of \p node.
+  //! \param node Owning node; must not be null (it is dereferenced).
+  attribute_iterator(xml_node<Ch> *node)
+      : m_attribute(node->first_attribute()) {}
+
+  //! \return Reference to the current attribute; asserts that one is set.
+  reference operator*() const {
+    assert(m_attribute);
+    return *m_attribute;
+  }
+
+  //! \return Pointer to the current attribute; asserts that one is set.
+  pointer operator->() const {
+    assert(m_attribute);
+    return m_attribute;
+  }
+
+  //! Pre-increment: advances to the next attribute.
+  attribute_iterator &operator++() {
+    assert(m_attribute);
+    m_attribute = m_attribute->next_attribute();
+    return *this;
+  }
+
+  //! Post-increment: advances and returns the previous position.
+  attribute_iterator operator++(int) {
+    attribute_iterator tmp = *this;
+    ++*this; // was `++this`; `this` is not a modifiable lvalue
+    return tmp;
+  }
+
+  //! Pre-decrement: moves to the previous attribute, which must exist.
+  attribute_iterator &operator--() {
+    assert(m_attribute && m_attribute->previous_attribute());
+    m_attribute = m_attribute->previous_attribute();
+    return *this;
+  }
+
+  //! Post-decrement: moves back and returns the previous position.
+  attribute_iterator operator--(int) {
+    attribute_iterator tmp = *this;
+    --*this; // was `++this`: did not compile and pointed the wrong way
+    return tmp;
+  }
+
+  //! Two iterators are equal when they refer to the same attribute.
+  bool operator==(const attribute_iterator &rhs) const {
+    return m_attribute == rhs.m_attribute;
+  }
+
+  bool operator!=(const attribute_iterator &rhs) const {
+    return m_attribute != rhs.m_attribute;
+  }
+
+private:
+  xml_attribute<Ch> *m_attribute; // current attribute, null when unset
+};
+
+} // namespace rapidxml #endif diff --git a/src/rapidxml/rapidxml_print.hpp b/src/rapidxml/rapidxml_print.hpp index 0ae2b14..96aca0f 100644 --- a/src/rapidxml/rapidxml_print.hpp +++ b/src/rapidxml/rapidxml_print.hpp @@ -10,412 +10,434 @@ // Only include streams if not disabled #ifndef RAPIDXML_NO_STREAMS - #include - #include +#include +#include #endif -namespace rapidxml -{ - - /////////////////////////////////////////////////////////////////////// - // Printing flags - - const int print_no_indenting = 0x1; //!< Printer flag instructing the printer to suppress indenting of XML. See print() function. 
- - /////////////////////////////////////////////////////////////////////// - // Internal - - //! \cond internal - namespace internal - { - - /////////////////////////////////////////////////////////////////////////// - // Internal character operations - - // Copy characters from given range to given output iterator - template - inline OutIt copy_chars(const Ch *begin, const Ch *end, OutIt out) - { - while (begin != end) - *out++ = *begin++; - return out; - } - - // Copy characters from given range to given output iterator and expand - // characters into references (< > ' " &) - template - inline OutIt copy_and_expand_chars(const Ch *begin, const Ch *end, Ch noexpand, OutIt out) - { - while (begin != end) - { - if (*begin == noexpand) - { - *out++ = *begin; // No expansion, copy character - } - else - { - switch (*begin) - { - case Ch('<'): - *out++ = Ch('&'); *out++ = Ch('l'); *out++ = Ch('t'); *out++ = Ch(';'); - break; - case Ch('>'): - *out++ = Ch('&'); *out++ = Ch('g'); *out++ = Ch('t'); *out++ = Ch(';'); - break; - case Ch('\''): - *out++ = Ch('&'); *out++ = Ch('a'); *out++ = Ch('p'); *out++ = Ch('o'); *out++ = Ch('s'); *out++ = Ch(';'); - break; - case Ch('"'): - *out++ = Ch('&'); *out++ = Ch('q'); *out++ = Ch('u'); *out++ = Ch('o'); *out++ = Ch('t'); *out++ = Ch(';'); - break; - case Ch('&'): - *out++ = Ch('&'); *out++ = Ch('a'); *out++ = Ch('m'); *out++ = Ch('p'); *out++ = Ch(';'); - break; - default: - *out++ = *begin; // No expansion, copy character - } - } - ++begin; // Step to next character - } - return out; - } - - // Fill given output iterator with repetitions of the same character - template - inline OutIt fill_chars(OutIt out, int n, Ch ch) - { - for (int i = 0; i < n; ++i) - *out++ = ch; - return out; - } - - // Find character - template - inline bool find_char(const Ch *begin, const Ch *end) - { - while (begin != end) - if (*begin++ == ch) - return true; - return false; - } - - 
/////////////////////////////////////////////////////////////////////////// - // Internal printing operations - - // Print node - template - inline OutIt print_node(OutIt out, const xml_node *node, int flags, int indent) - { - // Print proper node type - switch (node->type()) - { - - // Document - case node_document: - out = print_children(out, node, flags, indent); - break; - - // Element - case node_element: - out = print_element_node(out, node, flags, indent); - break; - - // Data - case node_data: - out = print_data_node(out, node, flags, indent); - break; - - // CDATA - case node_cdata: - out = print_cdata_node(out, node, flags, indent); - break; - - // Declaration - case node_declaration: - out = print_declaration_node(out, node, flags, indent); - break; - - // Comment - case node_comment: - out = print_comment_node(out, node, flags, indent); - break; - - // Doctype - case node_doctype: - out = print_doctype_node(out, node, flags, indent); - break; - - // Pi - case node_pi: - out = print_pi_node(out, node, flags, indent); - break; - - // Unknown - default: - assert(0); - break; - } - - // If indenting not disabled, add line break after node - if (!(flags & print_no_indenting)) - *out = Ch('\n'), ++out; - - // Return modified iterator - return out; - } - - // Print children of the node - template - inline OutIt print_children(OutIt out, const xml_node *node, int flags, int indent) - { - for (xml_node *child = node->first_node(); child; child = child->next_sibling()) - out = print_node(out, child, flags, indent); - return out; - } - - // Print attributes of the node - template - inline OutIt print_attributes(OutIt out, const xml_node *node, int flags) - { - for (xml_attribute *attribute = node->first_attribute(); attribute; attribute = attribute->next_attribute()) - { - if (attribute->name() && attribute->value()) - { - // Print attribute name - *out = Ch(' '), ++out; - out = copy_chars(attribute->name(), attribute->name() + attribute->name_size(), out); - *out 
= Ch('='), ++out; - // Print attribute value using appropriate quote type - if (find_char(attribute->value(), attribute->value() + attribute->value_size())) - { - *out = Ch('\''), ++out; - out = copy_and_expand_chars(attribute->value(), attribute->value() + attribute->value_size(), Ch('"'), out); - *out = Ch('\''), ++out; - } - else - { - *out = Ch('"'), ++out; - out = copy_and_expand_chars(attribute->value(), attribute->value() + attribute->value_size(), Ch('\''), out); - *out = Ch('"'), ++out; - } - } - } - return out; - } - - // Print data node - template - inline OutIt print_data_node(OutIt out, const xml_node *node, int flags, int indent) - { - assert(node->type() == node_data); - if (!(flags & print_no_indenting)) - out = fill_chars(out, indent, Ch('\t')); - out = copy_and_expand_chars(node->value(), node->value() + node->value_size(), Ch(0), out); - return out; - } - - // Print data node - template - inline OutIt print_cdata_node(OutIt out, const xml_node *node, int flags, int indent) - { - assert(node->type() == node_cdata); - if (!(flags & print_no_indenting)) - out = fill_chars(out, indent, Ch('\t')); - *out = Ch('<'); ++out; - *out = Ch('!'); ++out; - *out = Ch('['); ++out; - *out = Ch('C'); ++out; - *out = Ch('D'); ++out; - *out = Ch('A'); ++out; - *out = Ch('T'); ++out; - *out = Ch('A'); ++out; - *out = Ch('['); ++out; - out = copy_chars(node->value(), node->value() + node->value_size(), out); - *out = Ch(']'); ++out; - *out = Ch(']'); ++out; - *out = Ch('>'); ++out; - return out; - } - - // Print element node - template - inline OutIt print_element_node(OutIt out, const xml_node *node, int flags, int indent) - { - assert(node->type() == node_element); - - // Print element name and attributes, if any - if (!(flags & print_no_indenting)) - out = fill_chars(out, indent, Ch('\t')); - *out = Ch('<'), ++out; - out = copy_chars(node->name(), node->name() + node->name_size(), out); - out = print_attributes(out, node, flags); - - // If node is childless - if 
(node->value_size() == 0 && !node->first_node()) - { - // Print childless node tag ending - *out = Ch('/'), ++out; - *out = Ch('>'), ++out; - } - else - { - // Print normal node tag ending - *out = Ch('>'), ++out; - - // Test if node contains a single data node only (and no other nodes) - xml_node *child = node->first_node(); - if (!child) - { - // If node has no children, only print its value without indenting - out = copy_and_expand_chars(node->value(), node->value() + node->value_size(), Ch(0), out); - } - else if (child->next_sibling() == 0 && child->type() == node_data) - { - // If node has a sole data child, only print its value without indenting - out = copy_and_expand_chars(child->value(), child->value() + child->value_size(), Ch(0), out); - } - else - { - // Print all children with full indenting - if (!(flags & print_no_indenting)) - *out = Ch('\n'), ++out; - out = print_children(out, node, flags, indent + 1); - if (!(flags & print_no_indenting)) - out = fill_chars(out, indent, Ch('\t')); - } - - // Print node end - *out = Ch('<'), ++out; - *out = Ch('/'), ++out; - out = copy_chars(node->name(), node->name() + node->name_size(), out); - *out = Ch('>'), ++out; - } - return out; - } - - // Print declaration node - template - inline OutIt print_declaration_node(OutIt out, const xml_node *node, int flags, int indent) - { - // Print declaration start - if (!(flags & print_no_indenting)) - out = fill_chars(out, indent, Ch('\t')); - *out = Ch('<'), ++out; - *out = Ch('?'), ++out; - *out = Ch('x'), ++out; - *out = Ch('m'), ++out; - *out = Ch('l'), ++out; - - // Print attributes - out = print_attributes(out, node, flags); - - // Print declaration end - *out = Ch('?'), ++out; - *out = Ch('>'), ++out; - - return out; - } - - // Print comment node - template - inline OutIt print_comment_node(OutIt out, const xml_node *node, int flags, int indent) - { - assert(node->type() == node_comment); - if (!(flags & print_no_indenting)) - out = fill_chars(out, indent, 
Ch('\t')); - *out = Ch('<'), ++out; - *out = Ch('!'), ++out; - *out = Ch('-'), ++out; - *out = Ch('-'), ++out; - out = copy_chars(node->value(), node->value() + node->value_size(), out); - *out = Ch('-'), ++out; - *out = Ch('-'), ++out; - *out = Ch('>'), ++out; - return out; - } - - // Print doctype node - template - inline OutIt print_doctype_node(OutIt out, const xml_node *node, int flags, int indent) - { - assert(node->type() == node_doctype); - if (!(flags & print_no_indenting)) - out = fill_chars(out, indent, Ch('\t')); - *out = Ch('<'), ++out; - *out = Ch('!'), ++out; - *out = Ch('D'), ++out; - *out = Ch('O'), ++out; - *out = Ch('C'), ++out; - *out = Ch('T'), ++out; - *out = Ch('Y'), ++out; - *out = Ch('P'), ++out; - *out = Ch('E'), ++out; - *out = Ch(' '), ++out; - out = copy_chars(node->value(), node->value() + node->value_size(), out); - *out = Ch('>'), ++out; - return out; - } - - // Print pi node - template - inline OutIt print_pi_node(OutIt out, const xml_node *node, int flags, int indent) - { - assert(node->type() == node_pi); - if (!(flags & print_no_indenting)) - out = fill_chars(out, indent, Ch('\t')); - *out = Ch('<'), ++out; - *out = Ch('?'), ++out; - out = copy_chars(node->name(), node->name() + node->name_size(), out); - *out = Ch(' '), ++out; - out = copy_chars(node->value(), node->value() + node->value_size(), out); - *out = Ch('?'), ++out; - *out = Ch('>'), ++out; - return out; - } +namespace rapidxml { +/////////////////////////////////////////////////////////////////////// +// Printing flags + +const int print_no_indenting = + 0x1; //!< Printer flag instructing the printer to suppress indenting of XML. + //!< See print() function. + +/////////////////////////////////////////////////////////////////////// +// Internal + +//! 
\cond internal +namespace internal { + +/////////////////////////////////////////////////////////////////////////// +// Internal character operations + +// Copy characters from given range to given output iterator +template +inline OutIt copy_chars(const Ch *begin, const Ch *end, OutIt out) { + while (begin != end) + *out++ = *begin++; + return out; +} + +// Copy characters from given range to given output iterator and expand +// characters into references (< > ' " &) +template +inline OutIt copy_and_expand_chars(const Ch *begin, const Ch *end, Ch noexpand, + OutIt out) { + while (begin != end) { + if (*begin == noexpand) { + *out++ = *begin; // No expansion, copy character + } else { + switch (*begin) { + case Ch('<'): + *out++ = Ch('&'); + *out++ = Ch('l'); + *out++ = Ch('t'); + *out++ = Ch(';'); + break; + case Ch('>'): + *out++ = Ch('&'); + *out++ = Ch('g'); + *out++ = Ch('t'); + *out++ = Ch(';'); + break; + case Ch('\''): + *out++ = Ch('&'); + *out++ = Ch('a'); + *out++ = Ch('p'); + *out++ = Ch('o'); + *out++ = Ch('s'); + *out++ = Ch(';'); + break; + case Ch('"'): + *out++ = Ch('&'); + *out++ = Ch('q'); + *out++ = Ch('u'); + *out++ = Ch('o'); + *out++ = Ch('t'); + *out++ = Ch(';'); + break; + case Ch('&'): + *out++ = Ch('&'); + *out++ = Ch('a'); + *out++ = Ch('m'); + *out++ = Ch('p'); + *out++ = Ch(';'); + break; + default: + *out++ = *begin; // No expansion, copy character + } } - //! \endcond - - /////////////////////////////////////////////////////////////////////////// - // Printing - - //! Prints XML to given output iterator. - //! \param out Output iterator to print to. - //! \param node Node to be printed. Pass xml_document to print entire document. - //! \param flags Flags controlling how XML is printed. - //! \return Output iterator pointing to position immediately after last character of printed text. 
- template - inline OutIt print(OutIt out, const xml_node &node, int flags = 0) - { - return internal::print_node(out, &node, flags, 0); - } + ++begin; // Step to next character + } + return out; +} -#ifndef RAPIDXML_NO_STREAMS +// Fill given output iterator with repetitions of the same character +template +inline OutIt fill_chars(OutIt out, int n, Ch ch) { + for (int i = 0; i < n; ++i) + *out++ = ch; + return out; +} - //! Prints XML to given output stream. - //! \param out Output stream to print to. - //! \param node Node to be printed. Pass xml_document to print entire document. - //! \param flags Flags controlling how XML is printed. - //! \return Output stream. - template - inline std::basic_ostream &print(std::basic_ostream &out, const xml_node &node, int flags = 0) - { - print(std::ostream_iterator(out), node, flags); - return out; +// Find character +template +inline bool find_char(const Ch *begin, const Ch *end) { + while (begin != end) + if (*begin++ == ch) + return true; + return false; +} + +/////////////////////////////////////////////////////////////////////////// +// Internal printing operations + +// Print node +template +inline OutIt print_node(OutIt out, const xml_node *node, int flags, + int indent) { + // Print proper node type + switch (node->type()) { + + // Document + case node_document: + out = print_children(out, node, flags, indent); + break; + + // Element + case node_element: + out = print_element_node(out, node, flags, indent); + break; + + // Data + case node_data: + out = print_data_node(out, node, flags, indent); + break; + + // CDATA + case node_cdata: + out = print_cdata_node(out, node, flags, indent); + break; + + // Declaration + case node_declaration: + out = print_declaration_node(out, node, flags, indent); + break; + + // Comment + case node_comment: + out = print_comment_node(out, node, flags, indent); + break; + + // Doctype + case node_doctype: + out = print_doctype_node(out, node, flags, indent); + break; + + // Pi + case 
node_pi: + out = print_pi_node(out, node, flags, indent); + break; + + // Unknown + default: + assert(0); + break; + } + + // If indenting not disabled, add line break after node + if (!(flags & print_no_indenting)) + *out = Ch('\n'), ++out; + + // Return modified iterator + return out; +} + +// Print children of the node +template +inline OutIt print_children(OutIt out, const xml_node *node, int flags, + int indent) { + for (xml_node *child = node->first_node(); child; + child = child->next_sibling()) + out = print_node(out, child, flags, indent); + return out; +} + +// Print attributes of the node +template +inline OutIt print_attributes(OutIt out, const xml_node *node, int flags) { + for (xml_attribute *attribute = node->first_attribute(); attribute; + attribute = attribute->next_attribute()) { + if (attribute->name() && attribute->value()) { + // Print attribute name + *out = Ch(' '), ++out; + out = copy_chars(attribute->name(), + attribute->name() + attribute->name_size(), out); + *out = Ch('='), ++out; + // Print attribute value using appropriate quote type + if (find_char(attribute->value(), + attribute->value() + + attribute->value_size())) { + *out = Ch('\''), ++out; + out = copy_and_expand_chars( + attribute->value(), attribute->value() + attribute->value_size(), + Ch('"'), out); + *out = Ch('\''), ++out; + } else { + *out = Ch('"'), ++out; + out = copy_and_expand_chars( + attribute->value(), attribute->value() + attribute->value_size(), + Ch('\''), out); + *out = Ch('"'), ++out; + } } + } + return out; +} - //! Prints formatted XML to given output stream. Uses default printing flags. Use print() function to customize printing process. - //! \param out Output stream to print to. - //! \param node Node to be printed. - //! \return Output stream. 
- template - inline std::basic_ostream &operator <<(std::basic_ostream &out, const xml_node &node) - { - return print(out, node); +// Print data node +template +inline OutIt print_data_node(OutIt out, const xml_node *node, int flags, + int indent) { + assert(node->type() == node_data); + if (!(flags & print_no_indenting)) + out = fill_chars(out, indent, Ch('\t')); + out = copy_and_expand_chars(node->value(), node->value() + node->value_size(), + Ch(0), out); + return out; +} + +// Print data node +template +inline OutIt print_cdata_node(OutIt out, const xml_node *node, int flags, + int indent) { + assert(node->type() == node_cdata); + if (!(flags & print_no_indenting)) + out = fill_chars(out, indent, Ch('\t')); + *out = Ch('<'); + ++out; + *out = Ch('!'); + ++out; + *out = Ch('['); + ++out; + *out = Ch('C'); + ++out; + *out = Ch('D'); + ++out; + *out = Ch('A'); + ++out; + *out = Ch('T'); + ++out; + *out = Ch('A'); + ++out; + *out = Ch('['); + ++out; + out = copy_chars(node->value(), node->value() + node->value_size(), out); + *out = Ch(']'); + ++out; + *out = Ch(']'); + ++out; + *out = Ch('>'); + ++out; + return out; +} + +// Print element node +template +inline OutIt print_element_node(OutIt out, const xml_node *node, int flags, + int indent) { + assert(node->type() == node_element); + + // Print element name and attributes, if any + if (!(flags & print_no_indenting)) + out = fill_chars(out, indent, Ch('\t')); + *out = Ch('<'), ++out; + out = copy_chars(node->name(), node->name() + node->name_size(), out); + out = print_attributes(out, node, flags); + + // If node is childless + if (node->value_size() == 0 && !node->first_node()) { + // Print childless node tag ending + *out = Ch('/'), ++out; + *out = Ch('>'), ++out; + } else { + // Print normal node tag ending + *out = Ch('>'), ++out; + + // Test if node contains a single data node only (and no other nodes) + xml_node *child = node->first_node(); + if (!child) { + // If node has no children, only print its value 
without indenting + out = copy_and_expand_chars( + node->value(), node->value() + node->value_size(), Ch(0), out); + } else if (child->next_sibling() == 0 && child->type() == node_data) { + // If node has a sole data child, only print its value without indenting + out = copy_and_expand_chars( + child->value(), child->value() + child->value_size(), Ch(0), out); + } else { + // Print all children with full indenting + if (!(flags & print_no_indenting)) + *out = Ch('\n'), ++out; + out = print_children(out, node, flags, indent + 1); + if (!(flags & print_no_indenting)) + out = fill_chars(out, indent, Ch('\t')); } -#endif + // Print node end + *out = Ch('<'), ++out; + *out = Ch('/'), ++out; + out = copy_chars(node->name(), node->name() + node->name_size(), out); + *out = Ch('>'), ++out; + } + return out; +} + +// Print declaration node +template +inline OutIt print_declaration_node(OutIt out, const xml_node *node, + int flags, int indent) { + // Print declaration start + if (!(flags & print_no_indenting)) + out = fill_chars(out, indent, Ch('\t')); + *out = Ch('<'), ++out; + *out = Ch('?'), ++out; + *out = Ch('x'), ++out; + *out = Ch('m'), ++out; + *out = Ch('l'), ++out; + + // Print attributes + out = print_attributes(out, node, flags); + + // Print declaration end + *out = Ch('?'), ++out; + *out = Ch('>'), ++out; + + return out; +} + +// Print comment node +template +inline OutIt print_comment_node(OutIt out, const xml_node *node, int flags, + int indent) { + assert(node->type() == node_comment); + if (!(flags & print_no_indenting)) + out = fill_chars(out, indent, Ch('\t')); + *out = Ch('<'), ++out; + *out = Ch('!'), ++out; + *out = Ch('-'), ++out; + *out = Ch('-'), ++out; + out = copy_chars(node->value(), node->value() + node->value_size(), out); + *out = Ch('-'), ++out; + *out = Ch('-'), ++out; + *out = Ch('>'), ++out; + return out; +} + +// Print doctype node +template +inline OutIt print_doctype_node(OutIt out, const xml_node *node, int flags, + int indent) { + 
assert(node->type() == node_doctype); + if (!(flags & print_no_indenting)) + out = fill_chars(out, indent, Ch('\t')); + *out = Ch('<'), ++out; + *out = Ch('!'), ++out; + *out = Ch('D'), ++out; + *out = Ch('O'), ++out; + *out = Ch('C'), ++out; + *out = Ch('T'), ++out; + *out = Ch('Y'), ++out; + *out = Ch('P'), ++out; + *out = Ch('E'), ++out; + *out = Ch(' '), ++out; + out = copy_chars(node->value(), node->value() + node->value_size(), out); + *out = Ch('>'), ++out; + return out; +} + +// Print pi node +template +inline OutIt print_pi_node(OutIt out, const xml_node *node, int flags, + int indent) { + assert(node->type() == node_pi); + if (!(flags & print_no_indenting)) + out = fill_chars(out, indent, Ch('\t')); + *out = Ch('<'), ++out; + *out = Ch('?'), ++out; + out = copy_chars(node->name(), node->name() + node->name_size(), out); + *out = Ch(' '), ++out; + out = copy_chars(node->value(), node->value() + node->value_size(), out); + *out = Ch('?'), ++out; + *out = Ch('>'), ++out; + return out; +} + +} // namespace internal +//! \endcond + +/////////////////////////////////////////////////////////////////////////// +// Printing + +//! Prints XML to given output iterator. +//! \param out Output iterator to print to. +//! \param node Node to be printed. Pass xml_document to print entire document. +//! \param flags Flags controlling how XML is printed. +//! \return Output iterator pointing to position immediately after last +//! character of printed text. +template +inline OutIt print(OutIt out, const xml_node &node, int flags = 0) { + return internal::print_node(out, &node, flags, 0); +} + +#ifndef RAPIDXML_NO_STREAMS +//! Prints XML to given output stream. +//! \param out Output stream to print to. +//! \param node Node to be printed. Pass xml_document to print entire document. +//! \param flags Flags controlling how XML is printed. +//! \return Output stream. 
+template +inline std::basic_ostream &print(std::basic_ostream &out, + const xml_node &node, int flags = 0) { + print(std::ostream_iterator(out), node, flags); + return out; } +//! Prints formatted XML to given output stream. Uses default printing flags. +//! Use print() function to customize printing process. +//! \param out Output stream to print to. +//! \param node Node to be printed. +//! \return Output stream. +template +inline std::basic_ostream &operator<<(std::basic_ostream &out, + const xml_node &node) { + return print(out, node); +} + +#endif + +} // namespace rapidxml + #endif diff --git a/src/rapidxml/rapidxml_utils.hpp b/src/rapidxml/rapidxml_utils.hpp index 5eafa35..9aa9322 100644 --- a/src/rapidxml/rapidxml_utils.hpp +++ b/src/rapidxml/rapidxml_utils.hpp @@ -4,119 +4,99 @@ // Copyright (C) 2006, 2009 Marcin Kalicinski // Version 1.13 // Revision $DateTime: 2009/05/13 01:46:17 $ -//! \file rapidxml_utils.hpp This file contains high-level rapidxml utilities that can be useful -//! in certain simple scenarios. They should probably not be used if maximizing performance is the main objective. +//! \file rapidxml_utils.hpp This file contains high-level rapidxml utilities +//! that can be useful in certain simple scenarios. They should probably not be +//! used if maximizing performance is the main objective. #include "rapidxml.hpp" -#include -#include #include #include +#include +#include -namespace rapidxml -{ - - //! Represents data loaded from a file - template - class file - { - - public: - - //! Loads file into the memory. Data will be automatically destroyed by the destructor. - //! \param filename Filename to load. 
- file(const char *filename) - { - using namespace std; - - // Open stream - basic_ifstream stream(filename, ios::binary); - if (!stream) - throw runtime_error(string("cannot open file ") + filename); - stream.unsetf(ios::skipws); - - // Determine stream size - stream.seekg(0, ios::end); - size_t size = stream.tellg(); - stream.seekg(0); - - // Load data and add terminating 0 - m_data.resize(size + 1); - stream.read(&m_data.front(), static_cast(size)); - m_data[size] = 0; - } - - //! Loads file into the memory. Data will be automatically destroyed by the destructor - //! \param stream Stream to load from - file(std::basic_istream &stream) - { - using namespace std; - - // Load data and add terminating 0 - stream.unsetf(ios::skipws); - m_data.assign(istreambuf_iterator(stream), istreambuf_iterator()); - if (stream.fail() || stream.bad()) - throw runtime_error("error reading stream"); - m_data.push_back(0); - } - - //! Gets file data. - //! \return Pointer to data of file. - Ch *data() - { - return &m_data.front(); - } - - //! Gets file data. - //! \return Pointer to data of file. - const Ch *data() const - { - return &m_data.front(); - } - - //! Gets file data size. - //! \return Size of file data, in characters. - std::size_t size() const - { - return m_data.size(); - } - - private: - - std::vector m_data; // File data - - }; - - //! Counts children of node. Time complexity is O(n). - //! \return Number of children of node - template - inline std::size_t count_children(xml_node *node) - { - xml_node *child = node->first_node(); - std::size_t count = 0; - while (child) - { - ++count; - child = child->next_sibling(); - } - return count; - } - - //! Counts attributes of node. Time complexity is O(n). - //! 
\return Number of attributes of node - template - inline std::size_t count_attributes(xml_node *node) - { - xml_attribute *attr = node->first_attribute(); - std::size_t count = 0; - while (attr) - { - ++count; - attr = attr->next_attribute(); - } - return count; - } +namespace rapidxml { + +//! Represents data loaded from a file +template class file { + +public: + //! Loads file into the memory. Data will be automatically destroyed by the + //! destructor. + //! \param filename Filename to load. + file(const char *filename) { + using namespace std; + + // Open stream + basic_ifstream stream(filename, ios::binary); + if (!stream) + throw runtime_error(string("cannot open file ") + filename); + stream.unsetf(ios::skipws); + + // Determine stream size + stream.seekg(0, ios::end); + size_t size = stream.tellg(); + stream.seekg(0); + + // Load data and add terminating 0 + m_data.resize(size + 1); + stream.read(&m_data.front(), static_cast(size)); + m_data[size] = 0; + } + + //! Loads file into the memory. Data will be automatically destroyed by the + //! destructor + //! \param stream Stream to load from + file(std::basic_istream &stream) { + using namespace std; + + // Load data and add terminating 0 + stream.unsetf(ios::skipws); + m_data.assign(istreambuf_iterator(stream), istreambuf_iterator()); + if (stream.fail() || stream.bad()) + throw runtime_error("error reading stream"); + m_data.push_back(0); + } + + //! Gets file data. + //! \return Pointer to data of file. + Ch *data() { return &m_data.front(); } + + //! Gets file data. + //! \return Pointer to data of file. + const Ch *data() const { return &m_data.front(); } + + //! Gets file data size. + //! \return Size of file data, in characters. + std::size_t size() const { return m_data.size(); } + +private: + std::vector m_data; // File data +}; + +//! Counts children of node. Time complexity is O(n). +//! 
\return Number of children of node +template inline std::size_t count_children(xml_node *node) { + xml_node *child = node->first_node(); + std::size_t count = 0; + while (child) { + ++count; + child = child->next_sibling(); + } + return count; +} +//! Counts attributes of node. Time complexity is O(n). +//! \return Number of attributes of node +template inline std::size_t count_attributes(xml_node *node) { + xml_attribute *attr = node->first_attribute(); + std::size_t count = 0; + while (attr) { + ++count; + attr = attr->next_attribute(); + } + return count; } +} // namespace rapidxml + #endif diff --git a/src/read_flat_ods_.cpp b/src/read_flat_ods_.cpp index c7268c8..3001267 100644 --- a/src/read_flat_ods_.cpp +++ b/src/read_flat_ods_.cpp @@ -2,40 +2,80 @@ #include "read_ods_internals.h" [[cpp11::register]] -cpp11::strings read_flat_ods_(const std::string file, - int start_row, int stop_row, int start_col, int stop_col, +cpp11::strings read_flat_ods_(const std::string &file, int start_row, + int stop_row, int start_col, int stop_col, const int sheet_index, const bool formula_as_formula) { - if(!is_flat_ods(file)){ - throw std::invalid_argument(file + " is not a correct FODS file"); - } - if(sheet_index < 1){ - throw std::invalid_argument("Cannot have sheet index less than 1"); - } - std::string xmlFile; - std::ifstream in(file, std::ios::in | std::ios::binary); - if (in) { - in.seekg(0, std::ios::end); - xmlFile.resize(in.tellg()); - in.seekg(0, std::ios::beg); - in.read(&xmlFile[0], xmlFile.size()); - in.close(); - } else{ - throw std::invalid_argument("No such file"); - } - rapidxml::xml_document<> spreadsheet; - - xmlFile.push_back('\0'); + // Validate inputs early + if (sheet_index < 1) { + throw std::invalid_argument("Cannot have sheet index less than 1"); + } + + if (!is_flat_ods(file)) { + throw std::invalid_argument(file + " is not a correct FODS file"); + } + + // More efficient file reading with better memory management + std::ifstream in(file, 
std::ios::in | std::ios::binary); + if (!in) { + throw std::invalid_argument("No such file: " + file); + } + + // Get file size more efficiently + in.seekg(0, std::ios::end); + std::streamsize file_size = in.tellg(); + if (file_size <= 0) { + throw std::invalid_argument("Empty or invalid file: " + file); + } + + in.seekg(0, std::ios::beg); + + // Reserve memory with extra space for null terminator + std::string xmlFile; + xmlFile.reserve(static_cast(file_size) + 1); + xmlFile.resize(static_cast(file_size)); + + // Read file in one operation + if (!in.read(&xmlFile[0], file_size)) { + throw std::invalid_argument("Error reading file: " + file); + } + in.close(); + + // Add null terminator for RapidXML + xmlFile.push_back('\0'); + + // Parse XML with error handling + rapidxml::xml_document<> spreadsheet; + try { spreadsheet.parse<0>(&xmlFile[0]); + } catch (const rapidxml::parse_error &e) { + throw std::invalid_argument("XML parsing error: " + std::string(e.what())); + } + + // Navigate to root node with null checks + rapidxml::xml_node<> *doc_node = spreadsheet.first_node("office:document"); + if (!doc_node) { + throw std::invalid_argument( + "Invalid ODS structure: missing office:document"); + } + + rapidxml::xml_node<> *body_node = doc_node->first_node("office:body"); + if (!body_node) { + throw std::invalid_argument("Invalid ODS structure: missing office:body"); + } + + rapidxml::xml_node<> *spreadsheet_node = + body_node->first_node("office:spreadsheet"); + if (!spreadsheet_node) { + throw std::invalid_argument( + "Invalid ODS structure: missing office:spreadsheet"); + } + + rapidxml::xml_node<> *rootNode = spreadsheet_node->first_node("table:table"); + if (!rootNode) { + throw std::invalid_argument("Invalid ODS structure: missing table:table"); + } - rapidxml::xml_node<>* rootNode; - rootNode = spreadsheet.first_node("office:document")->first_node("office:body")-> - first_node("office:spreadsheet")->first_node("table:table"); - return read_cell_values_(rootNode, 
- start_row, - stop_row, - start_col, - stop_col, - sheet_index, - formula_as_formula); + return read_cell_values_(rootNode, start_row, stop_row, start_col, stop_col, + sheet_index, formula_as_formula); } diff --git a/src/read_ods_.cpp b/src/read_ods_.cpp index 6cfbf5a..73385c9 100644 --- a/src/read_ods_.cpp +++ b/src/read_ods_.cpp @@ -2,30 +2,55 @@ #include "read_ods_internals.h" [[cpp11::register]] -cpp11::strings read_ods_(const std::string file, - int start_row, - int stop_row, - int start_col, - int stop_col, - const int sheet_index, - const bool formula_as_formula) { - if(!is_ods(file)){ - throw std::invalid_argument(file + " is not a correct ODS file"); - } - if(sheet_index < 1){ - throw std::invalid_argument("Cannot have sheet index less than 1"); - } - std::string xmlFile = zip_buffer(file, "content.xml"); - rapidxml::xml_document<> spreadsheet; +cpp11::strings read_ods_(const std::string &file, int start_row, int stop_row, + int start_col, int stop_col, const int sheet_index, + const bool formula_as_formula) { + // Validate inputs early + if (sheet_index < 1) { + throw std::invalid_argument("Cannot have sheet index less than 1"); + } + + if (!is_ods(file)) { + throw std::invalid_argument(file + " is not a correct ODS file"); + } + + // Extract XML content from ZIP + std::string xmlFile = zip_buffer(file, "content.xml"); + if (xmlFile.empty()) { + throw std::invalid_argument("Could not extract content.xml from " + file); + } + + // Parse XML with error handling + rapidxml::xml_document<> spreadsheet; + try { spreadsheet.parse<0>(&xmlFile[0]); - rapidxml::xml_node<>* rootNode; - rootNode = spreadsheet.first_node()->first_node("office:body")-> - first_node("office:spreadsheet")->first_node("table:table"); - return read_cell_values_(rootNode, - start_row, - stop_row, - start_col, - stop_col, - sheet_index, - formula_as_formula); + } catch (const rapidxml::parse_error &e) { + throw std::invalid_argument("XML parsing error: " + std::string(e.what())); + } + + 
// Navigate to root node with null checks + rapidxml::xml_node<> *doc_node = spreadsheet.first_node(); + if (!doc_node) { + throw std::invalid_argument("Invalid ODS structure: missing document root"); + } + + rapidxml::xml_node<> *body_node = doc_node->first_node("office:body"); + if (!body_node) { + throw std::invalid_argument("Invalid ODS structure: missing office:body"); + } + + rapidxml::xml_node<> *spreadsheet_node = + body_node->first_node("office:spreadsheet"); + if (!spreadsheet_node) { + throw std::invalid_argument( + "Invalid ODS structure: missing office:spreadsheet"); + } + + rapidxml::xml_node<> *rootNode = spreadsheet_node->first_node("table:table"); + if (!rootNode) { + throw std::invalid_argument("Invalid ODS structure: missing table:table"); + } + + return read_cell_values_(rootNode, start_row, stop_row, start_col, stop_col, + sheet_index, formula_as_formula); } diff --git a/src/read_ods_internals.cpp b/src/read_ods_internals.cpp index 085fa8d..ed20e0a 100644 --- a/src/read_ods_internals.cpp +++ b/src/read_ods_internals.cpp @@ -1,252 +1,315 @@ #include "read_ods_internals.h" -std::string parse_p(rapidxml::xml_node<>* node){ - /*Deal with text inside cells. Cells can contain just text (node_data), or a - mixture of text and other nodes (node_element). We usually just want the text - from these nodes (e.g. 
if there's a link), but we also need to consider the - text:s node, which saves repeated spaces*/ - std::string out; - char* name; - int rep_space; - for (rapidxml::xml_node<>* n = node->first_node(); n; n=n->next_sibling()){ - if (n->type() == rapidxml::node_element) - { - name = n->name(); - if (strcmp(name,"text:s") == 0){ - if(n->first_attribute("text:c") != NULL){ - rep_space = atoi(n->first_attribute("text:c")->value()); - } else { - rep_space = 1; - } - out = out.append(std::string(rep_space, ' ')); - } else if (strcmp(name,"text:line-break") == 0){ - out = out.append("\n"); - } else if (strcmp(name, "text:a") == 0){ - if(!(n->first_node("text:a"))){ //Prevent crash by making pathological recursive links - out = out.append(parse_p(n)); - } - } else { - out = out.append(n->value()); - } +std::string parse_p(rapidxml::xml_node<> *node) { + /*Deal with text inside cells. Cells can contain just text (node_data), or a + mixture of text and other nodes (node_element). We usually just want the text + from these nodes (e.g. 
if there's a link), but we also need to consider the + text:s node, which saves repeated spaces*/ + std::string out; + out.reserve(256); // Pre-allocate memory to reduce reallocations + char *name; + int rep_space; + + // Cache commonly used string literals + static const char *text_s = "text:s"; + static const char *text_line_break = "text:line-break"; + static const char *text_a = "text:a"; + static const char *text_c = "text:c"; + + for (rapidxml::xml_node<> *n = node->first_node(); n; n = n->next_sibling()) { + if (n->type() == rapidxml::node_element) { + name = n->name(); + if (strcmp(name, text_s) == 0) { + if (n->first_attribute(text_c) != NULL) { + rep_space = atoi(n->first_attribute(text_c)->value()); + } else { + rep_space = 1; } - else if (n->type() == rapidxml::node_data){ - out = out.append(n->value()); + out.append(rep_space, ' '); // More efficient than creating temp string + } else if (strcmp(name, text_line_break) == 0) { + out += '\n'; // More efficient than append for single chars + } else if (strcmp(name, text_a) == 0) { + if (!(n->first_node(text_a))) { // Prevent crash by making pathological + // recursive links + out += parse_p(n); } + } else { + if (n->value()) + out += n->value(); // Check for null to avoid crashes + } + } else if (n->type() == rapidxml::node_data) { + if (n->value()) + out += n->value(); } - return out; + } + return out; } -std::string parse_textp(rapidxml::xml_node<>* cell){ - std::string out = ""; - int i = 0; - for (rapidxml::xml_node<>* n = cell->first_node("text:p"); n ; n=n->next_sibling("text:p")){ - if (i > 0){ - out = out.append("\n"); - } - if(n->first_node()){ - out = out.append(parse_p(n)); - i++; - } +std::string parse_textp(rapidxml::xml_node<> *cell) { + std::string out; + out.reserve(512); // Pre-allocate memory + bool first = true; + + static const char *text_p = "text:p"; + + for (rapidxml::xml_node<> *n = cell->first_node(text_p); n; + n = n->next_sibling(text_p)) { + if (n->first_node()) { + if (!first) 
{ + out += '\n'; + } + out += parse_p(n); + first = false; } - return out; + } + return out; } -std::string parse_single_cell(rapidxml::xml_node<>* cell, bool formula_as_formula, bool use_office_value){ - std::string cell_value; - char* value_type = (cell->first_attribute("office:value-type") != 0) ? - cell->first_attribute("office:value-type")->value() : NULL; - if(formula_as_formula && cell->first_attribute("table:formula")){ - cell_value = cell->first_attribute("table:formula")->value(); - } else { - cell_value = (cell->first_node("text:p") != 0) ? parse_textp(cell) : ""; - if((value_type) && - ((cell_value.length() == 0 && use_office_value && cell->first_attribute("office:value") != 0) || - ((strcmp(value_type, "float") == 0 || - strcmp(value_type, "currency") == 0|| - strcmp(value_type, "percentage") == 0)))){ - cell_value = cell->first_attribute("office:value")->value(); - } +std::string parse_single_cell(rapidxml::xml_node<> *cell, + const bool formula_as_formula, + const bool use_office_value) { + std::string cell_value; + cell_value.reserve(64); // Pre-allocate small buffer + + static const char *office_value_type = "office:value-type"; + static const char *table_formula = "table:formula"; + static const char *text_p = "text:p"; + static const char *office_value = "office:value"; + static const char *float_type = "float"; + static const char *currency_type = "currency"; + static const char *percentage_type = "percentage"; + + char *value_type = (cell->first_attribute(office_value_type) != 0) + ? 
cell->first_attribute(office_value_type)->value() + : NULL; + + if (formula_as_formula && cell->first_attribute(table_formula)) { + cell_value = cell->first_attribute(table_formula)->value(); + } else { + rapidxml::xml_node<> *text_node = cell->first_node(text_p); + if (text_node != 0) { + cell_value = parse_textp(cell); } - return cell_value; + + if (value_type && ((cell_value.empty() && use_office_value && + cell->first_attribute(office_value) != 0) || + ((strcmp(value_type, float_type) == 0 || + strcmp(value_type, currency_type) == 0 || + strcmp(value_type, percentage_type) == 0)))) { + rapidxml::xml_attribute<> *office_val = + cell->first_attribute(office_value); + if (office_val) { + cell_value = office_val->value(); + } + } + } + return cell_value; } // Make an array of pointers to each cell -std::vector*>> find_rows(rapidxml::xml_node<>* sheet, - int start_row, - const int stop_row, - int start_col, - const int stop_col){ +std::vector *>> +find_rows(rapidxml::xml_node<> *sheet, const int start_row, const int stop_row, + const int start_col, const int stop_col) { - /*Rows and columns are 1-based because both Excel and R treat arrays - this way*/ - int row_repeat_count; - int col_repeat_count; + /*Rows and columns are 1-based because both Excel and R treat arrays + this way*/ + int row_repeat_count; + int col_repeat_count; - rapidxml::xml_node<>* cell; + rapidxml::xml_node<> *cell; - if (start_row < 1){ - start_row = 1; - } - if (start_col < 1){ - start_col = 1; - } - int nrows = stop_row - start_row + 1; + // Make local copies that can be modified + int actual_start_row = start_row; + int actual_start_col = start_col; - std::vector*>> rows((nrows < 1) ? 1 : nrows); + if (actual_start_row < 1) { + actual_start_row = 1; + } + if (actual_start_col < 1) { + actual_start_col = 1; + } + int nrows = stop_row - actual_start_row + 1; - rapidxml::xml_node<>* row = sheet->first_node("table:table-row"); + std::vector *>> rows; + rows.reserve((nrows < 1) ? 
100 + : nrows); // Reserve space to avoid reallocations - // If table has no rows or cells, return blank - if (row == 0 || row->first_node("table:table-cell") == 0){ - return rows; - } + static const char *table_table_row = "table:table-row"; + static const char *table_table_cell = "table:table-cell"; - for (int i = 1; i <= stop_row || stop_row < 1; ){ - // i keeps track of what nominal row we are on + rapidxml::xml_node<> *row = sheet->first_node(table_table_row); + // If table has no rows or cells, return blank + if (row == 0 || row->first_node(table_table_cell) == 0) { + return rows; + } - // Check for row repeats - if (row->first_attribute("table:number-rows-repeated") == nullptr){ - row_repeat_count = 1; - } else { - row_repeat_count = std::atoi(row->first_attribute("table:number-rows-repeated")->value()); + for (int i = 1; i <= stop_row || stop_row < 1;) { + // i keeps track of what nominal row we are on + + // Check for row repeats + if (row->first_attribute("table:number-rows-repeated") == nullptr) { + row_repeat_count = 1; + } else { + row_repeat_count = std::atoi( + row->first_attribute("table:number-rows-repeated")->value()); + } + // Stop if all repeats done, or if we're at the last requested row + for (int r_repeat = 0; + r_repeat < row_repeat_count && (stop_row < 1 || i <= stop_row); + r_repeat++) { + + // Check size of container and resize if needed + int required_size = i - actual_start_row + 1; + if ((int)rows.size() < required_size) { + rows.resize(std::max((int)rows.size() * 2, required_size)); + } + // If this row is blank (i.e. it contains only one or no children, which + // have no contents) + if (row->first_node()->next_sibling() == 0 && + row->first_node()->first_node() == 0) { + // Look ahead. 
If this is the last row, stop, otherwise add a blank row + if (row->next_sibling() == 0) { + break; } - // Stop if all repeats done, or if we're at the last requested row - for (int r_repeat = 0; r_repeat < row_repeat_count && (stop_row < 1 || i <= stop_row); r_repeat++){ + // Otherwise leave the row blank - // Check size of container. - if ((int)rows.size() < i - start_row + 1){ - rows.resize(rows.size() * 2); + // if row is not blank, and in range deal with cells + } else if (i >= actual_start_row) { + unsigned int last_non_blank = 0; + cell = row->first_node(); + for (int j = 1; j <= stop_col || stop_col < 1;) { + // find first cell or covered cell + static const char *table_covered_table_cell = + "table:covered-table-cell"; + + while (cell != 0) { + const char *cell_name = cell->name(); + if (strcmp(cell_name, table_table_cell) == 0 || + strcmp(cell_name, table_covered_table_cell) == 0) { + break; + } else { + cell = cell->next_sibling(); } - // If this row is blank (i.e. it contains only one or no children, which have no contents) - if (row->first_node()->next_sibling() == 0 && row->first_node()->first_node() == 0){ - // Look ahead. 
If this is the last row, stop, otherwise add a blank row - if(row->next_sibling() == 0){ - break; - } - // Otherwise leave the row blank - - // if row is not blank, and in range deal with cells - } else if(i >= start_row) { - unsigned int last_non_blank = 0; - cell = row->first_node(); - for (int j = 1; j <= stop_col || stop_col < 1; ){ - // find first cell or covered cell - while(cell != 0){ - if (strcmp(cell->name(),"table:table-cell")==0 || strcmp(cell->name(), "table:covered-table-cell")==0){ - break; - } else { - cell = cell->next_sibling(); - } - } - // Check for column repeats - if (cell->first_attribute("table:number-columns-repeated")){ - col_repeat_count = std::atoi(cell->first_attribute("table:number-columns-repeated")->value()); - } else { - col_repeat_count = 1; - } - - // Stop if all column repeats done, or if we're at the last requested row - for (int c_repeat = 0; c_repeat < col_repeat_count && (stop_col < 1 || j <= stop_col); c_repeat++){ - bool is_blank = true; - // If this cell is blank (i.e. contains no children) - if (cell->first_node() == 0){ - // Look ahead. If this is the last column, stop. - if(cell->next_sibling() == 0){ - break; - } - } else { - // Otherwise mark that cell is not blank - is_blank = false; - } - // If we're in range add pointer to the array - if (stop_col < 1 || j >= start_col){ - rows[i - start_row].push_back(cell); - if(!is_blank){ - last_non_blank = rows[i - start_row].size(); - } - } - j++; - } - cell = cell->next_sibling(); - // If that was the last cell, stop. 
- if (cell == 0){ - break; - } - - } - // Remove trailing blank cells - rows[i - start_row].resize(last_non_blank); + } + // Check for column repeats + if (cell->first_attribute("table:number-columns-repeated")) { + col_repeat_count = + std::atoi(cell->first_attribute("table:number-columns-repeated") + ->value()); + } else { + col_repeat_count = 1; + } + // Stop if all column repeats done, or if we're at the last requested + // row + for (int c_repeat = 0; + c_repeat < col_repeat_count && (stop_col < 1 || j <= stop_col); + c_repeat++) { + bool is_blank = true; + // If this cell is blank (i.e. contains no children) + if (cell->first_node() == 0) { + // Look ahead. If this is the last column, stop. + if (cell->next_sibling() == 0) { + break; + } + } else { + // Otherwise mark that cell is not blank + is_blank = false; } - i++; - } - row = row->next_sibling("table:table-row"); - // If that was the last row, stop. - if (row == 0){ + // If we're in range add pointer to the array + if (stop_col < 1 || j >= actual_start_col) { + auto &current_row = rows[i - actual_start_row]; + current_row.push_back(cell); + if (!is_blank) { + last_non_blank = current_row.size(); + } + } + j++; + } + cell = cell->next_sibling(); + // If that was the last cell, stop. + if (cell == 0) { break; + } } - + // Remove trailing blank cells + rows[i - actual_start_row].resize(last_non_blank); + } + i++; } - // Remove trailing empty elements - unsigned int rowsize = 0; - for (unsigned int i = 0; i < rows.size(); i++){ - if(rows[i].size() > 0){ - rowsize = i; - } + row = row->next_sibling(table_table_row); + // If that was the last row, stop. 
+ if (row == 0) { + break; } - rows.resize(rowsize + 1); - return rows; + } + // Remove trailing empty elements + unsigned int rowsize = 0; + for (unsigned int i = 0; i < rows.size(); i++) { + if (rows[i].size() > 0) { + rowsize = i; + } + } + rows.resize(rowsize + 1); + return rows; } -// read cell_values (an R character vector) out of the rootNode of the XML document -cpp11::strings read_cell_values_(rapidxml::xml_node<>* rootNode, - int start_row, - int stop_row, - int start_col, - int stop_col, - const int sheet_index, - const bool formula_as_formula) { - unsigned int out_width = 0; - unsigned int out_length; - for (int i = 1; i < sheet_index; i++){ - rootNode = rootNode->next_sibling("table:table"); - } - std::vector*>> contents; - contents = find_rows(rootNode, start_row,stop_row,start_col,stop_col); - // Get dimensions of output - out_length = contents.size(); - for (unsigned int i = 0; i < contents.size(); i++){ - if (contents[i].size() > out_width){ - out_width = contents[i].size(); - } +// read cell_values (an R character vector) out of the rootNode of the XML +// document +cpp11::strings read_cell_values_(rapidxml::xml_node<> *rootNode, + const int start_row, const int stop_row, + const int start_col, const int stop_col, + const int sheet_index, + const bool formula_as_formula) { + unsigned int out_width = 0; + unsigned int out_length; + for (int i = 1; i < sheet_index; i++) { + rootNode = rootNode->next_sibling("table:table"); + } + std::vector *>> contents; + contents = find_rows(rootNode, start_row, stop_row, start_col, stop_col); + // Get dimensions of output + out_length = contents.size(); + for (unsigned int i = 0; i < contents.size(); i++) { + if (contents[i].size() > out_width) { + out_width = contents[i].size(); } - // If there is no content - if (out_width * out_length == 0){ - cpp11::writable::strings cell_values(2); - cell_values[0] = "0"; - cell_values[1] = "0"; - return cell_values; + } + // If there is no content + if (out_width * 
out_length == 0) { + cpp11::writable::strings cell_values(2); + cell_values[0] = "0"; + cell_values[1] = "0"; + return cell_values; + } + cpp11::writable::strings cell_values(out_width * out_length + 2); + cell_values[0] = std::to_string(out_width); + cell_values[1] = std::to_string(out_length); + + int t = 2; + static const SEXP empty_string = Rf_mkCharCE("", CE_UTF8); + + for (unsigned int i = 0; i < contents.size(); i++) { + const auto &row = contents[i]; + for (unsigned int j = 0; j < row.size(); j++) { + if (row[j] != 0) { + std::string cell_content = + parse_single_cell(row[j], formula_as_formula, true); + cell_values[t] = Rf_mkCharCE(cell_content.c_str(), CE_UTF8); + } else { + cell_values[t] = NA_STRING; + } + t++; } - cpp11::writable::strings cell_values(out_width*out_length + 2); - cell_values[0] = std::to_string(out_width); - cell_values[1] = std::to_string(out_length); - - int t = 2; - for (unsigned int i = 0; i < contents.size(); i++){ - for (unsigned int j = 0; j < contents[i].size(); j++){ - cell_values[t] = (contents[i][j] != 0) ? 
- Rf_mkCharCE(parse_single_cell(contents[i][j], formula_as_formula, true).c_str(), CE_UTF8) : NA_STRING; - t++; - } - // Pad rows to even width - if(contents[i].size() < out_width){ - unsigned int row_width = contents[i].size(); - for (unsigned int j = 0; j + row_width < out_width; j++){ - cell_values[t] = ""; - t++; - } - } + // Pad rows to even width more efficiently + unsigned int row_width = row.size(); + if (row_width < out_width) { + for (unsigned int j = row_width; j < out_width; j++) { + cell_values[t] = empty_string; + t++; + } } - return cell_values; + } + return cell_values; } diff --git a/src/read_ods_internals.h b/src/read_ods_internals.h index 496ea00..d98505a 100644 --- a/src/read_ods_internals.h +++ b/src/read_ods_internals.h @@ -1,29 +1,30 @@ #pragma once #include "cpp11.hpp" -#include "cpp11/r_string.hpp" #include "rapidxml/rapidxml.hpp" -#include -#include +#include // For std::max +#include // For strcmp optimizations #include #include #include +#include +#include + +// Function declarations with const correctness and optimization hints +std::string parse_p(rapidxml::xml_node<> *node); +std::string parse_textp(rapidxml::xml_node<> *cell); +std::string parse_single_cell(rapidxml::xml_node<> *cell, + const bool formula_as_formula, + const bool use_office_value); -std::string parse_p(rapidxml::xml_node<>* node); -std::string parse_textp(rapidxml::xml_node<>* cell); -std::string parse_single_cell(rapidxml::xml_node<>* cell, bool formula_as_formula, bool use_office_value); -std::vector*>> find_rows(rapidxml::xml_node<>* sheet, - int start_row, - const int stop_row, - int start_col, - const int stop_col); +std::vector *>> +find_rows(rapidxml::xml_node<> *sheet, const int start_row, const int stop_row, + const int start_col, const int stop_col); -cpp11::strings read_cell_values_(rapidxml::xml_node<>* rootNode, - int start_row, - int stop_row, - int start_col, - int stop_col, +cpp11::strings read_cell_values_(rapidxml::xml_node<> *rootNode, + const int 
start_row, const int stop_row, + const int start_col, const int stop_col, const int sheet_index, const bool formula_as_formula); diff --git a/src/readxl/zip.cpp b/src/readxl/zip.cpp index 63b5c07..cab697f 100644 --- a/src/readxl/zip.cpp +++ b/src/readxl/zip.cpp @@ -4,9 +4,8 @@ #include "cpp11/function.hpp" #include "cpp11/raws.hpp" - -std::string zip_buffer(const std::string& zip_path, - const std::string& file_path) { +std::string zip_buffer(const std::string &zip_path, + const std::string &file_path) { cpp11::function zip_buffer = cpp11::package("readODS")["zip_buffer"]; cpp11::raws xml(zip_buffer(zip_path, file_path)); @@ -16,8 +15,7 @@ std::string zip_buffer(const std::string& zip_path, return buffer; } -bool zip_has_file(const std::string& zip_path, - const std::string& file_path) { +bool zip_has_file(const std::string &zip_path, const std::string &file_path) { cpp11::function zip_has_file = cpp11::package("readODS")["zip_has_file"]; return zip_has_file(zip_path, file_path); } diff --git a/src/readxl/zip.h b/src/readxl/zip.h index 463f6e5..ecdb759 100644 --- a/src/readxl/zip.h +++ b/src/readxl/zip.h @@ -3,5 +3,6 @@ #include "../rapidxml/rapidxml.hpp" #include -std::string zip_buffer(const std::string& zip_path, const std::string& file_path); -bool zip_has_file(const std::string& zip_path, const std::string& file_path); \ No newline at end of file +std::string zip_buffer(const std::string &zip_path, + const std::string &file_path); +bool zip_has_file(const std::string &zip_path, const std::string &file_path); \ No newline at end of file diff --git a/src/splice.cpp b/src/splice.cpp index 95bf5c5..69286ba 100644 --- a/src/splice.cpp +++ b/src/splice.cpp @@ -1,72 +1,167 @@ #include "splice.h" [[cpp11::register]] -std::string splice_sheet_(const std::string original_xml, const std::string sheet_file, const bool flat) { +std::string splice_sheet_(const std::string &original_xml, + const std::string &sheet_file, const bool flat) { + try { rapidxml::xml_document<> 
spreadsheet1; - // read the content in heap - rapidxml::file<> *xml_file = new rapidxml::file<>(original_xml.c_str()); - spreadsheet1.parse<0>((char*)xml_file->data()); - rapidxml::xml_node<>* root_node; + + // Use RAII for automatic memory management + std::unique_ptr> xml_file( + new rapidxml::file<>(original_xml.c_str())); + spreadsheet1.parse((char *)xml_file->data()); + + // Cache string literals for performance + static const char *office_body = "office:body"; + static const char *office_spreadsheet = "office:spreadsheet"; + static const char *office_document = "office:document"; + static const char *table_table = "table:table"; + + rapidxml::xml_node<> *root_node = nullptr; if (!flat) { - root_node = spreadsheet1.first_node()->first_node("office:body")-> - first_node("office:spreadsheet"); + rapidxml::xml_node<> *doc_node = spreadsheet1.first_node(); + if (!doc_node) + throw std::runtime_error("Invalid XML structure"); + + rapidxml::xml_node<> *body_node = doc_node->first_node(office_body); + if (!body_node) + throw std::runtime_error("Missing office:body"); + + root_node = body_node->first_node(office_spreadsheet); + if (!root_node) + throw std::runtime_error("Missing office:spreadsheet"); } else { - root_node = spreadsheet1.first_node("office:document")->first_node("office:body")-> - first_node("office:spreadsheet"); + rapidxml::xml_node<> *doc_node = spreadsheet1.first_node(office_document); + if (!doc_node) + throw std::runtime_error("Missing office:document"); + + rapidxml::xml_node<> *body_node = doc_node->first_node(office_body); + if (!body_node) + throw std::runtime_error("Missing office:body"); + + root_node = body_node->first_node(office_spreadsheet); + if (!root_node) + throw std::runtime_error("Missing office:spreadsheet"); } + rapidxml::xml_document<> spreadsheet2; - rapidxml::file<> *xml_file2 = new rapidxml::file<>(sheet_file.c_str()); - spreadsheet2.parse<0>((char*)xml_file2->data()); - rapidxml::xml_node<> *root_node2; - root_node2 = 
spreadsheet2.first_node("table:table"); + std::unique_ptr> xml_file2( + new rapidxml::file<>(sheet_file.c_str())); + spreadsheet2.parse((char *)xml_file2->data()); + + rapidxml::xml_node<> *root_node2 = spreadsheet2.first_node(table_table); + if (!root_node2) + throw std::runtime_error("Missing table:table in sheet file"); + rapidxml::xml_node<> *new_node = spreadsheet2.clone_node(root_node2); root_node->append_node(new_node); - std::ofstream output_file(original_xml); + + // More efficient file writing + std::ofstream output_file(original_xml, std::ios::out | std::ios::trunc); + if (!output_file) + throw std::runtime_error("Cannot open output file"); + output_file << "\n"; output_file << spreadsheet1; - output_file.close(); - delete xml_file; - delete xml_file2; + return original_xml; + + } catch (const std::exception &e) { + throw std::runtime_error("Error in splice_sheet_: " + + std::string(e.what())); + } } [[cpp11::register]] -std::string update_sheet_(const std::string original_xml, const std::string sheet_file, const bool flat, const int sheet_index) { +std::string update_sheet_(const std::string &original_xml, + const std::string &sheet_file, const bool flat, + const int sheet_index) { + try { + if (sheet_index < 1) { + throw std::invalid_argument("Sheet index must be >= 1"); + } + rapidxml::xml_document<> spreadsheet1; - rapidxml::file<> *xml_file = new rapidxml::file<>(original_xml.c_str()); - spreadsheet1.parse<0>((char*)xml_file->data()); - rapidxml::xml_node<>* root_node; - rapidxml::xml_node<>* parent_node; + std::unique_ptr> xml_file( + new rapidxml::file<>(original_xml.c_str())); + spreadsheet1.parse((char *)xml_file->data()); + + // Cache string literals for performance + static const char *office_body = "office:body"; + static const char *office_spreadsheet = "office:spreadsheet"; + static const char *office_document = "office:document"; + static const char *table_table = "table:table"; + + rapidxml::xml_node<> *root_node = nullptr; + 
rapidxml::xml_node<> *parent_node = nullptr; if (!flat) { - root_node = spreadsheet1.first_node()->first_node("office:body")-> - first_node("office:spreadsheet")->first_node("table:table"); - parent_node = spreadsheet1.first_node()->first_node("office:body")-> - first_node("office:spreadsheet"); + rapidxml::xml_node<> *doc_node = spreadsheet1.first_node(); + if (!doc_node) + throw std::runtime_error("Invalid XML structure"); + + rapidxml::xml_node<> *body_node = doc_node->first_node(office_body); + if (!body_node) + throw std::runtime_error("Missing office:body"); + + parent_node = body_node->first_node(office_spreadsheet); + if (!parent_node) + throw std::runtime_error("Missing office:spreadsheet"); + root_node = parent_node->first_node(table_table); } else { - root_node = spreadsheet1.first_node("office:document")->first_node("office:body")-> - first_node("office:spreadsheet")->first_node("table:table"); - parent_node = spreadsheet1.first_node("office:document")->first_node("office:body")-> - first_node("office:spreadsheet"); + rapidxml::xml_node<> *doc_node = spreadsheet1.first_node(office_document); + if (!doc_node) + throw std::runtime_error("Missing office:document"); + + rapidxml::xml_node<> *body_node = doc_node->first_node(office_body); + if (!body_node) + throw std::runtime_error("Missing office:body"); + + parent_node = body_node->first_node(office_spreadsheet); + if (!parent_node) + throw std::runtime_error("Missing office:spreadsheet"); + + root_node = parent_node->first_node(table_table); } - for (int i = 1; i < sheet_index; i++){ - root_node = root_node->next_sibling("table:table"); + + if (!root_node) + throw std::runtime_error("No sheets found"); + + // Navigate to the target sheet more efficiently + for (int i = 1; i < sheet_index; i++) { + root_node = root_node->next_sibling(table_table); + if (!root_node) { + throw std::runtime_error("Sheet index " + std::to_string(sheet_index) + + " not found"); + } } rapidxml::xml_document<> spreadsheet2; - 
rapidxml::file<> *xml_file2 = new rapidxml::file<>(sheet_file.c_str()); - spreadsheet2.parse<0>((char*)xml_file2->data()); - rapidxml::xml_node<> *root_node2; - root_node2 = spreadsheet2.first_node("table:table"); + std::unique_ptr> xml_file2( + new rapidxml::file<>(sheet_file.c_str())); + spreadsheet2.parse((char *)xml_file2->data()); + + rapidxml::xml_node<> *root_node2 = spreadsheet2.first_node(table_table); + if (!root_node2) + throw std::runtime_error("Missing table:table in sheet file"); + rapidxml::xml_node<> *new_node = spreadsheet2.clone_node(root_node2); parent_node->insert_node(root_node, new_node); parent_node->remove_node(root_node); - std::ofstream output_file(original_xml); + + // More efficient file writing + std::ofstream output_file(original_xml, std::ios::out | std::ios::trunc); + if (!output_file) + throw std::runtime_error("Cannot open output file"); + output_file << "\n"; output_file << spreadsheet1; - output_file.close(); - delete xml_file; - delete xml_file2; + return original_xml; + + } catch (const std::exception &e) { + throw std::runtime_error("Error in update_sheet_: " + + std::string(e.what())); + } } diff --git a/src/splice.h b/src/splice.h index 2a4557c..87ccc58 100644 --- a/src/splice.h +++ b/src/splice.h @@ -3,10 +3,16 @@ #include "cpp11.hpp" #include "cpp11/r_string.hpp" #include +#include // For std::ios flags +#include // For std::unique_ptr +#include // For exception handling #include #include "rapidxml/rapidxml_ext.hpp" #include "rapidxml/rapidxml_utils.hpp" -std::string splice_sheet_(const std::string original_xml, const std::string sheet_xml, const bool flat); -std::string update_sheet_(const std::string original_xml, const std::string sheet_xml, const bool flat, const int sheet); +std::string splice_sheet_(const std::string &original_xml, + const std::string &sheet_xml, const bool flat); +std::string update_sheet_(const std::string &original_xml, + const std::string &sheet_xml, const bool flat, + const int sheet); diff 
--git a/src/write_sheet_file_.cpp b/src/write_sheet_file_.cpp index 61154a8..ac20314 100644 --- a/src/write_sheet_file_.cpp +++ b/src/write_sheet_file_.cpp @@ -1,166 +1,204 @@ #include "write_sheet_file_.h" -void cell_out (const cpp11::r_string& value_type, const cpp11::r_string& value, std::ofstream& xml_file) { - const char* value_type_c = Rf_translateCharUTF8(value_type); - const char* value_c = Rf_translateCharUTF8(value); - xml_file << ""; - xml_file << value_c; - xml_file << "\n"; +void cell_out(const cpp11::r_string &value_type, const cpp11::r_string &value, + std::ofstream &xml_file) { + const char *value_type_c = Rf_translateCharUTF8(value_type); + const char *value_c = Rf_translateCharUTF8(value); + + // Pre-allocate string buffer to reduce allocations + std::string cell_xml; + cell_xml.reserve(256); // Reserve space for typical cell content + + cell_xml += ""; + cell_xml += value_c; + cell_xml += "\n"; + + // Single write operation for better performance + xml_file << cell_xml; } -void pad_rows (const bool& padding, const int& cols, const int& cmax, std::ofstream& xml_file) { - if (cols < cmax && padding) { - xml_file << "\n"; - } +void pad_rows(const bool &padding, const int &cols, const int &cmax, + std::ofstream &xml_file) { + if (cols < cmax && padding) { + // More efficient single write with pre-constructed string + std::string pad_xml = "\n"; + xml_file << pad_xml; + } } -cpp11::strings dimnames(const cpp11::data_frame& x, bool cols) { - // Is there a better way? - cpp11::function dimnames_rfun = cpp11::package("readODS")[".get_sanitized_dimnames"]; - return cpp11::writable::strings(static_cast(dimnames_rfun(x, cols))); +cpp11::strings dimnames(const cpp11::data_frame &x, bool cols) { + // Is there a better way? 
+ cpp11::function dimnames_rfun = + cpp11::package("readODS")[".get_sanitized_dimnames"]; + return cpp11::writable::strings(static_cast(dimnames_rfun(x, cols))); } -cpp11::list_of sanitize(const cpp11::data_frame& x, const cpp11::strings column_types) { - cpp11::function sanitize_rfun = cpp11::package("readODS")[".sanitize_df"]; - return cpp11::writable::list_of(static_cast(sanitize_rfun(x, column_types))); +cpp11::list_of sanitize(const cpp11::data_frame &x, + const cpp11::strings column_types) { + cpp11::function sanitize_rfun = cpp11::package("readODS")[".sanitize_df"]; + return cpp11::writable::list_of( + static_cast(sanitize_rfun(x, column_types))); } -cpp11::strings get_column_types(const cpp11::data_frame& x) { - cpp11::function get_column_types_rfun = cpp11::package("readODS")[".get_column_types"]; - return cpp11::writable::strings(static_cast(get_column_types_rfun(x))); +cpp11::strings get_column_types(const cpp11::data_frame &x) { + cpp11::function get_column_types_rfun = + cpp11::package("readODS")[".get_column_types"]; + return cpp11::writable::strings(static_cast(get_column_types_rfun(x))); } -std::string escape_xml(const std::string& input) { - cpp11::sexp input_sexp = cpp11::as_sexp(input); - cpp11::function escape_xml_rfun = cpp11::package("readODS")[".escape_xml"]; - return cpp11::as_cpp(escape_xml_rfun(input_sexp)); +std::string escape_xml(const std::string &input) { + cpp11::sexp input_sexp = cpp11::as_sexp(input); + cpp11::function escape_xml_rfun = cpp11::package("readODS")[".escape_xml"]; + return cpp11::as_cpp(escape_xml_rfun(input_sexp)); } -void write_empty(std::ofstream& xml_file, const std::string& escaped_sheet_name) { - xml_file << ""; - xml_file << ""; +void write_empty(std::ofstream &xml_file, + const std::string &escaped_sheet_name) { + // Single write operation for better performance + std::string empty_table = ""; + xml_file << empty_table; } -void write_df(const cpp11::data_frame& x, const std::string& sheet_name, const bool 
row_names, const bool col_names, - const bool na_as_string, const bool padding, std::ofstream& xml_file) { - std::string escaped_sheet_name = escape_xml(sheet_name); - if (x.ncol() == 0 || (x.nrow() == 0 && !col_names && x.ncol() != 0)) { - write_empty(xml_file, escaped_sheet_name); - return; - } - cpp11::strings column_types = get_column_types(x); - cpp11::strings rownames_x, colnames_x; - cpp11::list_of x_list = sanitize(x, column_types); +void write_df(const cpp11::data_frame &x, const std::string &sheet_name, + const bool row_names, const bool col_names, + const bool na_as_string, const bool padding, + std::ofstream &xml_file) { + std::string escaped_sheet_name = escape_xml(sheet_name); + if (x.ncol() == 0 || (x.nrow() == 0 && !col_names && x.ncol() != 0)) { + write_empty(xml_file, escaped_sheet_name); + return; + } + cpp11::strings column_types = get_column_types(x); + cpp11::strings rownames_x, colnames_x; + cpp11::list_of x_list = sanitize(x, column_types); + if (row_names) { + rownames_x = dimnames(x, false); + } + if (col_names) { + colnames_x = dimnames(x, true); + } + int rows = col_names ? x_list[0].size() + 1 : x_list[0].size(); + int cols = row_names ? column_types.size() + 1 : column_types.size(); + int cmax = column_types.size() > 1024 ? 16384 : 1024; + // gen_sheet_tag - more efficient single write + std::string header_xml = "\n\n"; + header_xml += "\n"; + xml_file << header_xml; + // add_data - column headers + if (col_names) { + xml_file << ""; if (row_names) { - rownames_x = dimnames(x, false); - } - if (col_names) { - colnames_x = dimnames(x, true); + cell_out("string", "", xml_file); } - int rows = col_names ? x_list[0].size() + 1 : x_list[0].size(); - int cols = row_names ? column_types.size() + 1 : column_types.size(); - int cmax = column_types.size() > 1024 ? 
16384 : 1024; - // gen_sheet_tag - xml_file << "\n\n"; - // column - xml_file << "\n"; - // add_data - if (col_names) { - xml_file << ""; - if (row_names) { - cell_out("string", "", xml_file); - } - for (int j = 0; j < colnames_x.size(); j++) { - cell_out("string", colnames_x[j], xml_file); - } - pad_rows(padding, cols, cmax, xml_file); - xml_file << "\n"; + // Cache string literal for performance + static const cpp11::r_string string_type_r("string"); + for (int j = 0; j < colnames_x.size(); j++) { + cell_out(string_type_r, colnames_x[j], xml_file); } - for (int i = 0; i < x_list[0].size(); i++) { - xml_file << "\n"; - if (row_names) { - cell_out("string", rownames_x[i], xml_file); - } - for (int j = 0; j < column_types.size(); j++) { - if (x_list[j][i] != NA_STRING) { - cell_out(column_types[j], x_list[j][i], xml_file); - continue; - } - if (!na_as_string) { - xml_file << "\n"; - continue; - } - cell_out("string", "NA", xml_file); - } - pad_rows(padding, cols, cmax, xml_file); - xml_file << "\n"; + pad_rows(padding, cols, cmax, xml_file); + xml_file << "\n"; + } + for (int i = 0; i < x_list[0].size(); i++) { + xml_file << "\n"; + if (row_names) { + static const cpp11::r_string string_type_r("string"); + cell_out(string_type_r, rownames_x[i], xml_file); } - // pad_columns - if (rows < 1048576 && padding) { - xml_file << "\n"; - xml_file << ""; - xml_file << "\n"; + for (int j = 0; j < column_types.size(); j++) { + if (x_list[j][i] != NA_STRING) { + cell_out(column_types[j], x_list[j][i], xml_file); + continue; + } + if (!na_as_string) { + xml_file << "\n"; + continue; + } + static const cpp11::r_string string_type_r("string"); + static const cpp11::r_string na_value("NA"); + cell_out(string_type_r, na_value, xml_file); } - xml_file << "\n"; + pad_rows(padding, cols, cmax, xml_file); + xml_file << "\n"; + } + // pad_columns - more efficient single write + if (rows < 1048576 && padding) { + std::string padding_xml = "\n"; + padding_xml += "\n"; + xml_file << 
padding_xml; + } + xml_file << "\n"; } [[cpp11::register]] -cpp11::r_string write_sheet_file_(const std::string& filename, - const cpp11::data_frame& x, - const std::string& sheet_name, - const bool row_names, - const bool col_names, - const bool na_as_string, - const bool padding, - const std::string& header, - const std::string& footer) { - std::ofstream xml_file(filename); - xml_file << header; - write_df(x, sheet_name, row_names, col_names, na_as_string, padding, xml_file); - xml_file << footer; - xml_file << "\n"; - xml_file.close(); - return filename; +cpp11::r_string +write_sheet_file_(const std::string &filename, const cpp11::data_frame &x, + const std::string &sheet_name, const bool row_names, + const bool col_names, const bool na_as_string, + const bool padding, const std::string &header, + const std::string &footer) { + // Use buffered output for better performance + std::ofstream xml_file(filename, std::ios::out | std::ios::trunc); + if (!xml_file) { + throw std::runtime_error("Cannot open file for writing: " + filename); + } + + // Set a larger buffer for better I/O performance + char buffer[8192]; + xml_file.rdbuf()->pubsetbuf(buffer, sizeof(buffer)); + + xml_file << header; + write_df(x, sheet_name, row_names, col_names, na_as_string, padding, + xml_file); + xml_file << footer << "\n"; + + return filename; } [[cpp11::register]] -cpp11::r_string write_sheet_file_list_(const std::string& filename, - const cpp11::list_of& x, - const std::string& sheet_name, // wont use; just for maintain the same interface - const bool row_names, - const bool col_names, - const bool na_as_string, - const bool padding, - const std::string& header, - const std::string& footer) { - std::ofstream xml_file(filename); - xml_file << header; - cpp11::strings sheet_names = x.names(); - for (int i = 0; i < sheet_names.size(); i++) { - cpp11::data_frame current_df = x[i]; - cpp11::r_string current_sheet_name = sheet_names[i]; - write_df(current_df, current_sheet_name, row_names, 
col_names, na_as_string, padding, xml_file); - } - xml_file << footer; - xml_file << "\n"; - xml_file.close(); - return filename; +cpp11::r_string write_sheet_file_list_( + const std::string &filename, const cpp11::list_of &x, + const std::string + &sheet_name, // wont use; just for maintain the same interface + const bool row_names, const bool col_names, const bool na_as_string, + const bool padding, const std::string &header, const std::string &footer) { + // Use buffered output for better performance + std::ofstream xml_file(filename, std::ios::out | std::ios::trunc); + if (!xml_file) { + throw std::runtime_error("Cannot open file for writing: " + filename); + } + + // Set a larger buffer for better I/O performance + char buffer[8192]; + xml_file.rdbuf()->pubsetbuf(buffer, sizeof(buffer)); + + xml_file << header; + cpp11::strings sheet_names = x.names(); + for (int i = 0; i < sheet_names.size(); i++) { + const cpp11::data_frame &current_df = x[i]; // Use const reference + const cpp11::r_string &current_sheet_name = + sheet_names[i]; // Use const reference + write_df(current_df, current_sheet_name, row_names, col_names, na_as_string, + padding, xml_file); + } + xml_file << footer << "\n"; + + return filename; } diff --git a/src/write_sheet_file_.h b/src/write_sheet_file_.h index 51044f2..1335e60 100644 --- a/src/write_sheet_file_.h +++ b/src/write_sheet_file_.h @@ -1,19 +1,19 @@ #pragma once #include "cpp11.hpp" -#include "cpp11/r_string.hpp" #include "cpp11/list.hpp" +#include "cpp11/r_string.hpp" #include #include #include +#include // For std::ios flags #include +#include // For exception handling +#include // For std::string operations -cpp11::r_string write_sheet_(const std::string& filename, - const cpp11::data_frame& x, - const std::string& sheet, - const bool row_names, - const bool col_names, - const bool na_as_string, - const bool padding, - const std::string& header, - const std::string& footer); +cpp11::r_string write_sheet_(const std::string &filename, + const
cpp11::data_frame &x, + const std::string &sheet, const bool row_names, + const bool col_names, const bool na_as_string, + const bool padding, const std::string &header, + const std::string &footer); diff --git a/tests/testthat/test_progress.R b/tests/testthat/test_progress.R new file mode 100644 index 0000000..3e89143 --- /dev/null +++ b/tests/testthat/test_progress.R @@ -0,0 +1,29 @@ +test_that("readODs_progress, normal case", { + ## pretend to be interactive + local_mocked_bindings(.is_interactive = function(...) TRUE) + expect_true(readODS_progress()) +}) + +test_that("readODs_progress, suppress in batch", { + ## pretend to be batch + local_mocked_bindings(.is_interactive = function(...) FALSE) + expect_false(readODS_progress()) +}) + +test_that("readODS_progress, suppression by options", { + ## pretend to be interactive + local_mocked_bindings(.is_interactive = function(...) TRUE) + + withr::with_options(list(readODS.show_progress = FALSE), { + expect_false(readODS_progress()) + }) + withr::with_options(list(knitr.in.progress = TRUE), { + expect_false(readODS_progress()) + }) + withr::with_envvar(new = c("RSTUDIO_NOTEBOOK" = "123"), { + expect_false(readODS_progress()) + }) + withr::with_envvar(new = c("RSTUDIO_NOTEBOOK" = ""), { + expect_true(readODS_progress()) + }) +}) diff --git a/tests/testthat/test_write_ods.R b/tests/testthat/test_write_ods.R index 3ad1de1..6c6c18d 100644 --- a/tests/testthat/test_write_ods.R +++ b/tests/testthat/test_write_ods.R @@ -180,3 +180,17 @@ test_that("list of dataframes, edge cases #56", { expect_error(path <- write_fods(list("iris" = iris), sheet = "whatever"), NA) expect_equal(list_fods_sheets(path), c("iris")) ## sheet is ignored }) + +withr::with_seed(123, { + test_that("fix 213 run 1", { + mydata <- data.frame(a=1:3, b=2:4) + expect_warning(write_ods(mydata), NA) + }) +}) + +withr::with_seed(123, { + test_that("fix 213 run 2", { + mydata <- data.frame(a=1:3, b=2:4) + expect_warning(write_ods(mydata), NA) + }) +})