diff --git a/CHANGELOG.md b/CHANGELOG.md index 77204dfc..31a5c9a3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,13 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). ## [Unreleased] +## [v0.11.12](https://github.com/JuliaData/XLSX.jl/tree/v0.11.12) - 2026-06-19 +- add a package extension to support [FileIO.jl](https://github.com/JuliaIO/FileIO.jl) +- update copyright notice end-date + +## [v0.11.11](https://github.com/JuliaData/XLSX.jl/tree/v0.11.11) - 2026-06-18 +- Fix [#410](https://github.com/JuliaData/XLSX.jl/issues/410) by making `is_binary_path` case insensitive + ## [v0.11.10](https://github.com/JuliaData/XLSX.jl/tree/v0.11.10) - 2026-05-28 - support macro-enabled files ([#401](https://github.com/JuliaData/XLSX.jl/issues/401)) - support pass-through of customXml files (again). ([#403](https://github.com/JuliaData/XLSX.jl/issues/403)) diff --git a/LICENSE b/LICENSE index cd757018..adf7eeab 100644 --- a/LICENSE +++ b/LICENSE @@ -1,6 +1,6 @@ MIT License -Copyright (c) 2018-2023 Felipe Noronha Tavares +Copyright (c) 2018-2026 Felipe Noronha Tavares and other contributors: https://github.com/juliadata/XLSX.jl/contributors diff --git a/Project.toml b/Project.toml index d7bf04ca..8db6a7d9 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "XLSX" uuid = "fdbf4ff8-1666-58a4-91e7-1b58723a45e0" license = "MIT" -version = "0.11.10" +version = "0.11.11" authors = ["Felipe Noronha "] repo = "https://github.com/juliadata/XLSX.jl.git" @@ -19,16 +19,20 @@ XML = "72c71f33-b9b6-44de-8c94-c961784809e2" ZipArchives = "49080126-0e18-4c2a-b176-c102e4b3760c" [weakdeps] +FileIO = "5789e2e9-d7fb-5bc7-8068-2c6fae9b9549" StyledStrings = "f489334b-da3d-4c2e-b8f0-e476e12c162b" [extensions] +FileIOExt = "FileIO" StyledStringsSstsExt = "StyledStrings" [compat] CSV = "0.10.15" Colors = "0.12, 0.13" Distributions = "0.25.0" +FileIO = "1" OrderedCollections = "1" +Pkg = "1" PrecompileTools = "1" StyledStrings = "1.0.3" Tables = "1" 
@@ -41,9 +45,11 @@ julia = "1.8" CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b" DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0" Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f" +FileIO = "5789e2e9-d7fb-5bc7-8068-2c6fae9b9549" +Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f" Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" StyledStrings = "f489334b-da3d-4c2e-b8f0-e476e12c162b" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" [targets] -test = ["CSV", "DataFrames", "Distributions", "Random", "StyledStrings", "Test"] +test = ["CSV", "DataFrames", "Distributions", "FileIO", "Pkg", "Random", "StyledStrings", "Test"] diff --git a/README.md b/README.md index 36da5b39..16251d16 100644 --- a/README.md +++ b/README.md @@ -84,12 +84,7 @@ and send a Pull Request. ## Alternative Packages -* [ExcelFiles.jl](https://github.com/davidanthoff/ExcelFiles.jl) - * [FileIO.jl](https://github.com/JuliaIO/FileIO.jl) * [LibXLSXWriter.jl](https://github.com/jaakkor2/LibXLSXWriter.jl) -* [Taro.jl](https://github.com/aviks/Taro.jl) - -* [XLSXReader.jl](https://github.com/mpastell/XLSXReader.jl) diff --git a/docs/make.jl b/docs/make.jl index add284ae..3b876d0f 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -6,7 +6,10 @@ makedocs( modules = [ XLSX ], pages = [ "Home" => "index.md", - "Tutorial" => "tutorial.md", + "Tutorial" => Any[ + "Using XLSX" => "tutorial/XLSXtutorial.md", + "Using FileIO" => "tutorial/FileIOtutorial.md", + ], "Formatting Guide" => Any[ "Cell formats" => "formatting/cellFormatting.md", "Conditional formats" => "formatting/conditionalFormatting.md", diff --git a/docs/src/api/files.md b/docs/src/api/files.md index 9c581ffe..dd5e6feb 100644 --- a/docs/src/api/files.md +++ b/docs/src/api/files.md @@ -13,6 +13,18 @@ XLSX.writexlsx XLSX.savexlsx ``` +## Files (using FileIO) + +!!! note + + These functions extend `FileIO.load` and `FileIO.save`. Call them as + `FileIO.load(...)` and `FileIO.save(...)` after doing `using FileIO`. 
+ +```@docs +XLSX.load +XLSX.save +``` + ## Worksheets ```@docs diff --git a/docs/src/tutorial/FileIOtutorial.md b/docs/src/tutorial/FileIOtutorial.md new file mode 100644 index 00000000..b770ee0a --- /dev/null +++ b/docs/src/tutorial/FileIOtutorial.md @@ -0,0 +1,138 @@ +# FileIO Tutorial + +## Introduction + +A package extension to XLSX.jl provides support for Excel files +under the [FileIO.jl](https://github.com/JuliaIO/FileIO.jl) package. + +[FileIO.jl](https://github.com/JuliaIO/FileIO.jl) aims to provide a common +framework for detecting file formats and dispatching to appropriate readers/writers. + +Through [FileIO.jl](https://github.com/JuliaIO/FileIO.jl), you can read +simple tabular data from an Excel (.xlsx) file and save tabular data +to an Excel file using simple `load` and `save` functions without needing +to know anything about XLSX.jl itself. + +XLSX.jl provides much more extensive functionality if you need it. +Check out the rest of the documentation for full details. + +## Setup + +First, make sure you have the **FileIO.jl** and **XLSX.jl** packages installed. + +```julia +julia> using Pkg + +julia> Pkg.add(["FileIO", "XLSX"]) +``` + +## Usage + +### Load an Excel file + +To read an Excel file into a `DataFrame`, use the following julia code: + +```julia +using FileIO, DataFrames + +df = DataFrame(load("data.xlsx", "Sheet1")) +``` + +The call to `load` returns an object that is a [Tables.jl](https://github.com/JuliaData/Tables.jl) table, +so it can be passed to any function that can handle Tables.jl tables. 
Here are some examples of +materializing an Excel file into such data structures: + +```julia +using FileIO, DataFrames, PrettyTables + +# Load into a DataFrame +julia> DataFrame(load("HTable.xlsx")) +5×10 DataFrame + Row │ Year 1940 1950 1960 1970 1980 1990 2000 2010 2020 + │ String Any Any Float64 Float64 Any Any Float64 Float64 Float64 +─────┼─────────────────────────────────────────────────────────────────────────────────────────── + 1 │ Col A 1 2 3.0 4.0 5 6 7.0 8.0 9.0 + 2 │ Col B 10 20 30.0 40.0 50 60 70.0 80.0 90.0 + 3 │ Col C 100 200 300.0 400.0 500 600 700.0 800.0 900.0 + 4 │ Col D 0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 + 5 │ Col E Hello 2025-12-19 3.0 3.33 Hello 2025-12-19 3.0 3.33 1.0 + +julia> DataFrame(load("HTable.xlsx"; transpose=true)) +9×6 DataFrame + Row │ Year Col A Col B Col C Col D Col E + │ Int64 Int64 Int64 Int64 Float64 Any +─────┼───────────────────────────────────────────────── + 1 │ 1940 1 10 100 0.1 Hello + 2 │ 1950 2 20 200 0.2 2025-12-19 + 3 │ 1960 3 30 300 0.3 3 + 4 │ 1970 4 40 400 0.4 3.33 + 5 │ 1980 5 50 500 0.5 Hello + 6 │ 1990 6 60 600 0.6 2025-12-19 + 7 │ 2000 7 70 700 0.7 3 + 8 │ 2010 8 80 800 0.8 3.33 + 9 │ 2020 9 90 900 0.9 true + + +# Load into a PrettyTable +julia> PrettyTable(load("HTable.xlsx")) +┌───────┬───────┬────────────┬───────┬───────┬───────┬────────────┬───────┬───────┬───────┐ +│ Year │ 1940 │ 1950 │ 1960 │ 1970 │ 1980 │ 1990 │ 2000 │ 2010 │ 2020 │ +├───────┼───────┼────────────┼───────┼───────┼───────┼────────────┼───────┼───────┼───────┤ +│ Col A │ 1 │ 2 │ 3.0 │ 4.0 │ 5 │ 6 │ 7.0 │ 8.0 │ 9.0 │ +│ Col B │ 10 │ 20 │ 30.0 │ 40.0 │ 50 │ 60 │ 70.0 │ 80.0 │ 90.0 │ +│ Col C │ 100 │ 200 │ 300.0 │ 400.0 │ 500 │ 600 │ 700.0 │ 800.0 │ 900.0 │ +│ Col D │ 0.1 │ 0.2 │ 0.3 │ 0.4 │ 0.5 │ 0.6 │ 0.7 │ 0.8 │ 0.9 │ +│ Col E │ Hello │ 2025-12-19 │ 3.0 │ 3.33 │ Hello │ 2025-12-19 │ 3.0 │ 3.33 │ 1.0 │ +└───────┴───────┴────────────┴───────┴───────┴───────┴────────────┴───────┴───────┴───────┘ + +julia> PrettyTable(load("HTable.xlsx"; 
transpose=true)) +┌──────┬───────┬───────┬───────┬───────┬────────────┐ +│ Year │ Col A │ Col B │ Col C │ Col D │ Col E │ +├──────┼───────┼───────┼───────┼───────┼────────────┤ +│ 1940 │ 1 │ 10 │ 100 │ 0.1 │ Hello │ +│ 1950 │ 2 │ 20 │ 200 │ 0.2 │ 2025-12-19 │ +│ 1960 │ 3 │ 30 │ 300 │ 0.3 │ 3 │ +│ 1970 │ 4 │ 40 │ 400 │ 0.4 │ 3.33 │ +│ 1980 │ 5 │ 50 │ 500 │ 0.5 │ Hello │ +│ 1990 │ 6 │ 60 │ 600 │ 0.6 │ 2025-12-19 │ +│ 2000 │ 7 │ 70 │ 700 │ 0.7 │ 3 │ +│ 2010 │ 8 │ 80 │ 800 │ 0.8 │ 3.33 │ +│ 2020 │ 9 │ 90 │ 900 │ 0.9 │ true │ +└──────┴───────┴───────┴───────┴───────┴────────────┘ + +``` + +For more information, see [`XLSX.load`](@ref) + +### Save an Excel file + +The following code saves any Tables.jl table (such as a `DataFrame`) as an Excel file: + +```julia +using FileIO + +save("output.xlsx", myTable) +``` + +For more information, see [`XLSX.save`](@ref) + +### Using the pipe syntax + +The `load` and `save` functions also support the pipe syntax. For example, to load an +Excel file into a `DataFrame`, one can use the following code: + +```julia +using FileIO, DataFrames + +df = load("data.xlsx", "Sheet1") |> DataFrame +``` + +To save any Tables.jl compatible table (such as a DataFrame), one can use the following form: + +```julia +using FileIO, DataFrames + +df = # Acquire a DataFrame somehow + +df |> save("output.xlsx") +``` diff --git a/docs/src/tutorial.md b/docs/src/tutorial/XLSXtutorial.md similarity index 99% rename from docs/src/tutorial.md rename to docs/src/tutorial/XLSXtutorial.md index 4cc4bd18..26623e91 100644 --- a/docs/src/tutorial.md +++ b/docs/src/tutorial/XLSXtutorial.md @@ -1,9 +1,9 @@ -# Tutorial +# XLSX Tutorial ## Setup -First, make sure you have **XLSX.jl** package installed. +First, make sure you have the **XLSX.jl** package installed. 
```julia julia> using Pkg diff --git a/ext/FileIOExt.jl b/ext/FileIOExt.jl new file mode 100644 index 00000000..7937e595 --- /dev/null +++ b/ext/FileIOExt.jl @@ -0,0 +1,41 @@ +module FileIOExt +# Provides hooks for FileIO.jl to save and load XLSX files. + +using FileIO + +using XLSX + +import XLSX: load, save + +function load(f::File{FileIO.format"Excel"}; transpose::Bool=false, kw...) + filename = FileIO.filename(f) + if transpose + return XLSX.readtransposedtable(filename; kw...) + else + return XLSX.readtable(filename; kw...) + end +end + +function load(f::File{FileIO.format"Excel"}, sheet; transpose::Bool=false, kw...) + filename = FileIO.filename(f) + if transpose + return XLSX.readtransposedtable(filename, sheet; kw...) + else + return XLSX.readtable(filename, sheet; kw...) + end +end + +function load(f::File{FileIO.format"Excel"}, sheet, rows_or_columns; transpose::Bool=false, kw...) + filename = FileIO.filename(f) + if transpose + return XLSX.readtransposedtable(filename, sheet, rows_or_columns; kw...) + else + return XLSX.readtable(filename, sheet, rows_or_columns; kw...) + end +end + +function save(f::File{FileIO.format"Excel"}, data; kw...) + XLSX.writetable(FileIO.filename(f), data; kw...) +end + +end # module \ No newline at end of file diff --git a/src/XLSX.jl b/src/XLSX.jl index c35b9444..ec0226d8 100644 --- a/src/XLSX.jl +++ b/src/XLSX.jl @@ -65,6 +65,7 @@ include("images.jl") include("write.jl") include("fileArray.jl") + PCT.@setup_workload begin # Putting some things in `@setup_workload` instead of `@compile_workload` can reduce the size of the # precompile file and potentially make loading faster. diff --git a/src/images.jl b/src/images.jl index a2650d14..44db5bcd 100644 --- a/src/images.jl +++ b/src/images.jl @@ -97,6 +97,7 @@ If multiple, overlapping images are added, newer images overly older ones. # Arguments `s::Worksheet`: the target worksheet. + `ref::AbstractString`: Either a valid cell reference (e.g. `"A1"`) or a valid cell range (e.g. 
`"B2:D4"`). The image will be anchored to the top left of the reference and sized to fit within the reference bounds. If a cell range is given, the `size` keyword argument is ignored. diff --git a/src/read.jl b/src/read.jl index eff42f0a..e5d8cd23 100644 --- a/src/read.jl +++ b/src/read.jl @@ -528,11 +528,12 @@ function openxlsx(source::Union{AbstractString,IO}; end function parse_file_mode(mode::AbstractString)::Tuple{Bool,Bool} - if mode == "r" + m = lowercase(mode) + if m == "r" return (true, false) - elseif mode == "w" + elseif m == "w" return (false, true) - elseif mode == "rw" || mode == "wr" + elseif m == "rw" || m == "wr" return (true, true) else throw(XLSXError("Couldn't parse file mode $mode.")) @@ -553,35 +554,35 @@ function convert_strict_to_transitional!(xf::XLSXFile, pass::Int) occursin(r"xl/worksheets/sheet\d+\.xml", filename) end - if should_process - data = xf.data[filename] - xroot = data[end] - attrs = XML.attributes(xroot) - - for (k, v) in attrs - if k == "conformance" && v == "strict" - delete!(attrs, "conformance") - elseif startswith(v, "http://purl.oclc.org/ooxml") - if haskey(STRICT_TO_TRANSITIONAL, v) - attrs[k] = STRICT_TO_TRANSITIONAL[v] - else - throw(XLSXError("Unsupported strict OOXML namespace or relationship type: \"$v\" in $filename. Please open an issue at https://github.com/JuliaData/XLSX.jl/issues")) - end + should_process || continue + + data = xf.data[filename] + xroot = data[end] + attrs = XML.attributes(xroot) + + for (k, v) in attrs + if k == "conformance" && v == "strict" + delete!(attrs, "conformance") + elseif startswith(v, "http://purl.oclc.org/ooxml") + if haskey(STRICT_TO_TRANSITIONAL, v) + attrs[k] = STRICT_TO_TRANSITIONAL[v] + else + throw(XLSXError("Unsupported strict OOXML namespace or relationship type: \"$v\" in $filename. 
Please open an issue at https://github.com/JuliaData/XLSX.jl/issues")) end end + end - # For .rels files, also patch Type= on child Relationship elements - for el in XML.children(xroot) - el_attrs = XML.attributes(el) - if !isnothing(el_attrs) - haskey(el_attrs, "conformance") && delete!(el_attrs, "conformance") - type_val = get(el_attrs, "Type", "") - if startswith(type_val, "http://purl.oclc.org/ooxml") - if haskey(STRICT_TO_TRANSITIONAL, type_val) - el_attrs["Type"] = STRICT_TO_TRANSITIONAL[type_val] - else - throw(XLSXError("Unsupported strict OOXML relationship type: \"$type_val\" in $filename. Please open an issue at https://github.com/JuliaData/XLSX.jl/issues")) - end + # For .rels files, also patch Type= on child Relationship elements + for el in XML.children(xroot) + el_attrs = XML.attributes(el) + if !isnothing(el_attrs) + haskey(el_attrs, "conformance") && delete!(el_attrs, "conformance") + type_val = get(el_attrs, "Type", "") + if startswith(type_val, "http://purl.oclc.org/ooxml") + if haskey(STRICT_TO_TRANSITIONAL, type_val) + el_attrs["Type"] = STRICT_TO_TRANSITIONAL[type_val] + else + throw(XLSXError("Unsupported strict OOXML relationship type: \"$type_val\" in $filename. Please open an issue at https://github.com/JuliaData/XLSX.jl/issues")) end end end @@ -957,10 +958,6 @@ function stream_files(xf::XLSXFile, zip_io::ZipArchives.ZipReader; pass::Int, end end -# list of filename prefixes to pass through as binary files. 
-const BINARY_PREFIXES = ["customXml"] - - # Read xml files in three passes # pass 1 - read all but worksheets and sharedStrings # pass 2 - only read sharedStrings (needed before worksheets) @@ -1004,7 +1001,7 @@ function load_files!(xf::XLSXFile, zip_io::ZipArchives.ZipReader; pass::Int, if has_sst(wb) sst_load!(wb) end - elseif xf.use_cache_for_sheet_data && !occursin(r"^xl/sharedStrings\.xml$", file.name) + elseif xf.use_cache_for_sheet_data# && !occursin(r"^xl/sharedStrings\.xml$", file.name) rid = get_relationship_id_by_target(wb, file.name) for sheet in wb.sheets if sheet.relationship_id == rid @@ -1526,3 +1523,128 @@ function unescape(x::AbstractString) end return result end + +# Hooks for FileIOExt.jl + +""" +```julia + FileIO.load( + source::String, + [sheet::String, + [columns::String]]; + [first_row::Int], + [first_column::String], + [column_labels::Vector{String}], + [header::Bool], + [normalizenames::Bool], + [transpose::Bool] + ) +``` +Read tabular data from an Excel file, `source`, and return it as a `Tables.jl` compatible table. +The resulting table object can be passed directly to any function that accepts `Tables.jl` data +(e.g. `DataFrame` from package `DataFrames.jl`). + +This function requires both FileIO.jl v1.20.0 or higher to be active in the current environment and a Julia version >= v1.9. + +#### Arguments: + +* `source`: The name of the file to be loaded. +* `sheet`: Specifies the sheet name to be loaded. If `sheet` is not given, the first Excel sheet in the file will be used. +* `columns`: Determines which columns to read. For example, `"B:D"` will select columns B, C and D. If `columns` is not given, the algorithm will find the first sequence of consecutive non-empty cells. A valid sheet **must** be specified when specifying columns. If `transpose = true`, `columns` should be used to specify rows instead. For example, specifying `"2:4"` with `transpose = true` will read only from these rows. + +!!! 
note + + The file extension provided in `source` must be `.xlsx`, `.xltx`, `.xlsm`, + or `.xltm` for FileIO to recognize the file format as an Excel file. + +#### Keywords: + +* `first_row`: Indicates the first row of the data table to be read. For example, `first_row=5` will look for a table starting at sheet row 5. If first_row is not given, the algorithm will look for the first non-empty row in the sheet (ignored if `transpose = true`). +* `first_column`: Indicates the first column of the data table to be read. For example, `first_column="B"` will look for a table starting at sheet column B. If `first_column` is not given, the algorithm will look for the first non-empty column in the sheet (ignored if `transpose = false` or is omitted). +* `column_labels`: Specifies column names for the header of the table. If `column_labels` are given and `header=true`, the headers given by `column_labels` will be used, and the first row of the table (containing headers) will be ignored. +* `header`: Indicates if the first row (column if `transpose = true`) is a header. If `header=true` and `column_labels` is not specified, the column labels for the table will be read from the first row (column) of the table. If `header=false` and `column_labels` is not specified, the algorithm will generate column labels. The default value is `header=true`. +* `normalizenames`: Set to `true` to normalize column names to valid Julia identifiers. Default=`false`. +* `transpose`: Set to `true` to transpose the table to read data from rows not columns. + +#### Examples + +```julia +julia> PrettyTable(load("HTable.xlsx", "Offset"; first_row=2)) + +julia> df = DataFrame(load("HTable.xlsx", "Offset", "2:7"; transpose=true, first_column="B")) + +julia> df = DataFrame(load("HTable.xlsx"; normalizenames=true, transpose=true, column_labels=["Date", "Name1", "Name2", "Name3", "Name4", "Name5"])) + +``` +""" +function load(args...; kwargs...) + throw(XLSXError( + """ + load requires the FileIO.jl package. 
+ + Please install and load it with: + + using Pkg + Pkg.add("FileIO") + using FileIO + + Then retry FileIO.load. + """ + )) + + return nothing +end + +""" +```julia + FileIO.save( + source::String, + data; + [sheetname::String], + [overwrite::Bool] + ) +``` +Save a `Tables.jl` compatible table to an Excel file, `source`. + +This function requires both FileIO.jl v1.20.0 or higher to be active in the current environment and a Julia version >= v1.9. + +#### Arguments: + +* `source`: The name of the file to be created on save. +* `data`: A `Tables.jl` compatible table to be saved to the file. For example, a `DataFrame` from package `DataFrames.jl`. + +!!! note + + The file extension provided in `source` must be `.xlsx`, `.xltx`, `.xlsm`, + or `.xltm` for FileIO to recognize the file format as an Excel file. The + file created will be a standard workbook (i.e. not an Excel template nor a + macro-enabled workbook) regardless of which of these four extensions is used. + +#### Keywords: + +* `sheetname`: Specify the sheetname to be used in the created file. By default, the sheetname will be `Sheet1`. +* `overwrite`: Set `overwrite=true` to overwrite any existing file of the same name. Default = `false`. + +#### Examples + +```julia +julia> save("myfile.xlsx", myTable; sheetname="myname", overwrite=true) +``` +""" +function save(args...; kwargs...) + throw(XLSXError( + """ + save requires the FileIO.jl package. + + Please install and load it with: + + using Pkg + Pkg.add("FileIO") + using FileIO + + Then retry FileIO.save. 
+ """ + )) + + return nothing +end diff --git a/src/table.jl b/src/table.jl index 5f1b1167..b760ace0 100644 --- a/src/table.jl +++ b/src/table.jl @@ -3,6 +3,13 @@ # Table # +Base.show(io::IO, dt::DataTable) = + Base.show(io, MIME"text/plain"(), dt) + +Base.show(io::IO, ::MIME"text/plain", dt::DataTable) = + print(io, "XLSX.DataTable with $(length(dt.data)) columns and $(length(dt.data[1])) rows.") + + # Returns a tuple with the first and last index of the columns for a `SheetRow`. function column_bounds(sr::SheetRow) isempty(sr) && throw(XLSXError("Can't get column bounds from an empty row.")) diff --git a/test/data/TestData.xlsx b/test/data/TestData.xlsx new file mode 100644 index 00000000..420a8cb4 Binary files /dev/null and b/test/data/TestData.xlsx differ diff --git a/test/runtests.jl b/test/runtests.jl index fa14f596..f6d62d8b 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -69,7 +69,7 @@ src_data_directory = joinpath(dirname(pathof(XLSX)), "data") # Issue #293 @testset "read .xltx file" begin - xf = XLSX.openxlsx(joinpath(data_directory, "Template File.xltx"); mode="rw") + xf = XLSX.openxlsx(joinpath(data_directory, "Template File.xltx"); mode="RW") s=xf[1] @test s["P5"] == 5 @test XLSX.getFormula(s, "B5") == "=RANDBETWEEN(0,100)" @@ -84,7 +84,7 @@ src_data_directory = joinpath(dirname(pathof(XLSX)), "data") @test xf.template_type == XLSX.NotATemplate isfile(joinpath(data_directory, "Template File.xlsx")) && rm(joinpath(data_directory, "Template File.xlsx")) - XLSX.openxlsx(joinpath(data_directory, "Template File.xltx"); mode="rw") do xf + XLSX.openxlsx(joinpath(data_directory, "Template File.xltx"); mode="RW") do xf s=xf[1] @test s["P5"] == 5 @test XLSX.getFormula(s, "B5") == "=RANDBETWEEN(0,100)" @@ -96,14 +96,14 @@ src_data_directory = joinpath(dirname(pathof(XLSX)), "data") # Issue #401 @testset "macro enabled files" begin - mf = XLSX.openxlsx(joinpath(data_directory, "macro-enabled.xlsm"); mode="rw") + mf = XLSX.openxlsx(joinpath(data_directory, 
"macro-enabled.xlsm"); mode="Rw") @test mf[1]["A1"] == "hello" XLSX.writexlsx("mytest.xlsm", mf; overwrite=true) mf = XLSX.openxlsx("mytest.xlsm"; mode="rw") @test mf[1]["A1"] == "hello" isfile("mytest.xlsm") && rm("mytest.xlsm") - mf = XLSX.openxlsx(joinpath(data_directory, "macro-enabled2.xltm"); mode="rw") + mf = XLSX.openxlsx(joinpath(data_directory, "macro-enabled2.xltm"); mode="rW") @test mf[1]["A1"] == "hello" @test mf.template_type == XLSX.XLTMTemplate XLSX.savexlsx(mf) @@ -114,12 +114,12 @@ src_data_directory = joinpath(dirname(pathof(XLSX)), "data") @test mf.template_type == XLSX.NotATemplate isfile(joinpath(data_directory, "macro-enabled2.xlsm")) && rm(joinpath(data_directory, "macro-enabled2.xlsm")) - XLSX.openxlsx(joinpath(data_directory, "macro-enabled2.xltm"); mode="rw") do mf + XLSX.openxlsx(joinpath(data_directory, "macro-enabled2.xltm"); mode="wr") do mf @test mf[1]["A1"] == "hello" @test mf.template_type == XLSX.XLTMTemplate end @test isfile(joinpath(data_directory, "macro-enabled2.xlsm")) - mf = XLSX.openxlsx(joinpath(data_directory, "macro-enabled2.xlsm"); mode="rw") + mf = XLSX.openxlsx(joinpath(data_directory, "macro-enabled2.xlsm"); mode="WR") @test mf[1]["A1"] == "hello" @test mf.template_type == XLSX.NotATemplate isfile(joinpath(data_directory, "macro-enabled2.xlsm")) && rm(joinpath(data_directory, "macro-enabled2.xlsm")) @@ -815,6 +815,8 @@ end end isfile("mytest.xlsx") && rm("mytest.xlsx") + + # Issue #395 @testset "Multi-threaded read" begin N_FORMULAS = 5000 # Should be a multiple of ROW_CHUNKSIZE N_ITER = 5 @@ -1654,6 +1656,10 @@ end s = f["table"] s[:] dtable = XLSX.gettable(s) + + plaintext = sprint(show, dtable) + @test plaintext == "XLSX.DataTable with 6 columns and 8 rows." 
+ data, col_names = dtable.data, dtable.column_labels @test col_names == [Symbol("Column B"), Symbol("Column C"), Symbol("Column D"), Symbol("Column E"), Symbol("Column F"), Symbol("Column G")] @@ -7765,4 +7771,199 @@ end @test length(imgs) == 1 @test imgs[1].sheet == "Sheet2" end -end \ No newline at end of file +end + +# Helper: get columns and names from a loaded XLSX.DataTable +function get_cols(source::XLSX.DataTable) + return source.data, source.column_labels +end + +@testset "No FileIO" verbose=true begin + + filename = joinpath(data_directory, "TestData.xlsx") + + try + XLSX.load(filename, "Sheet1") + @test false # should error before this line + catch e + @test e isa XLSX.XLSXError && occursin("requires the FileIO.jl package", e.msg) + end + try + XLSX.save(filename, "Sheet1") + @test false # should error before this line + catch e + @test e isa XLSX.XLSXError && occursin("requires the FileIO.jl package", e.msg) + end +end + +using Pkg +using FileIO + +@static if VERSION >= v"1.9-" + + if Pkg.pkgversion(FileIO) > v"1.19.0" + + @testset "FileIO" verbose=true begin + + filename = joinpath(data_directory, "TestData.xlsx") + + efile = load(filename, "Sheet1") + + @test Tables.istable(efile) == true # Defined in XLSX.jl + + # Test show renders expected number of rows and columns. + @testset "show plain text" begin + s = sprint(show, efile) + @test s == "XLSX.DataTable with 13 columns and 4 rows." 
+ end + + @testset "read table" begin + for source in [load(filename, "Sheet1", "C:O"; first_row=3), load(filename, "Sheet1")] + df, names = get_cols(source) + @test length(df) == 13 + @test length(df[1]) == 4 + + @test df[1] == [1., 1.5, 2., 2.5] + @test df[2] == ["A", "BB", "CCC", "DDDD"] + @test df[3] == [true, false, false, true] + @test isequal(df[4], [2, "EEEEE", false, 1.5]) + @test isequal(df[5], [9., "III", missing, true]) + @test isequal(df[6], [3., missing, 3.5, 4.]) + @test isequal(df[7], ["FF", missing, "GGG", "HHHH"]) + @test isequal(df[8], [missing, true, missing, false]) + @test df[9] == [Date(2015, 3, 3), DateTime(2015, 2, 4, 10, 14), Date(1988, 4, 9), Dates.Time(15, 2, 0)] + @test isequal(df[10], [Date(1965, 4, 3), DateTime(1950, 8, 9, 18, 40), Dates.Time(19, 0, 0), missing]) + @test all(ismissing, df[11]) + @test isequal(df[12], [missing, missing, missing, missing]) + @test isequal(df[13], [missing, 3.4, "HKEJW", missing]) + end + + df, names = get_cols(load(filename, "Sheet1", "C:O"; first_row=4, header=false)) + @test names == [:C, :D, :E, :F, :G, :H, :I, :J, :K, :L, :M, :N, :O] + @test length(df[1]) == 4 + @test length(df) == 13 + @test df[1] == [1., 1.5, 2., 2.5] + @test df[2] == ["A", "BB", "CCC", "DDDD"] + @test df[3] == [true, false, false, true] + @test isequal(df[4], [2, "EEEEE", false, 1.5]) + @test isequal(df[5], [9., "III", missing, true]) + @test isequal(df[6], [3., missing, 3.5, 4.]) + @test isequal(df[7], ["FF", missing, "GGG", "HHHH"]) + @test isequal(df[8], [missing, true, missing, false]) + @test df[9] == [Date(2015, 3, 3), DateTime(2015, 2, 4, 10, 14), DateTime(1988, 4, 9), Dates.Time(15, 2, 0)] + @test isequal(df[10], [Date(1965, 4, 3), DateTime(1950, 8, 9, 18, 40), Dates.Time(19, 0, 0), missing]) + @test all(ismissing, df[11]) + @test all(ismissing, df[12]) + @test isequal(df[13], [missing, 3.4, "HKEJW", missing]) + @test ismissing(df[12][4]) + + good_colnames = [:c1, :c2, :c3, :c4, :c5, :c6, :c7, :c8, :c9, :c10, :c11, :c12, 
:c13] + + df, names = get_cols(load(filename, "Sheet1", "C:O"; first_row=4, header=false, column_labels=good_colnames)) + @test names == good_colnames + @test length(df[1]) == 4 + @test length(df) == 13 + @test df[1] == [1., 1.5, 2., 2.5] + @test df[2] == ["A", "BB", "CCC", "DDDD"] + @test df[3] == [true, false, false, true] + @test isequal(df[4], [2, "EEEEE", false, 1.5]) + @test isequal(df[5], [9., "III", missing, true]) + @test isequal(df[6], [3., missing, 3.5, 4.]) + @test isequal(df[7], ["FF", missing, "GGG", "HHHH"]) + @test isequal(df[8], [missing, true, missing, false]) + @test df[9] == [Date(2015, 3, 3), DateTime(2015, 2, 4, 10, 14), DateTime(1988, 4, 9), Dates.Time(15, 2, 0)] + @test isequal(df[10], [Date(1965, 4, 3), DateTime(1950, 8, 9, 18, 40), Dates.Time(19, 0, 0), missing]) + @test all(ismissing, df[11]) + @test all(ismissing, df[12]) + @test isequal(df[13], [missing, 3.4, "HKEJW", missing]) + @test ismissing(df[12][4]) + + # Test for saving DataFrame to XLSX + input = (Day = ["Nov. 27", "Nov. 28", "Nov. 29"], Highest = [78, 79, 75]) |> DataFrames.DataFrame + save("file.xlsx", input) + output = load("file.xlsx", "Sheet1") |> DataFrames.DataFrame + @test input == output + rm("file.xlsx") + + # Test for saving DataFrame to XLSX with sheetname keyword + input = (Day = ["Nov. 27", "Nov. 28", "Nov. 
29"], Highest = [78, 79, 75]) |> DataFrames.DataFrame + save("file.xlsx", input, sheetname="SheetName") + output = load("file.xlsx", "SheetName") |> DataFrames.DataFrame + @test input == output + rm("file.xlsx") + + df, names = get_cols(load(filename, "Sheet1"; column_labels=good_colnames)) + @test names == good_colnames + @test length(df[1]) == 4 + @test length(df) == 13 + @test df[1] == [1., 1.5, 2., 2.5] + @test df[2] == ["A", "BB", "CCC", "DDDD"] + @test df[3] == [true, false, false, true] + @test isequal(df[4], [2, "EEEEE", false, 1.5]) + @test isequal(df[5], [9., "III", missing, true]) + @test isequal(df[6], [3., missing, 3.5, 4.]) + @test isequal(df[7], ["FF", missing, "GGG", "HHHH"]) + @test isequal(df[8], [missing, true, missing, false]) + @test df[9] == [Date(2015, 3, 3), DateTime(2015, 2, 4, 10, 14), DateTime(1988, 4, 9), Dates.Time(15, 2, 0)] + @test isequal(df[10], [Date(1965, 4, 3), DateTime(1950, 8, 9, 18, 40), Dates.Time(19, 0, 0), missing]) + @test all(ismissing, df[11]) + @test all(ismissing, df[12]) + @test isequal(df[13], [missing, 3.4, "HKEJW", missing]) + @test ismissing(df[12][4]) + + # Too few column labels - Note: Bypass FileIO here to avoid false "Fatal Error" from FileIO when the error is correctly thrown by ExcelFiles for mismatched column_labels length. 
+ try + XLSX.load(File{FileIO.format"Excel"}(filename), "Sheet1", "C:O"; header=true, column_labels=[:c1, :c2, :c3, :c4]) + @test false # should error before this line + catch e + @test e isa XLSX.XLSXError && occursin("`column_range` (length=13) and `column_labels` (length=4) must have the same length.", e.msg) + end + + # Test for constructing DataFrame with empty header cell + data, names = get_cols(load(filename, "Sheet2", "C:E")) + @test names == [:Col1, Symbol("#Empty"), :Col3] + + # normalizenames keyword (XLSX.jl v0.11 only) + data, names = get_cols(load(filename, "Sheet2", "C:E"; normalizenames=true)) + @test names == [:Col1, :_Empty, :Col3] + end + @testset "transposed tables" begin + # Note: readtransposedtable cannot handle entirely empty rows/columns, + # so the Transpose sheet omits those from the original Sheet1 data. + # Note: eltype of mixed date columns is Dates.TimeType (not Any) when + # there are no missing values, since a common supertype can be inferred. + + df, names = get_cols(load(filename, "Transpose"; transpose=true, first_column=2)) + @test length(df) == 5 + @test length(df[1]) == 4 + @test names == [Symbol("Some Float64s"), Symbol("Some Strings"), Symbol("Some Bools"), Symbol("Mixed with NA"), Symbol("Some dates")] + + @test df[1] == [1.0, 1.5, 2.0, 2.5] + @test df[2] == ["A", "BB", "CCC", "DDDD"] + @test df[3] == Bool[true, false, false, true] + @test isequal(df[4], Any[9, "III", missing, true]) + @test df[5] == Dates.TimeType[Date(2015, 3, 3), DateTime(2015, 2, 4, 10, 14), Date(1988, 4, 9), Dates.Time(15, 2, 0)] + end + + @testset "template and macro files" begin + tbl = load(joinpath(data_directory, "Template File.xltx"), "Sheet1", "K:N") + @test length(tbl.data) == 4 + @test length(tbl.data[1]) == 9 + tbl = load(joinpath(data_directory, "macro-enabled2.xltm")) + @test length(tbl.data) == 1 + @test length(tbl.data[1]) == 0 + @test tbl.column_labels == [:hello] + tbl = load(joinpath(data_directory, "macro-enabled.xlsm")) + @test 
length(tbl.data) == 1 + @test length(tbl.data[1]) == 0 + @test tbl.column_labels == [:hello] + end + + end + else + @info "Skipping FileIO tests (requires FileIO > v1.19.0, got $(pkgversion(FileIO)))" + end +else + @info "Skipping FileIO tests (requires Julia > v1.9, got $(VERSION))" +end +