diff --git a/CHANGELOG.md b/CHANGELOG.md index 1535d41df0e..f3f71424a6b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -21,6 +21,7 @@ - PR #3192 Add dtype param to cast `DataFrame` on init - PR #3222 Add nvtext character tokenizer - PR #3223 Java expose underlying buffers +- PR #3255 Add utility to print column - PR #3300 Add `DataFrame.insert` - PR #3263 Define and implement new `valid_if` - PR #3278 Add `to_host` utility to copy `column_view` to host diff --git a/cpp/tests/utilities/column_utilities.cu b/cpp/tests/utilities/column_utilities.cu index c02bf0d40d4..20892ab72ec 100644 --- a/cpp/tests/utilities/column_utilities.cu +++ b/cpp/tests/utilities/column_utilities.cu @@ -19,7 +19,12 @@ #include #include #include +#include +#include +#include + #include +#include #include @@ -34,26 +39,88 @@ void expect_column_properties_equal(cudf::column_view lhs, cudf::column_view rhs EXPECT_EQ(lhs.size(), rhs.size()); EXPECT_EQ(lhs.null_count(), rhs.null_count()); if (lhs.size() > 0) { - EXPECT_EQ(lhs.nullable(), rhs.nullable()); + EXPECT_EQ(lhs.nullable(), rhs.nullable()); } EXPECT_EQ(lhs.has_nulls(), rhs.has_nulls()); EXPECT_EQ(lhs.num_children(), rhs.num_children()); } -// Verify elementwise equality -void expect_columns_equal(cudf::column_view lhs, cudf::column_view rhs) { +class corresponding_rows_unequal { +public: + corresponding_rows_unequal(table_device_view d_lhs, table_device_view d_rhs): comp(d_lhs, d_rhs) { + } + + cudf::experimental::row_equality_comparator comp; + // Returns true when row `index` of the two tables does not compare equal + __device__ bool operator()(size_type index) { + return !comp(index, index); + } +}; + +void expect_columns_equal(cudf::column_view lhs, cudf::column_view rhs, bool print_all_differences) { expect_column_properties_equal(lhs, rhs); auto d_lhs = cudf::table_device_view::create(table_view{{lhs}}); auto d_rhs = cudf::table_device_view::create(table_view{{rhs}}); - EXPECT_TRUE( - thrust::equal(thrust::device, thrust::make_counting_iterator(0), - thrust::make_counting_iterator(lhs.size()), -
thrust::make_counting_iterator(0), - cudf::experimental::row_equality_comparator{*d_lhs, *d_rhs})); + thrust::device_vector differences(lhs.size()); + // Collect the index of every row where lhs and rhs compare unequal + auto diff_iter = thrust::copy_if(thrust::device, + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(lhs.size()), + differences.begin(), + corresponding_rows_unequal(*d_lhs, *d_rhs)); CUDA_TRY(cudaDeviceSynchronize()); + + differences.resize(thrust::distance(differences.begin(), diff_iter)); + // Test the vector itself: diff_iter was obtained before the resize above + if (!differences.empty()) { + if (print_all_differences) { + // + // If there are differences, display them all + // + std::ostringstream buffer; + buffer << "differences:" << std::endl; + + cudf::table_view source_table ({lhs, rhs}); + + fixed_width_column_wrapper diff_column(differences.begin(), differences.end()); + + std::unique_ptr diff_table = cudf::experimental::gather(source_table, + diff_column); + + // + // Need to pull back the differences + // + std::vector h_left_strings = to_strings(diff_table->get_column(0)); + std::vector h_right_strings = to_strings(diff_table->get_column(1)); + + for (size_t i = 0 ; i < differences.size() ; ++i) { + buffer << "lhs[" << differences[i] << "] = " << h_left_strings[i] + << ", rhs[" << differences[i] << "] = " << h_right_strings[i] << std::endl; + } + + EXPECT_EQ(differences.size(), size_t{0}) << buffer.str(); + } else { + // + // If there are differences, just display the first one + // + int index = differences[0]; + + auto diff_lhs = cudf::experimental::detail::slice(lhs, index, index+1); + auto diff_rhs = cudf::experimental::detail::slice(rhs, index, index+1); + // Each slice holds exactly one row, so element 0 is that row's text + std::vector h_left_strings = to_strings(diff_lhs); + std::vector h_right_strings = to_strings(diff_rhs); + + EXPECT_EQ(differences.size(), size_t{0}) << "first difference: " + << "lhs[" << index << "] = " + << h_left_strings[0] + << ", rhs[" << index << "] = " + << h_right_strings[0]; + } + } } // Bitwise equality @@ -69,5 +136,81 @@ void expect_equal_buffers(void const*
lhs, void const* rhs, typed_rhs)); } +struct column_view_printer { + template ()>* = nullptr> + void operator()(cudf::column_view const& col, std::vector & out) { + auto h_data = cudf::test::to_host(col); + + out.resize(col.size()); + + if (col.nullable()) { + size_type index = 0; + std::transform(h_data.first.begin(), h_data.first.end(), out.begin(), [&h_data, &index](Element el) { + return (bit_is_set(h_data.second.data(), index++)) ? std::to_string(el) : std::string("@"); + }); + } else { + std::transform(h_data.first.begin(), h_data.first.end(), out.begin(), [](Element el) { + return std::to_string(el); + }); + } + } + + template ()>* = nullptr> + void operator()(cudf::column_view const& col, std::vector & out) { + // + // For timestamps, convert timestamp column to column of strings, then + // call string version + // + auto col_as_strings = cudf::strings::from_timestamps(col); + + this->template operator()(*col_as_strings, out); + } + + template ::value>* = nullptr> + void operator()(cudf::column_view const& col, std::vector & out) { + // + // Implementation for strings, call special to_host variant + // + auto h_data = cudf::test::to_host(col); + + out.resize(col.size()); + + if (col.nullable()) { + size_type index = 0; + std::transform(h_data.first.begin(), h_data.first.end(), out.begin(), [&h_data, &index](std::string el) { + return (bit_is_set(h_data.second.data(), index++)) ?
el : std::string("@"); + }); + } else { + out = std::move(h_data.first); + } + } +}; + +std::vector to_strings(cudf::column_view const& col) { + std::vector reply; + // Dispatch on the column's type; column_view_printer fills `reply` + cudf::experimental::type_dispatcher(col.type(), + column_view_printer{}, + col, + reply); + + return reply; +} + +std::string to_string(cudf::column_view const& col, std::string const& delimiter) { + // Joins the per-element strings with `delimiter`; an empty column yields an empty string + std::ostringstream buffer; + std::vector h_data = to_strings(col); + if (h_data.empty()) { return buffer.str(); } // `end() - 1` and `back()` are undefined on an empty vector + std::copy(h_data.begin(), h_data.end() - 1, std::ostream_iterator(buffer, delimiter.c_str())); + buffer << h_data.back(); + + return buffer.str(); +} + +void print(cudf::column_view const& col, std::ostream &os, std::string const& delimiter) { + os << to_string(col, delimiter); +} + } // namespace test } // namespace cudf diff --git a/cpp/tests/utilities/column_utilities.hpp b/cpp/tests/utilities/column_utilities.hpp index 6512b9a75a2..2d7fe5c25d3 100644 --- a/cpp/tests/utilities/column_utilities.hpp +++ b/cpp/tests/utilities/column_utilities.hpp @@ -39,10 +39,11 @@ void expect_column_properties_equal(cudf::column_view lhs, cudf::column_view rhs * * Treats null elements as equivalent. * - * @param lhs The first column - * @param rhs The second column - */ -void expect_columns_equal(cudf::column_view lhs, cudf::column_view rhs); + * @param lhs The first column + * @param rhs The second column + * @param print_all_differences If true, report every differing row; otherwise only the first + *---------------------------------------------------------------------------**/ +void expect_columns_equal(cudf::column_view lhs, cudf::column_view rhs, bool print_all_differences = false); /** * @brief Verifies the bitwise equality of two device memory buffers.
@@ -54,6 +55,30 @@ void expect_columns_equal(cudf::column_view lhs, cudf::column_view rhs); void expect_equal_buffers(void const* lhs, void const* rhs, std::size_t size_bytes); +/**---------------------------------------------------------------------------* + * @brief Formats the elements of a column view as a single delimited string + * + * @param col The column view + * @param delimiter The delimiter to put between the element strings + *---------------------------------------------------------------------------**/ +std::string to_string(cudf::column_view const& col, std::string const& delimiter); + +/**---------------------------------------------------------------------------* + * @brief Convert column values to a host vector of strings, one per element + * + * @param col The column view + *---------------------------------------------------------------------------**/ +std::vector to_strings(cudf::column_view const& col); + +/**---------------------------------------------------------------------------* + * @brief Print a column view to an ostream + * + * @param col The column view + * @param os The output stream (defaults to std::cout) + * @param delimiter The delimiter to put between the element strings (defaults to ",") + *---------------------------------------------------------------------------**/ +void print(cudf::column_view const& col, std:: ostream &os = std::cout, std::string const& delimiter=","); + /** * @brief Copies the data and bitmask of a `column_view` to the host.
* diff --git a/cpp/tests/utilities_tests/column_utilities_tests.cu b/cpp/tests/utilities_tests/column_utilities_tests.cu index a2a73c564ef..f3b79aae985 100644 --- a/cpp/tests/utilities_tests/column_utilities_tests.cu +++ b/cpp/tests/utilities_tests/column_utilities_tests.cu @@ -22,6 +22,7 @@ #include template + struct ColumnUtilitiesTest : public cudf::test::BaseFixture, cudf::test::UniformRandomGenerator { @@ -35,7 +36,11 @@ struct ColumnUtilitiesTest } }; +template +struct ColumnUtilitiesTestNumeric : public cudf::test::BaseFixture {}; + TYPED_TEST_CASE(ColumnUtilitiesTest, cudf::test::FixedWidthTypes); +TYPED_TEST_CASE(ColumnUtilitiesTestNumeric, cudf::test::NumericTypes); TYPED_TEST(ColumnUtilitiesTest, NonNullableToHost) { auto sequence = cudf::test::make_counting_transform_iterator( @@ -98,3 +103,58 @@ TEST_F(ColumnUtilitiesStringsTest, StringsToHostAllNulls) auto host_data = cudf::test::to_host(strings); EXPECT_TRUE( host_data.first.empty() ); } + +TYPED_TEST(ColumnUtilitiesTestNumeric, PrintColumnNumeric) { + const char* delimiter = ","; + // The same five values as a cudf column and as a host vector + cudf::test::fixed_width_column_wrapper cudf_col({1, 2, 3, 4, 5}); + std::vector std_col({1, 2, 3, 4, 5}); + // Expected text: the host values joined by the delimiter, with none before the first + std::ostringstream tmp; + + int index = 0; + for (auto x : std_col) { + tmp << ((index == 0) ?
"" : delimiter); + tmp << std::to_string(x); + ++index; + } + + EXPECT_EQ(cudf::test::to_string(cudf_col, delimiter), tmp.str()); +} + +TYPED_TEST(ColumnUtilitiesTestNumeric, PrintColumnWithInvalids) { + const char* delimiter = ","; + // Elements 1 and 3 carry a 0 in the second list and are expected to print as "@" + cudf::test::fixed_width_column_wrapper cudf_col{ {1, 2, 3, 4, 5}, + {1, 0, 1, 0, 1} }; + std::vector std_col({1, 2, 3, 4, 5}); + + std::ostringstream tmp; + tmp << std::to_string(std_col[0]) + << delimiter << "@" + << delimiter << std::to_string(std_col[2]) + << delimiter << "@" + << delimiter << std::to_string(std_col[4]); + + EXPECT_EQ(cudf::test::to_string(cudf_col, delimiter), tmp.str()); +} + +TEST_F(ColumnUtilitiesStringsTest, StringsToString) { + const char* delimiter = ","; + // The nullptr entry gets validity false (str != nullptr) and is expected to print as "@" + std::vector h_strings{ "eee", "bb", nullptr, "", "aa", "bbb", "ééé" }; + cudf::test::strings_column_wrapper strings( h_strings.begin(), h_strings.end(), + thrust::make_transform_iterator( h_strings.begin(), [] (auto str) { return str!=nullptr; })); + + + std::ostringstream tmp; + tmp << h_strings[0] + << delimiter << h_strings[1] + << delimiter << "@" + << delimiter << h_strings[3] + << delimiter << h_strings[4] + << delimiter << h_strings[5] + << delimiter << h_strings[6]; + + EXPECT_EQ(cudf::test::to_string(strings, delimiter), tmp.str()); +}