Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
2618887
add utility to print column
ChuckHastings Oct 30, 2019
d00c75a
update changelog
ChuckHastings Oct 30, 2019
93432eb
Merge branch 'branch-0.11' into fea_print_columns
ChuckHastings Nov 4, 2019
a2d34f7
Use to_host, add print method, clean up a bit
ChuckHastings Nov 4, 2019
dfedeb8
missed adding all flag to header file
ChuckHastings Nov 4, 2019
e54c21f
make to_string work with to_host
ChuckHastings Nov 5, 2019
6b2d5db
new approach creating a vector of strings. Prepare for gather
ChuckHastings Nov 6, 2019
1fd1b58
Merge branch 'branch-0.11' into fea_print_columns
ChuckHastings Nov 6, 2019
97821f2
get last commit to compile (mistakes in manual merge)
ChuckHastings Nov 6, 2019
4297dbf
address changes from PR review
ChuckHastings Nov 7, 2019
a49d68a
Merge branch 'branch-0.11' into fea_print_columns
ChuckHastings Nov 7, 2019
9294d54
test gather logic, delete temporary logic
ChuckHastings Nov 7, 2019
377a75c
Merge branch 'davidwendt/port-nvs-datetime-ops' into fea_print_columns
ChuckHastings Nov 8, 2019
ac93840
add support for strings and timestamps
ChuckHastings Nov 8, 2019
7151228
Merge branch 'branch-0.11' into fea_print_columns
ChuckHastings Nov 8, 2019
eefe24a
address comments from PR review
ChuckHastings Nov 15, 2019
b998d6f
Merge branch 'branch-0.11' into fea_print_columns
ChuckHastings Nov 15, 2019
7b64a8e
need the right version of slice
ChuckHastings Nov 15, 2019
3a42cb8
Merge branch 'branch-0.11' into fea_print_columns
ChuckHastings Nov 19, 2019
2a9ca2c
include file moved between draft and final versions of 3232
ChuckHastings Nov 19, 2019
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
- PR #3192 Add dtype param to cast `DataFrame` on init
- PR #3222 Add nvtext character tokenizer
- PR #3223 Java expose underlying buffers
- PR #3255 Add utility to print column
- PR #3300 Add `DataFrame.insert`
- PR #3263 Define and implement new `valid_if`
- PR #3278 Add `to_host` utility to copy `column_view` to host
Expand Down
159 changes: 151 additions & 8 deletions cpp/tests/utilities/column_utilities.cu
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,12 @@
#include <cudf/column/column_view.hpp>
#include <cudf/table/row_operators.cuh>
#include <cudf/table/table_device_view.cuh>
#include <cudf/utilities/bit.hpp>
#include <cudf/strings/convert/convert_datetime.hpp>
#include <cudf/detail/copy.hpp>

#include <tests/utilities/cudf_gtest.hpp>
#include <tests/utilities/column_wrapper.hpp>

#include <thrust/equal.h>

Expand All @@ -34,26 +39,88 @@ void expect_column_properties_equal(cudf::column_view lhs, cudf::column_view rhs
EXPECT_EQ(lhs.size(), rhs.size());
EXPECT_EQ(lhs.null_count(), rhs.null_count());
if (lhs.size() > 0) {
EXPECT_EQ(lhs.nullable(), rhs.nullable());
EXPECT_EQ(lhs.nullable(), rhs.nullable());
}
EXPECT_EQ(lhs.has_nulls(), rhs.has_nulls());
EXPECT_EQ(lhs.num_children(), rhs.num_children());
}

// Verify elementwise equality
void expect_columns_equal(cudf::column_view lhs, cudf::column_view rhs) {
/**
 * @brief Device predicate that reports whether row `index` of one table
 * differs from row `index` of another.
 *
 * Wraps a `row_equality_comparator<true>` built from the two device views and
 * negates its result, so it can be used directly as a `copy_if` predicate to
 * select the indices of mismatching rows.
 */
struct corresponding_rows_unequal {
  cudf::experimental::row_equality_comparator<true> comp;

  corresponding_rows_unequal(table_device_view d_lhs, table_device_view d_rhs)
    : comp(d_lhs, d_rhs)
  {
  }

  // True when the comparator says the two rows at `index` are NOT equal.
  __device__ bool operator()(size_type index) {
    bool const rows_match = comp(index, index);
    return !rows_match;
  }
};

/**
 * @brief Verifies the element-wise equality of two columns and, on mismatch,
 * reports which rows differ.
 *
 * Column properties are checked first. The indices of all rows whose values
 * differ are then gathered on the device and copied to the host in a single
 * transfer. If any exist, one non-fatal gtest failure is emitted whose message
 * lists either every differing row or only the first one.
 *
 * @param lhs The first column
 * @param rhs The second column
 * @param print_all_differences If true, list every differing row in the
 *                              failure message; otherwise only the first one
 */
void expect_columns_equal(cudf::column_view lhs, cudf::column_view rhs, bool print_all_differences) {
  expect_column_properties_equal(lhs, rhs);

  // The property check above is non-fatal. If the sizes differ, a row-by-row
  // comparison would index past the end of the shorter column, and the
  // mismatch has already been reported, so stop here.
  if (lhs.size() != rhs.size()) {
    return;
  }

  auto d_lhs = cudf::table_device_view::create(table_view{{lhs}});
  auto d_rhs = cudf::table_device_view::create(table_view{{rhs}});

  // Worst case every row differs, so reserve one slot per row.
  thrust::device_vector<int> differences(lhs.size());

  auto diff_iter = thrust::copy_if(thrust::device,
                                   thrust::make_counting_iterator(0),
                                   thrust::make_counting_iterator(lhs.size()),
                                   differences.begin(),
                                   corresponding_rows_unequal(*d_lhs, *d_rhs));

  CUDA_TRY(cudaDeviceSynchronize());

  differences.resize(thrust::distance(differences.begin(), diff_iter));

  if (differences.empty()) {
    return;
  }

  // Bring the mismatching indices to the host once, instead of reading the
  // device vector element by element (each such read is its own transfer).
  std::vector<int> h_differences(differences.size());
  thrust::copy(differences.begin(), differences.end(), h_differences.begin());

  if (print_all_differences) {
    //
    //  If there are differences, display them all
    //
    std::ostringstream buffer;
    buffer << "differences:" << std::endl;

    cudf::table_view source_table ({lhs, rhs});

    fixed_width_column_wrapper<int32_t> diff_column(h_differences.begin(), h_differences.end());

    // Gather only the mismatching rows of both columns so that just those
    // rows need to be converted to strings.
    std::unique_ptr<cudf::experimental::table> diff_table = cudf::experimental::gather(source_table,
                                                                                       diff_column);

    //
    //  Need to pull back the differences
    //
    std::vector<std::string> h_left_strings = to_strings(diff_table->get_column(0));
    std::vector<std::string> h_right_strings = to_strings(diff_table->get_column(1));

    for (size_t i = 0 ; i < h_differences.size() ; ++i) {
      buffer << "lhs[" << h_differences[i] << "] = " << h_left_strings[i]
             << ", rhs[" << h_differences[i] << "] = " << h_right_strings[i] << std::endl;
    }

    EXPECT_EQ(h_differences.size(), size_t{0}) << buffer.str();
  } else {
    //
    //  If there are differences, just display the first one
    //
    int index = h_differences.front();

    auto diff_lhs = cudf::experimental::detail::slice(lhs, index, index+1);
    auto diff_rhs = cudf::experimental::detail::slice(rhs, index, index+1);

    EXPECT_EQ(h_differences.size(), size_t{0}) << "first difference: "
                                               << "lhs[" << index << "] = "
                                               << to_string(diff_lhs, "")
                                               << ", rhs[" << index << "] = "
                                               << to_string(diff_rhs, "");
  }
}

// Bitwise equality
Expand All @@ -69,5 +136,81 @@ void expect_equal_buffers(void const* lhs, void const* rhs,
typed_rhs));
}

struct column_view_printer {
template <typename Element, typename std::enable_if_t<is_numeric<Element>()>* = nullptr>
void operator()(cudf::column_view const& col, std::vector<std::string> & out) {
auto h_data = cudf::test::to_host<Element>(col);

out.resize(col.size());

if (col.nullable()) {
size_type index = 0;
std::transform(h_data.first.begin(), h_data.first.end(), out.begin(), [&h_data, &index](Element el) {
return (bit_is_set(h_data.second.data(), index++)) ? std::to_string(el) : std::string("@");
});
} else {
std::transform(h_data.first.begin(), h_data.first.end(), out.begin(), [](Element el) {
return std::to_string(el);
});
}
}

template <typename Element, typename std::enable_if_t<is_timestamp<Element>()>* = nullptr>
void operator()(cudf::column_view const& col, std::vector<std::string> & out) {
//
// For timestamps, convert timestamp column to column of strings, then
// call string version
//
auto col_as_strings = cudf::strings::from_timestamps(col);

this->template operator()<cudf::string_view>(*col_as_strings, out);
}

template <typename Element, typename std::enable_if_t<std::is_same<Element, cudf::string_view>::value>* = nullptr>
void operator()(cudf::column_view const& col, std::vector<std::string> & out) {
//
// Implementation for strings, call special to_host variant
//
auto h_data = cudf::test::to_host<std::string>(col);

out.resize(col.size());

if (col.nullable()) {
size_type index = 0;
std::transform(h_data.first.begin(), h_data.first.end(), out.begin(), [&h_data, &index](std::string el) {
return (bit_is_set(h_data.second.data(), index++)) ? el : std::string("@");
});
} else {
out = std::move(h_data.first);
}
}
};

/**
 * @brief Convert column values to a host vector of strings.
 *
 * Dispatches on the column's data type to `column_view_printer`, which fills
 * the result with one string per element.
 *
 * @param col The column view
 * @return One string per element of `col`
 */
std::vector<std::string> to_strings(cudf::column_view const& col) {
  std::vector<std::string> rendered;

  // The printer overload matching col.type() writes directly into `rendered`.
  cudf::experimental::type_dispatcher(col.type(), column_view_printer{}, col, rendered);

  return rendered;
}

/**
 * @brief Formats a column view as a single string with elements separated by
 * `delimiter`.
 *
 * @param col The column view
 * @param delimiter The delimiter to put between elements
 * @return The elements of `col` joined by `delimiter`; an empty string when
 *         the column has no elements
 */
std::string to_string(cudf::column_view const& col, std::string const& delimiter) {

  std::vector<std::string> const h_data = to_strings(col);

  // Nothing to join. `end() - 1` and `back()` below are undefined on an empty
  // vector, so handle that case up front.
  if (h_data.empty()) {
    return std::string{};
  }

  std::ostringstream buffer;

  // ostream_iterator appends the delimiter after every element it writes, so
  // stream all but the last element through it and add the last one bare.
  std::copy(h_data.begin(), h_data.end() - 1, std::ostream_iterator<std::string>(buffer, delimiter.c_str()));
  buffer << h_data.back();

  return buffer.str();
}

/**
 * @brief Writes the delimited string form of a column to an output stream.
 *
 * @param col The column view
 * @param os The output stream
 * @param delimiter The delimiter to put between elements
 */
void print(cudf::column_view const& col, std::ostream &os, std::string const& delimiter) {
  std::string const rendered = to_string(col, delimiter);
  os << rendered;
}

} // namespace test
} // namespace cudf
33 changes: 29 additions & 4 deletions cpp/tests/utilities/column_utilities.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,10 +39,11 @@ void expect_column_properties_equal(cudf::column_view lhs, cudf::column_view rhs
*
* Treats null elements as equivalent.
*
* @param lhs The first column
* @param rhs The second column
*/
void expect_columns_equal(cudf::column_view lhs, cudf::column_view rhs);
* @param lhs The first column
* @param rhs The second column
* @param print_all_differences If true display all differences
*---------------------------------------------------------------------------**/
void expect_columns_equal(cudf::column_view lhs, cudf::column_view rhs, bool print_all_differences = false);

/**
* @brief Verifies the bitwise equality of two device memory buffers.
Expand All @@ -54,6 +55,30 @@ void expect_columns_equal(cudf::column_view lhs, cudf::column_view rhs);
void expect_equal_buffers(void const* lhs, void const* rhs,
std::size_t size_bytes);

/**---------------------------------------------------------------------------*
* @brief Formats a column view as a single delimited string
*
* Each element is converted to its string form and the results are joined
* with `delimiter`. In a nullable column, elements whose validity bit is not
* set are shown as "@".
*
* @param col The column view
* @param delimiter The delimiter to put between strings
* @return The string forms of the elements of `col`, joined by `delimiter`
*---------------------------------------------------------------------------**/
std::string to_string(cudf::column_view const& col, std::string const& delimiter);

/**---------------------------------------------------------------------------*
* @brief Convert column values to a host vector of strings
*
* In a nullable column, elements whose validity bit is not set are shown
* as "@".
*
* @param col The column view
* @return A host vector holding one string per element of `col`
*---------------------------------------------------------------------------**/
std::vector<std::string> to_strings(cudf::column_view const& col);

/**---------------------------------------------------------------------------*
* @brief Print a column view to an ostream
*
* Writes the delimited string form of the column to `os`; nothing else
* (such as a trailing newline) is written.
*
* @param col The column view
* @param os The output stream (defaults to `std::cout`)
* @param delimiter The delimiter to put between strings (defaults to ",")
*---------------------------------------------------------------------------**/
void print(cudf::column_view const& col, std:: ostream &os = std::cout, std::string const& delimiter=",");

/**
* @brief Copies the data and bitmask of a `column_view` to the host.
*
Expand Down
60 changes: 60 additions & 0 deletions cpp/tests/utilities_tests/column_utilities_tests.cu
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
#include <tests/utilities/type_lists.hpp>

template <typename T>

struct ColumnUtilitiesTest
: public cudf::test::BaseFixture,
cudf::test::UniformRandomGenerator<cudf::size_type> {
Expand All @@ -35,7 +36,11 @@ struct ColumnUtilitiesTest
}
};

// Typed test fixture with no state of its own; it only derives from
// cudf::test::BaseFixture. It is instantiated over cudf::test::NumericTypes
// by the TYPED_TEST_CASE registration that follows.
template <typename T>
struct ColumnUtilitiesTestNumeric : public cudf::test::BaseFixture {};

TYPED_TEST_CASE(ColumnUtilitiesTest, cudf::test::FixedWidthTypes);
TYPED_TEST_CASE(ColumnUtilitiesTestNumeric, cudf::test::NumericTypes);

TYPED_TEST(ColumnUtilitiesTest, NonNullableToHost) {
auto sequence = cudf::test::make_counting_transform_iterator(
Expand Down Expand Up @@ -98,3 +103,58 @@ TEST_F(ColumnUtilitiesStringsTest, StringsToHostAllNulls)
auto host_data = cudf::test::to_host<std::string>(strings);
EXPECT_TRUE( host_data.first.empty() );
}

// A non-nullable numeric column must print as its elements, formatted with
// std::to_string and joined by the delimiter.
TYPED_TEST(ColumnUtilitiesTestNumeric, PrintColumnNumeric) {
  const char* delimiter = ",";

  cudf::test::fixed_width_column_wrapper<TypeParam> cudf_col({1, 2, 3, 4, 5});
  std::vector<TypeParam> std_col({1, 2, 3, 4, 5});

  // Build the expected text on the host: the delimiter goes before every
  // element except the first.
  std::ostringstream expected;
  for (std::size_t pos = 0; pos < std_col.size(); ++pos) {
    if (pos != 0) {
      expected << delimiter;
    }
    expected << std::to_string(std_col[pos]);
  }

  EXPECT_EQ(cudf::test::to_string(cudf_col, delimiter), expected.str());
}

// A nullable numeric column must print "@" in place of each null element.
TYPED_TEST(ColumnUtilitiesTestNumeric, PrintColumnWithInvalids) {
  const char* delimiter = ",";

  cudf::test::fixed_width_column_wrapper<TypeParam> cudf_col{ {1, 2, 3, 4, 5},
                                                              {1, 0, 1, 0, 1} };
  std::vector<TypeParam> std_col({1, 2, 3, 4, 5});

  // Positions 1 and 3 are marked invalid above, so they are expected as "@".
  std::vector<std::string> const expected_elements{
    std::to_string(std_col[0]),
    "@",
    std::to_string(std_col[2]),
    "@",
    std::to_string(std_col[4])};

  std::ostringstream expected;
  for (std::size_t pos = 0; pos < expected_elements.size(); ++pos) {
    if (pos != 0) {
      expected << delimiter;
    }
    expected << expected_elements[pos];
  }

  EXPECT_EQ(cudf::test::to_string(cudf_col, delimiter), expected.str());
}

// A strings column must print each string verbatim (including the empty
// string and multi-byte characters) and "@" for the null entry.
TEST_F(ColumnUtilitiesStringsTest, StringsToString) {
  const char* delimiter = ",";

  std::vector<const char*> h_strings{ "eee", "bb", nullptr, "", "aa", "bbb", "ééé" };
  cudf::test::strings_column_wrapper strings( h_strings.begin(), h_strings.end(),
      thrust::make_transform_iterator( h_strings.begin(), [] (auto str) { return str!=nullptr; }));

  // Expected text: every host string in order, with the nullptr entry
  // replaced by the null marker.
  std::ostringstream expected;
  for (std::size_t pos = 0; pos < h_strings.size(); ++pos) {
    if (pos != 0) {
      expected << delimiter;
    }
    expected << ((h_strings[pos] != nullptr) ? h_strings[pos] : "@");
  }

  EXPECT_EQ(cudf::test::to_string(strings, delimiter), expected.str());
}