Created
October 27, 2025 17:59
-
-
Save TomAugspurger/e7c4f7826fb4ca759fb7aa7a7ba50bbb to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| // ----------------------------------------------------------------------------- | |
| // Reproducer (translated from Python) | |
| // ----------------------------------------------------------------------------- | |
#include <cudf/column/column.hpp>
#include <cudf/column/column_factories.hpp>
#include <cudf/strings/contains.hpp>
#include <cudf/strings/regex/regex_program.hpp>
#include <cudf/strings/strings_column_view.hpp>
#include <cudf/types.hpp>
#include <rmm/cuda_stream.hpp>
#include <rmm/cuda_stream_view.hpp>
#include <rmm/mr/device/per_device_resource.hpp>
#include <iostream>
#include <limits>
#include <memory>
#include <stdexcept>
#include <string>
#include <vector>
| // Helper function to create a strings column from a vector | |
| std::unique_ptr<cudf::column> make_strings_column(std::vector<std::string> const& h_strings, | |
| rmm::cuda_stream_view stream) { | |
| // Calculate offsets | |
| std::vector<cudf::size_type> offsets; | |
| offsets.reserve(h_strings.size() + 1); | |
| cudf::size_type offset = 0; | |
| offsets.push_back(offset); | |
| for (auto const& str : h_strings) { | |
| offset += str.size(); | |
| offsets.push_back(offset); | |
| } | |
| // Concatenate all strings | |
| std::string concatenated; | |
| for (auto const& str : h_strings) { | |
| concatenated += str; | |
| } | |
| // Create offsets column | |
| auto offsets_column = cudf::make_numeric_column( | |
| cudf::data_type{cudf::type_id::INT32}, | |
| offsets.size(), | |
| cudf::mask_state::UNALLOCATED, | |
| stream | |
| ); | |
| // Copy offsets to device | |
| cudaMemcpyAsync(offsets_column->mutable_view().data<cudf::size_type>(), | |
| offsets.data(), | |
| offsets.size() * sizeof(cudf::size_type), | |
| cudaMemcpyHostToDevice, | |
| stream.value()); | |
| // Create chars buffer | |
| rmm::device_buffer chars_buffer(concatenated.size(), stream); | |
| // Copy chars to device | |
| cudaMemcpyAsync(chars_buffer.data(), | |
| concatenated.data(), | |
| concatenated.size() * sizeof(char), | |
| cudaMemcpyHostToDevice, | |
| stream.value()); | |
| // Create strings column | |
| return cudf::make_strings_column( | |
| h_strings.size(), | |
| std::move(offsets_column), | |
| std::move(chars_buffer), | |
| 0, // null_count | |
| rmm::device_buffer{0, stream} // null_mask | |
| ); | |
| } | |
| int main() { | |
| try { | |
| // Create a CUDA stream | |
| rmm::cuda_stream stream; | |
| // Create a vector of strings | |
| std::vector<std::string> h_strings = {"a", "b", "c"}; | |
| // Create a strings column | |
| auto col = make_strings_column(h_strings, stream); | |
| // Create a strings column view | |
| cudf::strings_column_view strings_view(col->view()); | |
| // Create regex program with pattern "a" and default flags | |
| auto program = cudf::strings::regex_program::create("a", | |
| cudf::strings::regex_flags::DEFAULT); | |
| // Call contains_re function | |
| auto result = cudf::strings::contains_re(strings_view, *program, stream); | |
| // Synchronize the stream | |
| stream.synchronize(); | |
| std::cout << "Successfully executed contains_re with stream" << std::endl; | |
| std::cout << "Result column size: " << result->size() << std::endl; | |
| return 0; | |
| } catch (std::exception const& e) { | |
| std::cerr << "Error: " << e.what() << std::endl; | |
| return 1; | |
| } | |
| } | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment