// Source: GitHub gist TomAugspurger/e7c4f7826fb4ca759fb7aa7a7ba50bbb
// Author: @TomAugspurger
// Created: October 27, 2025 17:59
// -----------------------------------------------------------------------------
// Reproducer (translated from Python)
// -----------------------------------------------------------------------------
#include <cudf/column/column.hpp>
#include <cudf/column/column_factories.hpp>
#include <cudf/strings/contains.hpp>
#include <cudf/strings/regex/regex_program.hpp>
#include <cudf/strings/strings_column_view.hpp>
#include <cudf/types.hpp>
#include <cudf/utilities/error.hpp>

#include <rmm/cuda_stream.hpp>
#include <rmm/cuda_stream_view.hpp>
#include <rmm/mr/device/per_device_resource.hpp>

#include <cstddef>
#include <iostream>
#include <limits>
#include <memory>
#include <stdexcept>
#include <string>
#include <vector>
// Helper function to create a strings column from a vector
std::unique_ptr<cudf::column> make_strings_column(std::vector<std::string> const& h_strings,
rmm::cuda_stream_view stream) {
// Calculate offsets
std::vector<cudf::size_type> offsets;
offsets.reserve(h_strings.size() + 1);
cudf::size_type offset = 0;
offsets.push_back(offset);
for (auto const& str : h_strings) {
offset += str.size();
offsets.push_back(offset);
}
// Concatenate all strings
std::string concatenated;
for (auto const& str : h_strings) {
concatenated += str;
}
// Create offsets column
auto offsets_column = cudf::make_numeric_column(
cudf::data_type{cudf::type_id::INT32},
offsets.size(),
cudf::mask_state::UNALLOCATED,
stream
);
// Copy offsets to device
cudaMemcpyAsync(offsets_column->mutable_view().data<cudf::size_type>(),
offsets.data(),
offsets.size() * sizeof(cudf::size_type),
cudaMemcpyHostToDevice,
stream.value());
// Create chars buffer
rmm::device_buffer chars_buffer(concatenated.size(), stream);
// Copy chars to device
cudaMemcpyAsync(chars_buffer.data(),
concatenated.data(),
concatenated.size() * sizeof(char),
cudaMemcpyHostToDevice,
stream.value());
// Create strings column
return cudf::make_strings_column(
h_strings.size(),
std::move(offsets_column),
std::move(chars_buffer),
0, // null_count
rmm::device_buffer{0, stream} // null_mask
);
}
int main() {
try {
// Create a CUDA stream
rmm::cuda_stream stream;
// Create a vector of strings
std::vector<std::string> h_strings = {"a", "b", "c"};
// Create a strings column
auto col = make_strings_column(h_strings, stream);
// Create a strings column view
cudf::strings_column_view strings_view(col->view());
// Create regex program with pattern "a" and default flags
auto program = cudf::strings::regex_program::create("a",
cudf::strings::regex_flags::DEFAULT);
// Call contains_re function
auto result = cudf::strings::contains_re(strings_view, *program, stream);
// Synchronize the stream
stream.synchronize();
std::cout << "Successfully executed contains_re with stream" << std::endl;
std::cout << "Result column size: " << result->size() << std::endl;
return 0;
} catch (std::exception const& e) {
std::cerr << "Error: " << e.what() << std::endl;
return 1;
}
}
// (GitHub page footer) Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment