Write Apache Arrow table to string C++

  apache-arrow, apache-arrow-cpp, c++

I’m trying to write an Apache Arrow table to a string. My larger example has problems, and I can’t get this small example to work either: it segfaults inside Arrow, in the WriteTable call. My larger example doesn’t appear to serialize correctly.

#include <cassert>
#include <memory>
#include <stdexcept>
#include <string>
#include <vector>

#include <arrow/api.h>
#include <arrow/io/memory.h>
#include <arrow/ipc/api.h>
 
std::shared_ptr<arrow::Table> makeSimpleFakeArrowTable() {
    std::vector<std::shared_ptr<arrow::Field>> arrowFields;
    arrowFields.emplace_back(std::make_shared<arrow::Field>("Field1", arrow::int64()));
    arrowFields.emplace_back(std::make_shared<arrow::Field>("Field2", arrow::float64()));

    auto schema = std::make_shared<arrow::Schema>(arrowFields);

    std::vector<std::shared_ptr<arrow::Array>> columns(schema->num_fields());

    arrow::Int64Builder longBuilder;
    longBuilder.Append(20);
    longBuilder.Finish(&(columns.at(0)));
    arrow::DoubleBuilder doubleBuilder;
    doubleBuilder.Append(10.0);
    longBuilder.Finish(&(columns.at(1)));

    return arrow::Table::Make(schema, columns);
}

std::shared_ptr<arrow::RecordBatch>
getArrowBatchFromBytes(const std::string& bytes) {
    arrow::io::BufferReader arrowBufferReader{bytes};
    auto streamReader =
        arrow::ipc::RecordBatchStreamReader::Open(&arrowBufferReader).ValueOrDie();

    auto batch = streamReader->Next().ValueOrDie();

    return batch;
}


/// Serializes a table into the Arrow IPC stream format.
///
/// @param table Table to serialize; must not be null.
/// @return The raw bytes of the IPC stream, stored in a std::string. The
///         result is binary data and may contain embedded NUL bytes.
/// @throws std::runtime_error if the table is null or any Arrow call fails.
std::string arrowTableToByteString(const std::shared_ptr<arrow::Table>& table) {
    const auto throwIfFailed = [](const arrow::Status& status) {
        if (!status.ok()) {
            throw std::runtime_error(
                "Couldn't write Arrow Table to byte string. Arrow status was: '" +
                status.ToString() + "'.");
        }
    };

    if (table == nullptr) {
        throw std::runtime_error(
            "Couldn't write Arrow Table to byte string. The table was null.");
    }

    auto streamResult = arrow::io::BufferOutputStream::Create();
    throwIfFailed(streamResult.status());
    auto stream = *streamResult;

    auto writerResult = arrow::ipc::MakeStreamWriter(stream, table->schema());
    throwIfFailed(writerResult.status());
    auto batchWriter = *writerResult;

    throwIfFailed(batchWriter->WriteTable(*table));
    // Closing the writer emits the end-of-stream marker before the buffer is
    // taken from the output stream.
    throwIfFailed(batchWriter->Close());

    auto bufferResult = stream->Finish();
    throwIfFailed(bufferResult.status());
    std::shared_ptr<arrow::Buffer> buffer = *bufferResult;

    // ToString() copies the raw bytes; ToHexString() would return a hex dump
    // that an IPC reader cannot parse.
    return buffer->ToString();
}

int main(int argc, char** argv) {
    auto simpleFakeArrowTable = makeSimpleFakeArrowTable();
    std::string tableAsByteString = arrowTableToByteString(simpleFakeArrowTable);

    auto batch = getArrowBatchFromBytes(tableAsByteString);
    assert(batch != nullptr);
}

Source: Windows Questions C++

LEAVE A COMMENT