blob: f1862fb1d81a312d2df139eff6200124c5deff4a [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
#include <iostream>
#include "arrow/api.h"
#include "arrow/filesystem/api.h"
#include "arrow/result.h"
#include "./config.h"
#include "gar/api.h"
#include "gar/writer/arrow_chunk_writer.h"
arrow::Result<std::shared_ptr<arrow::Table>> generate_vertex_table() {
// property "id"
arrow::Int64Builder i64builder;
ARROW_RETURN_NOT_OK(i64builder.AppendValues({0, 1, 2}));
std::shared_ptr<arrow::Array> i64array;
ARROW_RETURN_NOT_OK(i64builder.Finish(&i64array));
// property "firstName"
arrow::StringBuilder strbuilder;
ARROW_RETURN_NOT_OK(strbuilder.Append("John"));
ARROW_RETURN_NOT_OK(strbuilder.Append("Jane"));
ARROW_RETURN_NOT_OK(strbuilder.Append("Alice"));
std::shared_ptr<arrow::Array> strarray;
ARROW_RETURN_NOT_OK(strbuilder.Finish(&strarray));
// property "lastName"
arrow::StringBuilder strbuilder2;
ARROW_RETURN_NOT_OK(strbuilder2.Append("Smith"));
ARROW_RETURN_NOT_OK(strbuilder2.Append("Doe"));
ARROW_RETURN_NOT_OK(strbuilder2.Append("Wonderland"));
std::shared_ptr<arrow::Array> strarray2;
ARROW_RETURN_NOT_OK(strbuilder2.Finish(&strarray2));
// property "gender"
arrow::StringBuilder strbuilder3;
ARROW_RETURN_NOT_OK(strbuilder2.Append("male"));
ARROW_RETURN_NOT_OK(strbuilder2.Append("female"));
ARROW_RETURN_NOT_OK(strbuilder2.Append("female"));
std::shared_ptr<arrow::Array> strarray3;
ARROW_RETURN_NOT_OK(strbuilder2.Finish(&strarray3));
// schema
auto schema = arrow::schema({arrow::field("id", arrow::int64()),
arrow::field("firstName", arrow::utf8()),
arrow::field("lastName", arrow::utf8()),
arrow::field("gender", arrow::utf8())});
return arrow::Table::Make(schema, {i64array, strarray, strarray2, strarray3});
}
arrow::Result<std::shared_ptr<arrow::Table>> generate_adj_list_table() {
// source vertex id
arrow::Int64Builder i64builder;
ARROW_RETURN_NOT_OK(i64builder.AppendValues({1, 0, 0, 2}));
std::shared_ptr<arrow::Array> i64array;
ARROW_RETURN_NOT_OK(i64builder.Finish(&i64array));
// destination vertex id
arrow::Int64Builder i64builder2;
ARROW_RETURN_NOT_OK(i64builder2.AppendValues({0, 1, 2, 1}));
std::shared_ptr<arrow::Array> i64array2;
ARROW_RETURN_NOT_OK(i64builder2.Finish(&i64array2));
// schema
auto schema = arrow::schema(
{arrow::field(graphar::GeneralParams::kSrcIndexCol, arrow::int64()),
arrow::field(graphar::GeneralParams::kDstIndexCol, arrow::int64())});
return arrow::Table::Make(schema, {i64array, i64array2});
}
arrow::Result<std::shared_ptr<arrow::Table>> generate_edge_property_table() {
// property "creationDate"
arrow::StringBuilder strbuilder;
ARROW_RETURN_NOT_OK(strbuilder.Append("2010-01-01"));
ARROW_RETURN_NOT_OK(strbuilder.Append("2011-01-01"));
ARROW_RETURN_NOT_OK(strbuilder.Append("2012-01-01"));
ARROW_RETURN_NOT_OK(strbuilder.Append("2013-01-01"));
std::shared_ptr<arrow::Array> strarray;
ARROW_RETURN_NOT_OK(strbuilder.Finish(&strarray));
// schema
auto schema = arrow::schema({arrow::field("creationDate", arrow::utf8())});
return arrow::Table::Make(schema, {strarray});
}
void vertex_property_writer(
const std::shared_ptr<graphar::GraphInfo>& graph_info) {
// create writer
std::string vertex_meta_file =
TEST_DATA_DIR + "/ldbc_sample/parquet/" + "person.vertex.yml";
auto vertex_meta = graphar::Yaml::LoadFile(vertex_meta_file).value();
auto vertex_info = graphar::VertexInfo::Load(vertex_meta).value();
ASSERT(vertex_info->GetLabel() == "person");
auto maybe_writer = graphar::VertexPropertyWriter::Make(vertex_info, "/tmp/");
ASSERT(maybe_writer.status().ok());
auto writer = maybe_writer.value();
// construct vertex property table
auto table = generate_vertex_table().ValueOrDie();
// print
std::cout << "rows number of vertex table: " << table->num_rows()
<< std::endl;
std::cout << "schema of vertex table: " << std::endl
<< table->schema()->ToString() << std::endl;
// use writer
// set validate level
writer->SetValidateLevel(graphar::ValidateLevel::strong_validate);
// write the table
ASSERT(writer->WriteTable(table, 0).ok());
// write the number of vertices
ASSERT(writer->WriteVerticesNum(table->num_rows()).ok());
std::cout << "writing vertex data successfully!" << std::endl;
// check vertex count
auto path = "/tmp/vertex/person/vertex_count";
auto fs = arrow::fs::FileSystemFromUriOrPath(path).ValueOrDie();
auto input = fs->OpenInputStream(path).ValueOrDie();
auto num = input->Read(sizeof(graphar::IdType)).ValueOrDie();
graphar::IdType* ptr = (graphar::IdType*) num->data();
std::cout << "vertex count from reading written file: " << *ptr << std::endl;
}
void edge_chunk_writer(const std::shared_ptr<graphar::GraphInfo>& graph_info) {
// construct writer
std::string edge_meta_file =
TEST_DATA_DIR + "/ldbc_sample/csv/" + "person_knows_person.edge.yml";
auto edge_meta = graphar::Yaml::LoadFile(edge_meta_file).value();
auto edge_info = graphar::EdgeInfo::Load(edge_meta).value();
auto adj_list_type = graphar::AdjListType::ordered_by_source;
auto maybe_writer =
graphar::EdgeChunkWriter::Make(edge_info, "/tmp/", adj_list_type);
ASSERT(maybe_writer.status().ok());
auto writer = maybe_writer.value();
// construct property chunk
auto chunk = generate_edge_property_table().ValueOrDie();
// print
std::cout << "rows number of edge property chunk: " << chunk->num_rows()
<< std::endl;
std::cout << "schema of edge property chunk: " << std::endl
<< chunk->schema()->ToString() << std::endl;
// construct adj list table
auto table = generate_adj_list_table().ValueOrDie();
// print
std::cout << "rows number of adj list table: " << table->num_rows()
<< std::endl;
std::cout << "schema of adj list table: " << std::endl
<< table->schema()->ToString() << std::endl;
// use writer
// set validate level
writer->SetValidateLevel(graphar::ValidateLevel::strong_validate);
// write a property chunk
auto pg = edge_info->GetPropertyGroup("creationDate");
ASSERT(pg != nullptr);
ASSERT(writer->WritePropertyChunk(chunk, pg, 0, 0).ok());
// write adj list of vertex chunk 0 to files
ASSERT(writer->SortAndWriteAdjListTable(table, 0, 0).ok());
// write number of edges for vertex chunk 0
ASSERT(writer->WriteEdgesNum(0, table->num_rows()).ok());
// write number of vertices
ASSERT(writer->WriteVerticesNum(903).ok());
std::cout << "writing edge data successfully!" << std::endl;
// check the number of edges
auto path = "/tmp/edge/person_knows_person/ordered_by_source/edge_count0";
auto fs = arrow::fs::FileSystemFromUriOrPath(path).ValueOrDie();
std::shared_ptr<arrow::io::InputStream> input =
fs->OpenInputStream(path).ValueOrDie();
auto edge_num = input->Read(sizeof(graphar::IdType)).ValueOrDie();
graphar::IdType* edge_num_ptr = (graphar::IdType*) edge_num->data();
std::cout << "edge number from reading written file: " << *edge_num_ptr
<< std::endl;
}
int main(int argc, char* argv[]) {
// read file and construct graph info
std::string path =
TEST_DATA_DIR + "/ldbc_sample/parquet/ldbc_sample.graph.yml";
auto graph_info = graphar::GraphInfo::Load(path).value();
// vertex property writer
std::cout << "Vertex property writer" << std::endl;
std::cout << "----------------------" << std::endl;
vertex_property_writer(graph_info);
std::cout << std::endl;
// edge property writer
std::cout << "Edge property writer" << std::endl;
std::cout << "--------------------" << std::endl;
edge_chunk_writer(graph_info);
}