| /** |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| #pragma once |
| |
#include <chrono>
#include <cstdint>
#include <filesystem>
#include <functional>
#include <memory>
#include <optional>
#include <string>
#include <string_view>
#include <vector>
#include "utils/expected.h"
| |
| namespace org::apache::nifi::minifi::extensions::llamacpp::processors { |
| |
// A single message of a chat conversation.
struct LlamaChatMessage {
  std::string role;  // e.g. "system", "user" or "assistant"
  std::string content;
};
| |
// Sampler configuration; an unset optional disables the corresponding sampler.
struct LlamaSamplerParams {
  std::optional<float> temperature;  // sampling temperature
  std::optional<int32_t> top_k;  // keep only the top_k most likely tokens
  std::optional<float> top_p;  // keep the smallest token set whose cumulative probability exceeds top_p
  std::optional<float> min_p;  // keep tokens with at least min_p probability relative to the most likely token
  uint64_t min_keep{};  // minimum number of tokens each sampler must keep
};
| |
// Context creation parameters, mirroring llama.cpp's llama_context_params.
struct LlamaContextParams {
  uint32_t n_ctx{};  // size of the context window in tokens
  uint32_t n_batch{};  // logical maximum batch size
  uint32_t n_ubatch{};  // physical maximum batch size
  uint32_t n_seq_max{};  // maximum number of parallel sequences
  int32_t n_threads{};  // number of threads used for generation
  int32_t n_threads_batch{};  // number of threads used for batch/prompt processing
};
| |
// Statistics of a single generate() call.
struct GenerationResult {
  std::chrono::milliseconds time_to_first_token{};
  uint64_t num_tokens_in{};  // number of prompt tokens consumed
  uint64_t num_tokens_out{};  // number of tokens generated
  double tokens_per_second{};  // generation throughput
};
| |
// Abstraction over a loaded llama.cpp model and its inference context.
class LlamaContext {
 public:
  // Renders the messages into a single prompt string using the model's chat
  // template; returns std::nullopt if the template cannot be applied.
  virtual std::optional<std::string> applyTemplate(const std::vector<LlamaChatMessage>& messages) = 0;
  // Generates tokens from the input prompt, streaming each generated token to
  // token_handler; returns generation statistics, or an error message on failure.
  virtual nonstd::expected<GenerationResult, std::string> generate(const std::string& input, std::function<void(std::string_view token)> token_handler) = 0;
  virtual ~LlamaContext() = default;
};
| |
| } // namespace org::apache::nifi::minifi::extensions::llamacpp::processors |