import subprocess


class LocalLLM:
    """
    Thin wrapper around Ollama (or LM Studio) to run a local model.

    Parameters
    ----------
    model : str
        Ollama model tag, e.g. "deepseek-coder:6.7b-instruct".
    """

    def __init__(self, model: str = "codellama:7b-instruct"):
        self.model = model

    # ------------- PUBLIC API ------------------------------------------------
    def generate(
        self,
        prompt: str,
        system_prompt: str = "",
        temperature: float = 0.0,
        num_tokens: int | None = None,  # optional n-token limit
    ) -> str:
"""
Call the local LLM with a prompt.
Uses the Ollama chat command:
/set parameter temperature <value>
Notes
-----
* Works even on older Ollama builds that don’t support --temp.
* You can still change top-p, top-k, etc. the same way.
"""
        # Prepend the /set commands, then the optional system prompt,
        # then the user prompt itself.
        header_lines = [f"/set parameter temperature {temperature}"]
        if num_tokens is not None:
            # num_predict is Ollama's parameter for the maximum number of
            # tokens to generate; set it through the same /set mechanism.
            header_lines.append(f"/set parameter num_predict {num_tokens}")
        if system_prompt.strip():
            header_lines.append(system_prompt.strip())
        header = "\n\n".join(header_lines)
        full_prompt = f"{header}\n\n{prompt.strip()}"

        cmd = ["ollama", "run", self.model, full_prompt]
        # Run the CLI, capture stdout/stderr, and surface failures explicitly.
        result = subprocess.run(cmd, capture_output=True, text=True)
        if result.returncode != 0:
            raise RuntimeError(f"Ollama stderr:\n{result.stderr}")
        if not result.stdout.strip():
            raise RuntimeError("Ollama returned an empty response.")
        return result.stdout.strip()
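

# Minimal usage sketch. Assumes Ollama is installed and the default
# "codellama:7b-instruct" model has already been pulled
# (e.g. via `ollama pull codellama:7b-instruct`); the prompt and system
# prompt below are illustrative only.
if __name__ == "__main__":
    llm = LocalLLM()
    answer = llm.generate(
        prompt="Write a Python function that reverses a string.",
        system_prompt="You are a concise coding assistant.",
        temperature=0.2,
        num_tokens=256,
    )
    print(answer)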