from typing import Sequence

import ollama
from ollama import GenerateResponse
from ollama import Options

7
class OllamaClientManager:
    """Thin wrapper around an Ollama HTTP client for embeddings and text generation.

    Holds a single :class:`ollama.Client` plus the model name and context-window
    size used by both :meth:`embed` and :meth:`chat`.
    """

    def __init__(
        self,
        host: str = "http://localhost:11434",
        model: str = "llama3.1:8b",
        context_size: int = 128_000,
    ) -> None:
        """Initializes the client manager.

        Args:
            host: Base URL of the Ollama server. Defaults to the standard
                local install address.
            model: Name of the model to use for both embedding and generation.
            context_size: Context-window size (``num_ctx``) passed on every
                request.
        """
        self.client = ollama.Client(host=host)
        self._model = model
        self._context_size = context_size

    def embed(self, content: str) -> Sequence[Sequence[float]]:
        """Returns the LLM's embedding for the given input text.

        Args:
            content: The input textual content to embed.

        Returns:
            The embedding vector for the corresponding segment of the input
            content.
        """
        response = self.client.embed(
            model=self._model,
            input=content,
            options=Options(num_ctx=self._context_size),
        )
        return response.embeddings

    def chat(self, prompt: str) -> str:
        """Generates a response to the input prompt.

        Args:
            prompt: The input text to generate a response for.

        Returns:
            The generated response.
        """
        # temperature=0 makes generation as deterministic as the backend allows.
        response: GenerateResponse = self.client.generate(
            model=self._model,
            prompt=prompt,
            options=Options(
                num_ctx=self._context_size,
                temperature=0,
            ),
        )
        return response.response