MS-Lucidia-Voice-Gateway-MCP

src
mcp_ollama_link
"""The purpose of this module is to provide a streaming chat interface for Ollama."""

from ollama import Client
from typing import List, Dict

# Create a single client instance
client = Client(host='http://localhost:11434')

def query_ollama_stream(messages: List[Dict[str, str]], model: str = "llama2") -> str:
    """
    Stream responses from Ollama model while maintaining chat history.
    
    Args:
        messages: List of message dictionaries with 'role' and 'content'
        model: Name of the Ollama model to use
    
    Returns:
        str: The complete response from the model
    """
    try:
        response_content = ""
        # Stream the response
        for chunk in client.chat(
            model=model,
            messages=messages,
            stream=True
        ):
            if 'content' in chunk['message']:
                content = chunk['message']['content']
                print(content, end='', flush=True)
                response_content += content
        # Return only the latest response
        return response_content
    except Exception as e:
        print(f"Error in streaming: {e}")
        return None

if __name__ == "__main__":
    # Initialize conversation history
    messages = []
    
    print("Start chatting! (type 'quit' to exit)")
    while True:
        # Get user input
        user_input = input("\nYou: ")
        if user_input.lower() == 'quit':
            break
            
        # Add user message to history
        messages.append({
            'role': 'user',
            'content': user_input
        })
        
        # Get and print assistant's response
        print("\nAssistant: ", end='')
        assistant_response = query_ollama_stream(messages)
        
        # Add assistant's response to history
        if assistant_response:
            messages.append({
                'role': 'assistant',
                'content': assistant_response
            })