from ollama import Client
import gradio as gr

model = 'llama3.2'
context = []  # token context returned by Ollama; carried across turns so the model remembers the conversation

client = Client(
    host='http://172.22.0.29:11434',
)


def generate(prompt, context, top_k, top_p, temp):
    """Send one prompt to the Ollama server and return the answer plus the updated context."""
    response = client.generate(
        model=model,
        prompt=prompt,
        context=context,
        stream=False,
        options={
            'top_k': top_k,
            'top_p': top_p,
            'temperature': temp,
        },
    )
    return response.response, response.context


def chat(input, chat_history, top_k, top_p, temp):
    """Gradio callback: generate an answer and append the (question, answer) pair to the history."""
    chat_history = chat_history or []
    global context
    output, context = generate(input, context, top_k, top_p, temp)
    chat_history.append((input, output))
    return chat_history, chat_history


######################### Gradio Code ##########################
block = gr.Blocks()

with block:
    gr.Markdown("""

# My Assistant

""") chatbot = gr.Chatbot() message = gr.Textbox(placeholder="Type here") state = gr.State() with gr.Row(): top_k = gr.Slider(0.0,100.0, label="top_k", value=40, info="Reduces the probability of generating nonsense. A higher value (e.g. 100) will give more diverse answers, while a lower value (e.g. 10) will be more conservative. (Default: 40)") top_p = gr.Slider(0.0,1.0, label="top_p", value=0.9, info=" Works together with top-k. A higher value (e.g., 0.95) will lead to more diverse text, while a lower value (e.g., 0.5) will generate more focused and conservative text. (Default: 0.9)") temp = gr.Slider(0.0,2.0, label="temperature", value=0.8, info="The temperature of the model. Increasing the temperature will make the model answer more creatively. (Default: 0.8)") submit = gr.Button("Send") submit.click(chat, inputs=[message, state, top_k, top_p, temp], outputs=[chatbot, state]) block.launch(debug=True)