CodeNight-KI/text2text/main_gradio.py
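
"""Gradio chat front end for a llama3.2 model served by Ollama.

A minimal single-file app: prompts typed into the textbox are sent to the
Ollama server configured below, answers are shown in a Gradio chatbot, and
the conversation context is carried between turns.
"""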

from ollama import Client
import gradio as gr

model = 'llama3.2'
context = []  # rolling token context returned by Ollama; keeps the conversation going

# Ollama server on the local network
client = Client(
    host='http://172.22.0.29:11434',
)


def generate(prompt, context, top_k, top_p, temp):
    """Send one prompt to Ollama and return the answer plus the updated context."""
    response = client.generate(
        model=model,
        prompt=prompt,
        context=context,
        stream=False,
        options={
            'top_k': top_k,            # size of the sampling pool
            'top_p': top_p,            # nucleus sampling threshold
            'temperature': temp,       # randomness of the output
        },
    )
    return response.response, response.context
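
# Quick sanity check (assumes the server above is reachable and the model has
# been fetched with `ollama pull llama3.2`):
#   answer, ctx = generate("Say hello in one sentence.", [], top_k=40, top_p=0.9, temp=0.8)
#   print(answer)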
def chat(user_message, chat_history, top_k, top_p, temp):
    """Gradio callback: query the model and append the turn to the chat history."""
    chat_history = chat_history or []
    global context  # reuse the conversation context across calls
    output, context = generate(user_message, context, top_k, top_p, temp)
    chat_history.append((user_message, output))
    return chat_history, chat_history
######################### Gradio Code #########################
block = gr.Blocks()
with block:
    gr.Markdown("""<h1><center> My Assistant </center></h1>""")
    chatbot = gr.Chatbot()
    message = gr.Textbox(placeholder="Type here")
    state = gr.State()  # holds the chat history between submissions
    with gr.Row():
        top_k = gr.Slider(0.0, 100.0, label="top_k", value=40, info="Reduces the probability of generating nonsense. A higher value (e.g. 100) gives more diverse answers, while a lower value (e.g. 10) is more conservative. (Default: 40)")
        top_p = gr.Slider(0.0, 1.0, label="top_p", value=0.9, info="Works together with top_k. A higher value (e.g. 0.95) leads to more diverse text, while a lower value (e.g. 0.5) generates more focused and conservative text. (Default: 0.9)")
        temp = gr.Slider(0.0, 2.0, label="temperature", value=0.8, info="The temperature of the model. Increasing the temperature makes the model answer more creatively. (Default: 0.8)")
    submit = gr.Button("Send")
    submit.click(chat, inputs=[message, state, top_k, top_p, temp], outputs=[chatbot, state])

block.launch(debug=True)
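
# To run (assuming the `ollama` and `gradio` packages are installed, e.g.
# `pip install ollama gradio`):
#   python main_gradio.py
# Gradio serves the UI locally (http://127.0.0.1:7860 by default) and forwards
# each prompt to the Ollama host configured above.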