ibrahimBlyc committed on
Commit 6ec905a · 1 Parent(s): 1616b8c

Update space

Files changed (1)
  1. app.py +42 -29
app.py CHANGED
@@ -1,20 +1,8 @@
 import gradio as gr
-from huggingface_hub import InferenceClient
+import requests
+import json
 
-"""
-For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
-"""
-client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
-
-
-def respond(
-    message,
-    history: list[tuple[str, str]],
-    system_message,
-    max_tokens,
-    temperature,
-    top_p,
-):
+def respond(message, history, system_message, max_tokens, temperature, top_p):
     messages = [{"role": "system", "content": system_message}]
 
     for val in history:
@@ -27,22 +15,47 @@ def respond(
 
     response = ""
 
-    for message in client.chat_completion(
-        messages,
-        max_tokens=max_tokens,
-        stream=True,
-        temperature=temperature,
-        top_p=top_p,
-    ):
-        token = message.choices[0].delta.content
+    data = {
+        "model": "hf.co/ibrahimBlyc/LA_Llama:latest",
+        "prompt": "",  # the full prompt is built below
+        "max_tokens": max_tokens,
+        "temperature": temperature,
+        "top_p": top_p
+        # other Ollama parameters if needed
+    }
+
+    # Build the full prompt from the messages
+    prompt = ""
+    for msg in messages:
+        prompt += f"{msg['role']}: {msg['content']}\n"
+    data["prompt"] = prompt
+
+    url = "http://localhost:11434/api/generate"
+
+    try:
+        stream_response = requests.post(url, json=data, stream=True)
+        stream_response.raise_for_status()  # raise if the status code is not 2xx
+
+        for chunk in stream_response.iter_lines():
+            if chunk:
+                decoded_chunk = chunk.decode()
+                try:
+                    response_json = json.loads(decoded_chunk)  # parse the streamed JSON chunk
+                    token = response_json.get("response", "")
+
+                    if token:
+                        response += token
+                        yield response
+                except json.JSONDecodeError as e:
+                    print(f"Error while decoding chunk: {e}. Chunk: {decoded_chunk}")
+                    yield "Error: could not decode the server response."
+                    return
 
-        response += token
-        yield response
+    except requests.exceptions.RequestException as e:
+        print(f"Request error: {e}")
+        yield "Error: could not reach the Ollama server."
 
 
-"""
-For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
-"""
 demo = gr.ChatInterface(
     respond,
     additional_inputs=[
@@ -61,4 +74,4 @@ demo = gr.ChatInterface(
 
 
 if __name__ == "__main__":
-    demo.launch()
+    demo.launch(share=True)
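
The updated app.py assumes an Ollama server listening on http://localhost:11434 with the model hf.co/ibrahimBlyc/LA_Llama:latest already pulled. The sketch below is a minimal, standalone way to exercise that endpoint outside Gradio; it assumes Ollama's streaming /api/generate format (one JSON object per line, each carrying a "response" fragment and a final "done": true marker) and places the sampling parameters under "options", where Ollama normally expects them (parameter names should be checked against the Ollama docs for the installed version).

import json
import requests

# Standalone check of the local Ollama endpoint used by app.py (illustrative only).
# Assumes Ollama is running on localhost:11434 and the model has been pulled,
# e.g. via `ollama pull hf.co/ibrahimBlyc/LA_Llama:latest`.
OLLAMA_URL = "http://localhost:11434/api/generate"

payload = {
    "model": "hf.co/ibrahimBlyc/LA_Llama:latest",
    "prompt": "system: You are a helpful assistant.\nuser: Hello!\n",
    "stream": True,
    # Sampling settings usually live under "options" in Ollama's API.
    "options": {"temperature": 0.7, "top_p": 0.95, "num_predict": 128},
}

with requests.post(OLLAMA_URL, json=payload, stream=True, timeout=120) as resp:
    resp.raise_for_status()
    for line in resp.iter_lines():
        if not line:
            continue
        chunk = json.loads(line.decode())       # each line is one JSON object
        print(chunk.get("response", ""), end="", flush=True)
        if chunk.get("done"):                   # final chunk reports "done": true
            break
print()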