hevold committed on
Commit
e9f685e
·
verified ·
1 Parent(s): e99acf7

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +91 -14
app.py CHANGED
@@ -1,11 +1,20 @@
1
  import gradio as gr
2
- from huggingface_hub import InferenceClient
 
 
 
 
 
 
3
 
4
- """
5
- For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
6
- """
7
- client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
8
 
 
 
 
 
9
 
10
  def respond(
11
  message,
@@ -15,6 +24,30 @@ def respond(
15
  temperature,
16
  top_p,
17
  ):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
  messages = [{"role": "system", "content": system_message}]
19
 
20
  for val in history:
@@ -27,17 +60,18 @@ def respond(
27
 
28
  response = ""
29
 
30
- for message in client.chat_completion(
31
- messages,
 
32
  max_tokens=max_tokens,
33
  stream=True,
34
  temperature=temperature,
35
  top_p=top_p,
36
  ):
37
- token = message.choices[0].delta.content
38
-
39
- response += token
40
- yield response
41
 
42
 
43
  """
@@ -46,9 +80,12 @@ For information on how to customize the ChatInterface, peruse the gradio docs: h
46
  demo = gr.ChatInterface(
47
  respond,
48
  additional_inputs=[
49
- gr.Textbox(value="Du er en hjelpsom assistent", label="System message"),
 
 
 
50
  gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
51
- gr.Slider(minimum=0.1, maximum=4.0, value=0.2, step=0.1, label="Temperature"),
52
  gr.Slider(
53
  minimum=0.1,
54
  maximum=1.0,
@@ -57,8 +94,48 @@ demo = gr.ChatInterface(
57
  label="Top-p (nucleus sampling)",
58
  ),
59
  ],
 
 
60
  )
61
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
62
 
63
  if __name__ == "__main__":
64
- demo.launch()
 
1
  import gradio as gr
2
+ from openai import OpenAI
3
+ import os
4
+ import numpy as np
5
+ from src.document_processing.processor import DocumentProcessor
6
+ from src.rag.retriever import Retriever
7
+ from src.rag.generator import Generator
8
+ from src.api.openai_api import OpenAIAPI
9
 
10
+ # Initialize OpenAI client
11
+ api_key = os.environ.get("OPENAI_API_KEY", "")
12
+ openai_api = OpenAIAPI(api_key=api_key)
 
13
 
14
+ # Initialize RAG components with OpenAI API
15
+ document_processor = DocumentProcessor(api_client=openai_api)
16
+ retriever = Retriever(api_client=openai_api)
17
+ generator = Generator(api_client=openai_api)
18
 
19
  def respond(
20
  message,
 
24
  temperature,
25
  top_p,
26
  ):
27
+ # Check if we should use RAG
28
+ use_rag = "bruk dokumenter" in message.lower() or "bruk rag" in message.lower()
29
+
30
+ if use_rag:
31
+ # Use our RAG implementation with GPT-4o
32
+ try:
33
+ # Retrieve relevant chunks
34
+ retrieved_chunks = retriever.retrieve(message)
35
+
36
+ # Generate response using RAG
37
+ response = generator.generate(
38
+ query=message,
39
+ retrieved_chunks=retrieved_chunks,
40
+ temperature=temperature
41
+ )
42
+
43
+ yield response
44
+ return
45
+ except Exception as e:
46
+ # If RAG fails, fall back to standard GPT-4o
47
+ print(f"RAG failed: {str(e)}, falling back to standard GPT-4o")
48
+
49
+ # Standard GPT-4o approach
50
+ client = OpenAI(api_key=api_key)
51
  messages = [{"role": "system", "content": system_message}]
52
 
53
  for val in history:
 
60
 
61
  response = ""
62
 
63
+ for chunk in client.chat.completions.create(
64
+ model="gpt-4o",
65
+ messages=messages,
66
  max_tokens=max_tokens,
67
  stream=True,
68
  temperature=temperature,
69
  top_p=top_p,
70
  ):
71
+ content = chunk.choices[0].delta.content
72
+ if content:
73
+ response += content
74
+ yield response
75
 
76
 
77
  """
 
80
  demo = gr.ChatInterface(
81
  respond,
82
  additional_inputs=[
83
+ gr.Textbox(
84
+ value="Du er en hjelpsom assistent som svarer på norsk. Bruk kunnskapen din til å svare på spørsmål. Hvis brukeren skriver 'bruk dokumenter' eller 'bruk RAG', vil du bruke Retrieval-Augmented Generation for å svare basert på opplastede dokumenter.",
85
+ label="System message"
86
+ ),
87
  gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
88
+ gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
89
  gr.Slider(
90
  minimum=0.1,
91
  maximum=1.0,
 
94
  label="Top-p (nucleus sampling)",
95
  ),
96
  ],
97
+ title="Norwegian RAG Chatbot with GPT-4o",
98
+ description="En chatbot basert på Retrieval-Augmented Generation (RAG) for norsk språk med GPT-4o. Skriv 'bruk dokumenter' eller 'bruk RAG' i meldingen din for å aktivere RAG-funksjonalitet.",
99
  )
100
 
101
+ # Create the document upload interface
102
+ with gr.Blocks() as document_upload:
103
+ with gr.Tab("Last opp dokumenter"):
104
+ with gr.Row():
105
+ with gr.Column(scale=2):
106
+ file_output = gr.File(label="Opplastede dokumenter")
107
+ upload_button = gr.UploadButton(
108
+ "Klikk for å laste opp dokument",
109
+ file_types=["pdf", "txt", "html"],
110
+ file_count="multiple"
111
+ )
112
+
113
+ with gr.Column(scale=3):
114
+ documents_list = gr.Dataframe(
115
+ headers=["Dokument ID", "Filnavn", "Dato", "Chunks"],
116
+ label="Dokumentliste",
117
+ interactive=False
118
+ )
119
+
120
+ process_status = gr.Textbox(label="Status", interactive=False)
121
+ refresh_btn = gr.Button("Oppdater dokumentliste")
122
+
123
+ # Set up event handlers
124
+ upload_button.upload(
125
+ fn=document_processor.process_document,
126
+ inputs=[upload_button],
127
+ outputs=[process_status, documents_list]
128
+ )
129
+
130
+ refresh_btn.click(
131
+ fn=lambda: [[doc_id, meta.get("filename", "N/A"), meta.get("processed_date", "N/A"), meta.get("chunk_count", 0)]
132
+ for doc_id, meta in document_processor.get_all_documents().items()],
133
+ inputs=None,
134
+ outputs=[documents_list]
135
+ )
136
+
137
+ # Combine the interfaces
138
+ app = gr.TabbedInterface([demo, document_upload], ["Chat", "Dokumenter"])
139
 
140
  if __name__ == "__main__":
141
+ app.launch()