Spaces:

AFischer1985
/

BERUFENET.AI

Sleeping

App Files Files Community

AFischer1985 committed on Feb 8, 2024

Commit

2069576

verified ·

1 Parent(s): 6302da5

Initial commit

Browse files

Files changed (8) hide show

.gitattributes +1 -0
db/c3dc4fcc-b575-406f-987f-4e2d9d282883/data_level0.bin +3 -0
db/c3dc4fcc-b575-406f-987f-4e2d9d282883/header.bin +3 -0
db/c3dc4fcc-b575-406f-987f-4e2d9d282883/index_metadata.pickle +3 -0
db/c3dc4fcc-b575-406f-987f-4e2d9d282883/length.bin +3 -0
db/c3dc4fcc-b575-406f-987f-4e2d9d282883/link_lists.bin +3 -0
db/chroma.sqlite3 +3 -0
run.py +101 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+db/chroma.sqlite3 filter=lfs diff=lfs merge=lfs -text

db/c3dc4fcc-b575-406f-987f-4e2d9d282883/data_level0.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b847b433ffbe825c9b7a325217a52214c01b32f0da3060ff91c73ce7682de0a7
+size 16060000

db/c3dc4fcc-b575-406f-987f-4e2d9d282883/header.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7eb3d22f8e3406cb187c96ba787c1946b3b083c965d1dc81c68cbd6ed33663f4
+size 100

db/c3dc4fcc-b575-406f-987f-4e2d9d282883/index_metadata.pickle ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:45ec30953f1f095a65b18e2e9e27ed72bb53c094178bdd2d2f1296c6fd2ea19e
+size 126897

db/c3dc4fcc-b575-406f-987f-4e2d9d282883/length.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:cf428039177ac0281bb54df22b4eb70a4f986d07422a428b25b7ff53fbaa8a44
+size 20000

db/c3dc4fcc-b575-406f-987f-4e2d9d282883/link_lists.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4a7bf5913abf03fc400fbd706f94f59f83192432c334a3ed229a7a3111593106
+size 42780

db/chroma.sqlite3 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8a03963c51eeaca2bd9af237fadc4d990c85c8ceacf5cb9423278c972b993bd2
+size 49680384

run.py ADDED Viewed

	@@ -0,0 +1,101 @@

+#############################################################################
+# Title:  BERUFENET.AI
+# Author: Andreas Fischer
+# Date:   January 4th, 2024
+# Last update: February 8th, 2024
+#############################################################################
+dbPath="/home/af/Schreibtisch/Code/gradio/BERUFENET/db"
+if(os.path.exists(dbPath)==False): dbPath="/home/user/app/db"
+print(dbPath)
+# Chroma-DB
+#-----------
+import chromadb
+#client = chromadb.Client()
+path=dbPath
+client = chromadb.PersistentClient(path=path)
+print(client.heartbeat())
+print(client.get_version())
+print(client.list_collections())
+from chromadb.utils import embedding_functions
+default_ef = embedding_functions.DefaultEmbeddingFunction()
+sentence_transformer_ef = embedding_functions.SentenceTransformerEmbeddingFunction(model_name="T-Systems-onsite/cross-en-de-roberta-sentence-transformer")
+#instructor_ef = embedding_functions.InstructorEmbeddingFunction(model_name="hkunlp/instructor-large", device="cuda")
+print(str(client.list_collections()))
+global collection
+if("name=BerufenetDB1" in str(client.list_collections())): #(False):
+  print("BerufenetDB1 found!")
+  collection = client.get_collection(name="BerufenetDB1", embedding_function=sentence_transformer_ef)
+print("Database ready!")
+print(collection.count())
+# Model
+#-------
+# Text generation is delegated to a hosted model through huggingface_hub's
+# InferenceClient; no model weights are loaded by this script itself.
+from huggingface_hub import InferenceClient
+import gradio as gr
+# NOTE: this rebinds `client`. Above it referred to the Chroma
+# PersistentClient; from here on (and inside response()) it is the
+# InferenceClient for the Mixtral instruct model.
+client = InferenceClient("mistralai/Mixtral-8x7B-Instruct-v0.1")
+# Gradio-GUI
+#------------
+# NOTE(review): gradio is already imported a few lines above, and json is not
+# used anywhere in the visible part of this script.
+import gradio as gr
+import json
+def format_prompt(message, history):
+  prompt = "" #"<s>"
+  #for user_prompt, bot_response in history:
+  #  prompt += f"[INST] {user_prompt} [/INST]"
+  #  prompt += f" {bot_response}</s> "
+  prompt += f"[INST] {message} [/INST]"
+  return prompt
+def response(
+    prompt, history, temperature=0.9, max_new_tokens=500, top_p=0.95, repetition_penalty=1.0,
+):
+    temperature = float(temperature)
+    if temperature < 1e-2: temperature = 1e-2
+    top_p = float(top_p)
+    generate_kwargs = dict(
+        temperature=temperature,
+        max_new_tokens=max_new_tokens,
+        top_p=top_p,
+        repetition_penalty=repetition_penalty,
+        do_sample=True,
+        seed=42,
+    )
+    addon=""
+    results=collection.query(
+      query_texts=[prompt],
+      n_results=5,
+      #where={"source": "google-docs"}
+      #where_document={"$contains":"search_string"}
+    )
+    dists=["<br><small>(relevance: "+str(round((1-d)*100)/100)+";" for d in results['distances'][0]]
+    sources=["source: "+s["source"]+")</small>" for s in results['metadatas'][0]]
+    results=results['documents'][0]
+    combination = zip(results,dists,sources)
+    combination = [' '.join(triplets) for triplets in combination]
+    print(str(prompt)+"\n\n"+str(combination))
+    if(len(results)>1):
+      addon=" Bitte berücksichtige bei deiner Antwort ggf. folgende Auszüge aus unserer Datenbank, sofern sie für die Antwort erforderlich sind. Beantworte die Frage knapp und präzise. Ignoriere unpassende Datenbank-Auszüge OHNE sie zu kommentieren, zu erwähnen oder aufzulisten:\n"+"\n".join(results)
+    system="Du bist ein deutschsprachiges KI-basiertes Assistenzsystem, das zu jedem Anliegen möglichst geeignete Berufe empfiehlt."+addon+"\n\nUser-Anliegen:"
+    formatted_prompt = format_prompt(system+"\n"+prompt, history)
+    stream = client.text_generation(formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=False)
+    output = ""
+    for response in stream:
+        output += response.token.text
+        yield output
+    output=output+"\n\n<br><details open><summary><strong>Sources</strong></summary><br><ul>"+ "".join(["<li>" + s + "</li>" for s in combination])+"</ul></details>"
+    yield output
+gr.ChatInterface(response, chatbot=gr.Chatbot(value=[[None,"Herzlich willkommen! Ich bin ein KI-basiertes Assistenzsystem, das für jede Anfrage die am besten passenden Berufe empfiehlt.<br>Erzähle mir, was du gerne tust!"]],render_markdown=True),title="German BERUFENET-RAG-Interface to the Hugging Face Hub").queue().launch(share=True) #False, server_name="0.0.0.0", server_port=7864)
+print("Interface up and running!")