Spaces:

SinDarSoup
/

LangNoter

Sleeping

App Files Files Community

SinDarSoup committed on Mar 6

Commit

b719566

1 Parent(s): db080f6

sample

Browse files

Files changed (4) hide show

app.py +100 -7
utils/caller/llm_client.py +25 -19
utils/learner/dataclass.py +7 -22
utils/learner/learner.py +13 -4

app.py CHANGED Viewed

@@ -1,15 +1,109 @@
 import gradio as gr
-async def chat_fn(message, history, state_history, state_audios):
-    return message, state_history, state_audios
 with gr.Blocks() as demo:
     gr.Markdown("# Lang Thrower")
-    state_history = gr.State([]) # The state for openai usage
     state_audios = gr.State([
         # {"text":"...", "path":"..."}
     ])
     textbox = gr.MultimodalTextbox(
         file_types=["image"],
         file_count="multiple",
@@ -19,11 +113,10 @@ with gr.Blocks() as demo:
     chat_interface = gr.ChatInterface(
         fn=chat_fn,
         textbox=textbox,
-        additional_inputs=[state_history, state_audios],
-        additional_outputs=[state_history, state_audios],
     )
-    textbox.render()
 if __name__ == "__main__":
-    demo.launch()

 import gradio as gr
+from utils.caller.llm_client import (
+    chat_completions,
+    image_to_one_of_content,
+    tts,
+)
+from utils.learner.learner import (
+    get_default_system_prompt,
+    DefaultTool,
+)
+from tempfile import NamedTemporaryFile
+import re
+import json
def extract_json_from_code_block(text):
    """Parse every ```json fenced block found in *text*.

    Args:
        text: The text to scan. ``None`` or an empty string is treated as
            "nothing to scan" instead of raising inside ``re.findall``.

    Returns:
        A list with one decoded JSON value per fenced block, in order of
        appearance. Blocks whose body is not valid JSON are skipped.
    """
    if not text:
        return []
    pattern = r'```json\s*([\s\S]*?)\s*```'
    json_objects = []
    for match in re.findall(pattern, text):
        try:
            json_objects.append(json.loads(match))
        except json.JSONDecodeError:
            # Best effort: one malformed block must not hide the valid ones.
            continue
    return json_objects
def gr_msg_to_openai_msg(gr_message, role="user"):
    """Convert a Gradio multimodal message into an OpenAI chat message.

    Args:
        gr_message: Mapping with an optional ``"text"`` entry and an optional
            ``"files"`` entry (an iterable of image paths).
        role: Value stored under the ``"role"`` key of the result.

    Returns:
        A dict with ``"role"`` and ``"content"``. The content list starts
        with one text part, followed by one part per image that could be
        converted by ``image_to_one_of_content``.
    """
    # A missing or None text becomes "" so the text part always holds a string.
    content = [{"type": "text", "text": gr_message.get("text") or ""}]
    # "files" may be absent or None; both mean "no attachments".
    for image_path in gr_message.get("files") or []:
        try:
            content.append(image_to_one_of_content(image_path))
        except Exception as e:
            # Best effort: warn in the UI and keep the remaining attachments.
            gr.Warning(f"fail to load {image_path}, error msg {e}", duration=5)
    return {
        "role": role,
        "content": content,
    }
def _synthesize_to_temp_mp3(text):
    """Write speech audio for *text* to a new temporary ``.mp3`` and return its path."""
    import os  # local import: the module header shown does not import os

    # delete=False keeps the file after the handle closes, because the path is
    # handed back to the caller. Taking ``.name`` from a throwaway
    # NamedTemporaryFile(delete=True) only reserves the name until that object
    # is collected, after which the name is free for reuse.
    with NamedTemporaryFile(suffix=".mp3", delete=False) as tmp_file:
        path = tmp_file.name
    try:
        tts(input=text).stream_to_file(path)
    except Exception:
        # Do not leave an empty or partial file behind when synthesis fails.
        if os.path.exists(path):
            os.remove(path)
        raise
    return path


async def chat_fn(gr_message, history, model, state_openai_messages:list, state_audios:list):
    """Answer one chat turn, then attach generated audio for each parsed record.

    Args:
        gr_message: Incoming message, converted with ``gr_msg_to_openai_msg``.
        history: Chat history supplied by the chat interface; not used here.
        model: Model name forwarded to ``chat_completions``.
        state_openai_messages: Running OpenAI message list. When empty, it is
            re-created with the default system prompt as its first entry.
        state_audios: Passed through unchanged.

    Yields:
        ``(gr_response, state_openai_messages, state_audios)``: once after the
        text reply is available, then once per record after its audio attempt.
    """
    gr_response = {
        "text": "",
        "files": [],
    }
    openai_message = gr_msg_to_openai_msg(gr_message)
    if not state_openai_messages:
        state_openai_messages = [
            {
                "role": "system",
                "content": get_default_system_prompt(),
            }
        ]
    state_openai_messages.append(openai_message)

    gr.Info("start text generation")
    response = chat_completions(
        messages=state_openai_messages,
        model=model,
    )
    assistant_message = response.choices[0].message
    # The content can be None; normalise so the UI and the parser get a str.
    text = assistant_message.content or ""
    state_openai_messages.append(assistant_message)
    gr_response["text"] = text
    gr.Info("finish text generation")
    yield gr_response, state_openai_messages, state_audios

    payloads = extract_json_from_code_block(text) if text else []
    if not payloads:
        return
    try:
        # Only the first fenced JSON block is interpreted as the record list.
        parsed = DefaultTool(**payloads[0])
    except Exception as e:
        # Model output is untrusted: a payload that is not a mapping or does
        # not match the schema should warn instead of crashing the stream.
        gr.Warning(f"Fail to parse records. {e}")
        return

    for item in parsed.records:
        try:
            data = item.foreign.data
            # This step is text-to-speech, so the toasts say TTS (was "STT").
            gr.Info(f"Start TTS ({data})")
            gr_response["files"].append(_synthesize_to_temp_mp3(data))
            gr.Info(f"END TTS ({data})")
        except Exception as e:
            gr.Warning(f"Fail to generate audio. {e}")
        # Yield after every record so the UI updates as each attempt finishes.
        yield gr_response, state_openai_messages, state_audios
 with gr.Blocks() as demo:
     gr.Markdown("# Lang Thrower")
+    state_openai_messages = gr.State([]) # The state for openai usage
     state_audios = gr.State([
         # {"text":"...", "path":"..."}
     ])
+    model = gr.Text("gpt-4o", label="model_name",)
     textbox = gr.MultimodalTextbox(
         file_types=["image"],
         file_count="multiple",
     chat_interface = gr.ChatInterface(
         fn=chat_fn,
         textbox=textbox,
+        additional_inputs=[model, state_openai_messages, state_audios],
+        additional_outputs=[state_openai_messages, state_audios],
     )
 if __name__ == "__main__":
+    demo.launch(debug=True)

utils/caller/llm_client.py CHANGED Viewed

@@ -1,9 +1,13 @@
 import base64
 from openai import Client
 from gradio_client.utils import is_http_url_like
 import magic
 from pydantic import BaseModel
 from ..learner.learner import DefaultTool
 def get_client(api_key: str | None = None, **kwargs):
     return Client(
@@ -18,7 +22,7 @@ def encode_image(image_path:str):
     mime_type = mime.from_file(image_path)
     return f"data:{mime_type};base64,{base64_image}"
-def image_to_content(
     image_path:str,
     detail:str="auto",
 ):
@@ -48,34 +52,36 @@ def audio_to_content(
         }
     }
-async def chat_completions(
     messages: list,
     model:str,
     *,
     client : Client | None = None,
-    tool_models:list[BaseModel] = [DefaultTool],
     **kwargs,
-):
-    tools = kwargs.pop("tools", None)
-    if tools is None:
-        tools = []
-        for tool_model in tool_models:
-            tools.append(
-                {
-                    "type":"function",
-                    "function":{
-                        "parameters":tool_model.model_json_schema(),
-                        "strict":True,
-                    },
-                }
-            )
     client = client or get_client()
     response = client.chat.completions.create(
         model=model,
         messages=messages,
-        tools=tools,
         **kwargs,
     )
     return response

 import base64
 from openai import Client
+from openai.types.chat.chat_completion import ChatCompletion
 from gradio_client.utils import is_http_url_like
 import magic
 from pydantic import BaseModel
 from ..learner.learner import DefaultTool
+from typing import Any
+from copy import deepcopy
+import json
 def get_client(api_key: str | None = None, **kwargs):
     return Client(
     mime_type = mime.from_file(image_path)
     return f"data:{mime_type};base64,{base64_image}"
+def image_to_one_of_content(
     image_path:str,
     detail:str="auto",
 ):
         }
     }
def chat_completions(
    messages: list,
    model: str,
    *,
    client: Client | None = None,
    **kwargs,
) -> ChatCompletion:
    """Send *messages* to the chat-completions endpoint and return the reply.

    Args:
        messages: Conversation messages forwarded unchanged.
        model: Model name forwarded unchanged.
        client: Client to use; when falsy, one is obtained from ``get_client()``.
        **kwargs: Extra keyword arguments passed straight to ``create``.

    Returns:
        Whatever ``client.chat.completions.create`` returns.
    """
    if not client:
        client = get_client()
    return client.chat.completions.create(
        messages=messages,
        model=model,
        **kwargs,
    )
def tts(
    input,
    *,
    client: Client | None = None,
    voice="fable",
    model="tts-1"
):
    """Request speech audio for *input* from the speech endpoint.

    Args:
        input: Text to synthesise, forwarded unchanged.
        client: Client to use; when falsy, one is obtained from ``get_client()``.
        voice: Voice name forwarded to the endpoint.
        model: Speech model name forwarded to the endpoint.

    Returns:
        Whatever ``client.audio.speech.create`` returns.
    """
    speech_client = client if client else get_client()
    return speech_client.audio.speech.create(
        input=input,
        voice=voice,
        model=model,
    )

utils/learner/dataclass.py CHANGED Viewed

@@ -10,28 +10,13 @@ from .language import (
     LANGUAGE_TO_CODE,
 )
 class _Record(BaseModel):
     "Base Data Model For Language Learner"
-    lang: T_LANGUAGE_CODES | T_LANGUAGES | str = Field(..., description="The language name.")
     data: str = Field(..., description="The data for the record, like `apple` is vocabulary, `How are you.` is a phrase. `I like your product! How much is this` is a sentence.")
-    type: None = Field(None, description="The field needs to be defined in the sub data model.")
-    meta: dict | Any = Field(None, description="The field to be implement or overwrite, please do not fill this yet.")
-    IPA: Optional[str] = Field(None, description="International Phonetic Alphabet")
-    @model_validator(mode='after')
-    def _validator_lang(self)->Self:
-        lang = self.lang.lower()
-        if lang in LANGUAGE_CODES:
-            lang = CODE_TO_LANGUAGE[lang]
-        else:
-            if self.meta is None:
-                self.meta = {}
-            self.meta["warning.lang"] = f"The language is not in the language list {LANGUAGES}."
-        self.lang = lang
-        return self
 class Vocabulary(_Record):
     """
@@ -44,7 +29,7 @@ class Vocabulary(_Record):
         Korean: "고양이", "차", "의사", "학교", "커피", "책"
         Italian: "Gatto", "Auto", "Dottore", "Scuola", "Caffè", "Libro"
     """
-    type: Literal['WORD'] = "WORD"
 class Phrase(_Record):
     """
@@ -57,7 +42,7 @@ class Phrase(_Record):
         Korean: "안녕하세요", "감사합니다", "사랑해요", "왜 그래요?", "오랜만이에요", "얼마예요?"
         Italian: "Ciao", "Grazie", "Ti amo", "Che succede?", "È da tanto tempo!", "Quanto costa?"
     """
-    type: Literal['WORD'] = "WORD"
 class Sentence(_Record):
@@ -71,7 +56,7 @@ class Sentence(_Record):
         Korean: "이건 귀여운 고양이예요.", "커피 한 잔 마시고 싶어요.", "어디에 살아요?", "오늘 날씨가 좋아요.", "저를 도와줄 수 있어요?", "일본어와 한국어를 공부하고 있어요."
         Italian: "Questo è un gatto carino.", "Voglio bere una tazza di caffè.", "Dove vivi?", "Oggi il tempo è bello.", "Puoi aiutarmi?", "Sto imparando il giapponese e il coreano."
     """
-    type: Literal['SENTENCE'] = "SENTENCE"
 class ReadableReference(BaseModel):

     LANGUAGE_TO_CODE,
 )
 # Shared field set for the language-learner records. Vocabulary, Phrase and
 # Sentence subclass this model and redeclare `level`.
 class _Record(BaseModel):
     "Base Data Model For Language Learner"
     # Annotated as a T_LANGUAGES value or any plain string.
+    lang: T_LANGUAGES | str = Field(..., description="The language name.")
     # The learnable text itself (a word, a phrase or a sentence).
     data: str = Field(..., description="The data for the record, like `apple` is vocabulary, `How are you.` is a phrase. `I like your product! How much is this` is a sentence.")
     # Placeholder typed as None; each subclass replaces it with a Literal tag
     # ('WORD', 'PHRASE' or 'SENTENCE').
+    level: None = Field(None, description="The field needs to be defined in the sub data model.")
+    # _meta: dict = Field(..., description="The field to be implement or overwrite, please do not fill this yet.")
     # `...` makes the field required even though its value may be None.
+    IPA: Optional[str] = Field(..., description="International Phonetic Alphabet")
 class Vocabulary(_Record):
     """
         Korean: "고양이", "차", "의사", "학교", "커피", "책"
         Italian: "Gatto", "Auto", "Dottore", "Scuola", "Caffè", "Libro"
     """
+    level: Literal['WORD']
 class Phrase(_Record):
     """
         Korean: "안녕하세요", "감사합니다", "사랑해요", "왜 그래요?", "오랜만이에요", "얼마예요?"
         Italian: "Ciao", "Grazie", "Ti amo", "Che succede?", "È da tanto tempo!", "Quanto costa?"
     """
+    level: Literal['PHRASE']
 class Sentence(_Record):
         Korean: "이건 귀여운 고양이예요.", "커피 한 잔 마시고 싶어요.", "어디에 살아요?", "오늘 날씨가 좋아요.", "저를 도와줄 수 있어요?", "일본어와 한국어를 공부하고 있어요."
         Italian: "Questo è un gatto carino.", "Voglio bere una tazza di caffè.", "Dove vivi?", "Oggi il tempo è bello.", "Puoi aiutarmi?", "Sto imparando il giapponese e il coreano."
     """
+    level: Literal['SENTENCE']
 class ReadableReference(BaseModel):

utils/learner/learner.py CHANGED Viewed

@@ -10,13 +10,14 @@ class DefaultTool(BaseModel):
         `phrase` is less than 8
         `sentence` is less then 5
     """
-    vocabulary: list[R_Vocabulary]
-    phrase: list[R_Phrase]
-    sentence: list[R_Sentence]
 def get_default_system_prompt():
-    return """
     # Assistant Author:
     * 湯沂達 / Tang Yi Dar
         - [email protected]
@@ -37,6 +38,14 @@ def get_default_system_prompt():
     To fill the gap, he decide to create a instant language learner, which is able to generate the target language text and audio together.
     Most of the time, the input will be photos and texts.
     """
 __all__ = [

         `phrase` is less than 8
         `sentence` is less then 5
     """
+    records: list[R_Vocabulary | R_Phrase | R_Sentence]
+    # vocabulary: list[R_Vocabulary]
+    # phrase: list[R_Phrase]
+    # sentence: list[R_Sentence]
 def get_default_system_prompt():
+    return f"""
     # Assistant Author:
     * 湯沂達 / Tang Yi Dar
         - [email protected]
     To fill the gap, he decide to create a instant language learner, which is able to generate the target language text and audio together.
     Most of the time, the input will be photos and texts.
+    The schema is below:
+    {DefaultTool.model_json_schema()}
+    WHEN USER IS TRYING TODO THE TRANSLATION, PLEASE ALSO OUPUT JSON FORMAT LIKE FOLLOW:
+    ```json
+    ...
+    ```
     """
 __all__ = [