SinDarSoup committed on
Commit b719566 · 1 Parent(s): db080f6
app.py CHANGED
@@ -1,15 +1,109 @@
  import gradio as gr

- async def chat_fn(message, history, state_history, state_audios):
-     return message, state_history, state_audios

  with gr.Blocks() as demo:
      gr.Markdown("# Lang Thrower")
-     state_history = gr.State([])  # The state for openai usage
      state_audios = gr.State([
          # {"text":"...", "path":"..."}
      ])

      textbox = gr.MultimodalTextbox(
          file_types=["image"],
          file_count="multiple",
@@ -19,11 +113,10 @@ with gr.Blocks() as demo:
      chat_interface = gr.ChatInterface(
          fn=chat_fn,
          textbox=textbox,
-         additional_inputs=[state_history, state_audios],
-         additional_outputs=[state_history, state_audios],
      )
-     textbox.render()


  if __name__ == "__main__":
-     demo.launch()

  import gradio as gr
+ from utils.caller.llm_client import (
+     chat_completions,
+     image_to_one_of_content,
+     tts,
+ )
+ from utils.learner.learner import (
+     get_default_system_prompt,
+     DefaultTool,
+ )
+ from tempfile import NamedTemporaryFile
+ import re
+ import json

+ def extract_json_from_code_block(text):
+     pattern = r'```json\s*([\s\S]*?)\s*```'
+     matches = re.findall(pattern, text)
+
+     json_objects = []
+     for match in matches:
+         try:
+             json_obj = json.loads(match)
+             json_objects.append(json_obj)
+         except json.JSONDecodeError:
+             continue  # Skip invalid JSON
+
+     return json_objects
+
+ def gr_msg_to_openai_msg(gr_message, role="user"):
+     content = []
+     content.append({
+         "type":"text",
+         "text": gr_message.get("text")
+     })
+     for image_path in gr_message.get("files"):
+         try:
+             content.append(image_to_one_of_content(image_path))
+         except Exception as e:
+             gr.Warning(f"Failed to load {image_path}: {e}", duration=5)
+
+     return {
+         "role":role,
+         "content": content,
+     }
+
+
+ async def chat_fn(gr_message, history, model, state_openai_messages:list, state_audios:list):
+     gr_response = {
+         "text":"",
+         "files":[],
+     }
+     openai_message = gr_msg_to_openai_msg(gr_message)
+
+     if not state_openai_messages:
+         state_openai_messages = [
+             {
+                 "role":"system",
+                 "content":get_default_system_prompt(),
+             }
+         ]
+
+     state_openai_messages.append(openai_message)
+
+
+     gr.Info("start text generation")
+     response = chat_completions(
+         messages=state_openai_messages,
+         model=model,
+     )
+
+     text = response.choices[0].message.content
+     state_openai_messages.append(response.choices[0].message)
+     gr_response["text"] = text
+     gr.Info("finish text generation")
+
+     yield gr_response, state_openai_messages, state_audios
+
+     J_list = extract_json_from_code_block(text)
+     if len(J_list) > 0:
+         J = J_list[0]
+         M = DefaultTool(**J)
+         for item in M.records:
+             try:
+                 data = item.foreign.data
+                 gr.Info(f"Start TTS ({data})")
+                 tmp_file_name = NamedTemporaryFile(
+                     suffix=".mp3",
+                     delete=True,
+                 ).name
+                 tts(input=data).stream_to_file(tmp_file_name)
+                 gr_response["files"].append(tmp_file_name)
+                 gr.Info(f"End TTS ({data})")
+             except Exception as e:
+                 gr.Warning(f"Failed to generate audio: {e}")
+
+     yield gr_response, state_openai_messages, state_audios

  with gr.Blocks() as demo:
      gr.Markdown("# Lang Thrower")
+     state_openai_messages = gr.State([])  # The state for openai usage
      state_audios = gr.State([
          # {"text":"...", "path":"..."}
      ])

+     model = gr.Text("gpt-4o", label="model_name")
+
      textbox = gr.MultimodalTextbox(
          file_types=["image"],
          file_count="multiple",

      chat_interface = gr.ChatInterface(
          fn=chat_fn,
          textbox=textbox,
+         additional_inputs=[model, state_openai_messages, state_audios],
+         additional_outputs=[state_openai_messages, state_audios],
      )


  if __name__ == "__main__":
+     demo.launch(debug=True)
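
For reference, here is a minimal, self-contained sketch of how the new `extract_json_from_code_block()` helper behaves outside of Gradio. The logic mirrors the function added to app.py (lightly restructured); the sample model reply is invented for illustration, and the fence marker is built programmatically so the snippet contains no literal fence characters:

```python
import re
import json

# Markdown code-fence marker, built programmatically.
FENCE = "`" * 3

# Same logic as the new extract_json_from_code_block() in app.py:
# capture everything between a json-tagged fence pair and parse it.
def extract_json_from_code_block(text):
    pattern = rf"{FENCE}json\s*([\s\S]*?)\s*{FENCE}"
    matches = re.findall(pattern, text)
    json_objects = []
    for match in matches:
        try:
            json_objects.append(json.loads(match))
        except json.JSONDecodeError:
            continue  # Skip invalid JSON
    return json_objects

# Invented model reply containing one fenced JSON block.
reply = (
    "Here are your records.\n"
    f"{FENCE}json\n"
    '{"records": [{"lang": "Italian", "data": "Gatto", "level": "WORD"}]}\n'
    f"{FENCE}\n"
)

print(extract_json_from_code_block(reply))
# -> [{'records': [{'lang': 'Italian', 'data': 'Gatto', 'level': 'WORD'}]}]
```
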
utils/caller/llm_client.py CHANGED
@@ -1,9 +1,13 @@
  import base64
  from openai import Client
  from gradio_client.utils import is_http_url_like
  import magic
  from pydantic import BaseModel
  from ..learner.learner import DefaultTool

  def get_client(api_key: str | None = None, **kwargs):
      return Client(
@@ -18,7 +22,7 @@ def encode_image(image_path:str):
      mime_type = mime.from_file(image_path)
      return f"data:{mime_type};base64,{base64_image}"

- def image_to_content(
      image_path:str,
      detail:str="auto",
  ):
@@ -48,34 +52,36 @@ def audio_to_content(
          }
      }

- async def chat_completions(
      messages: list,
      model:str,
      *,
      client : Client | None = None,
-     tool_models:list[BaseModel] = [DefaultTool],
      **kwargs,
- ):
-     tools = kwargs.pop("tools", None)
-     if tools is None:
-         tools = []
-     for tool_model in tool_models:
-         tools.append(
-             {
-                 "type":"function",
-                 "function":{
-                     "parameters":tool_model.model_json_schema(),
-                     "strict":True,
-                 },
-             }
-         )
-
      client = client or get_client()
      response = client.chat.completions.create(
          model=model,
          messages=messages,
-         tools=tools,
          **kwargs,
      )

      return response

  import base64
  from openai import Client
+ from openai.types.chat.chat_completion import ChatCompletion
  from gradio_client.utils import is_http_url_like
  import magic
  from pydantic import BaseModel
  from ..learner.learner import DefaultTool
+ from typing import Any
+ from copy import deepcopy
+ import json

  def get_client(api_key: str | None = None, **kwargs):
      return Client(

      mime_type = mime.from_file(image_path)
      return f"data:{mime_type};base64,{base64_image}"

+ def image_to_one_of_content(
      image_path:str,
      detail:str="auto",
  ):

          }
      }

+ def chat_completions(
      messages: list,
      model:str,
      *,
      client : Client | None = None,
      **kwargs,
+ )->ChatCompletion:
      client = client or get_client()
+
      response = client.chat.completions.create(
          model=model,
          messages=messages,
          **kwargs,
      )

+     return response
+
+ def tts(
+     input,
+     *,
+     client : Client | None = None,
+     voice="fable",
+     model="tts-1"
+ ):
+     client = client or get_client()
+
+     response = client.audio.speech.create(
+         model=model,
+         voice=voice,
+         input=input,
+     )
+
      return response
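
A rough usage sketch for the two helpers this file now exposes. It assumes the package is importable as `utils.caller.llm_client` and that `get_client()` can pick up an API key (for example from the `OPENAI_API_KEY` environment variable); the model name and prompt below are placeholders, not part of the commit:

```python
from tempfile import NamedTemporaryFile

from utils.caller.llm_client import chat_completions, tts

# chat_completions() is now a plain synchronous call returning a ChatCompletion.
response = chat_completions(
    messages=[{"role": "user", "content": "Say hello in Italian."}],
    model="gpt-4o",  # placeholder model name
)
print(response.choices[0].message.content)

# tts() returns the speech response; app.py streams it into a temporary .mp3 file.
audio_path = NamedTemporaryFile(suffix=".mp3", delete=False).name
tts(input="Ciao!").stream_to_file(audio_path)
print(f"audio written to {audio_path}")
```
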
utils/learner/dataclass.py CHANGED
@@ -10,28 +10,13 @@ from .language import (
      LANGUAGE_TO_CODE,
  )

-
-
  class _Record(BaseModel):
      "Base Data Model For Language Learner"
-     lang: T_LANGUAGE_CODES | T_LANGUAGES | str = Field(..., description="The language name.")
      data: str = Field(..., description="The data for the record, like `apple` is vocabulary, `How are you.` is a phrase. `I like your product! How much is this` is a sentence.")
-     type: None = Field(None, description="The field needs to be defined in the sub data model.")
-     meta: dict | Any = Field(None, description="The field to be implement or overwrite, please do not fill this yet.")
-     IPA: Optional[str] = Field(None, description="International Phonetic Alphabet")
-
-     @model_validator(mode='after')
-     def _validator_lang(self)->Self:
-         lang = self.lang.lower()
-         if lang in LANGUAGE_CODES:
-             lang = CODE_TO_LANGUAGE[lang]
-         else:
-             if self.meta is None:
-                 self.meta = {}
-             self.meta["warning.lang"] = f"The language is not in the language list {LANGUAGES}."
-
-         self.lang = lang
-         return self

  class Vocabulary(_Record):
      """
@@ -44,7 +29,7 @@ class Vocabulary(_Record):
      Korean: "고양이", "차", "의사", "학교", "커피", "책"
      Italian: "Gatto", "Auto", "Dottore", "Scuola", "Caffè", "Libro"
      """
-     type: Literal['WORD'] = "WORD"

  class Phrase(_Record):
      """
@@ -57,7 +42,7 @@ class Phrase(_Record):
      Korean: "안녕하세요", "감사합니다", "사랑해요", "왜 그래요?", "오랜만이에요", "얼마예요?"
      Italian: "Ciao", "Grazie", "Ti amo", "Che succede?", "È da tanto tempo!", "Quanto costa?"
      """
-     type: Literal['WORD'] = "WORD"


  class Sentence(_Record):
@@ -71,7 +56,7 @@ class Sentence(_Record):
      Korean: "이건 귀여운 고양이예요.", "커피 한 잔 마시고 싶어요.", "어디에 살아요?", "오늘 날씨가 좋아요.", "저를 도와줄 수 있어요?", "일본어와 한국어를 공부하고 있어요."
      Italian: "Questo è un gatto carino.", "Voglio bere una tazza di caffè.", "Dove vivi?", "Oggi il tempo è bello.", "Puoi aiutarmi?", "Sto imparando il giapponese e il coreano."
      """
-     type: Literal['SENTENCE'] = "SENTENCE"


  class ReadableReference(BaseModel):

      LANGUAGE_TO_CODE,
  )

  class _Record(BaseModel):
      "Base Data Model For Language Learner"
+     lang: T_LANGUAGES | str = Field(..., description="The language name.")
      data: str = Field(..., description="The data for the record, like `apple` is vocabulary, `How are you.` is a phrase. `I like your product! How much is this` is a sentence.")
+     level: None = Field(None, description="The field needs to be defined in the sub data model.")
+     # _meta: dict = Field(..., description="The field to be implement or overwrite, please do not fill this yet.")
+     IPA: Optional[str] = Field(..., description="International Phonetic Alphabet")

  class Vocabulary(_Record):
      """

      Korean: "고양이", "차", "의사", "학교", "커피", "책"
      Italian: "Gatto", "Auto", "Dottore", "Scuola", "Caffè", "Libro"
      """
+     level: Literal['WORD']

  class Phrase(_Record):
      """

      Korean: "안녕하세요", "감사합니다", "사랑해요", "왜 그래요?", "오랜만이에요", "얼마예요?"
      Italian: "Ciao", "Grazie", "Ti amo", "Che succede?", "È da tanto tempo!", "Quanto costa?"
      """
+     level: Literal['PHRASE']


  class Sentence(_Record):

      Korean: "이건 귀여운 고양이예요.", "커피 한 잔 마시고 싶어요.", "어디에 살아요?", "오늘 날씨가 좋아요.", "저를 도와줄 수 있어요?", "일본어와 한국어를 공부하고 있어요."
      Italian: "Questo è un gatto carino.", "Voglio bere una tazza di caffè.", "Dove vivi?", "Oggi il tempo è bello.", "Puoi aiutarmi?", "Sto imparando il giapponese e il coreano."
      """
+     level: Literal['SENTENCE']


  class ReadableReference(BaseModel):
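
With the validator and the old `type`/`meta` fields gone, a record is now just a required `lang`, `data`, a `level` literal, and `IPA`. A minimal sketch of what that buys at validation time, assuming these models live in `utils.learner.dataclass` and are what learner.py refers to as `R_Vocabulary` and friends (the import path is not shown in this diff):

```python
from pydantic import ValidationError

from utils.learner.dataclass import Vocabulary  # assumed import path

# `level` and `IPA` are required now, so the model doubles as a strict schema
# for the JSON the system prompt asks the LLM to emit.
cat = Vocabulary(lang="Italian", data="Gatto", level="WORD", IPA="ˈɡatto")
print(cat.model_dump())

try:
    Vocabulary(lang="Italian", data="Gatto")  # missing level and IPA
except ValidationError as exc:
    print(exc.error_count(), "validation errors")
```
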
utils/learner/learner.py CHANGED
@@ -10,13 +10,14 @@ class DefaultTool(BaseModel):
      `phrase` is less than 8
      `sentence` is less then 5
      """
-     vocabulary: list[R_Vocabulary]
-     phrase: list[R_Phrase]
-     sentence: list[R_Sentence]


  def get_default_system_prompt():
-     return """
  # Assistant Author:
  * 湯沂達 / Tang Yi Dar

@@ -37,6 +38,14 @@ def get_default_system_prompt():
  To fill the gap, he decide to create a instant language learner, which is able to generate the target language text and audio together.

  Most of the time, the input will be photos and texts.
      """

  __all__ = [

      `phrase` is less than 8
      `sentence` is less then 5
      """
+     records: list[R_Vocabulary | R_Phrase | R_Sentence]
+     # vocabulary: list[R_Vocabulary]
+     # phrase: list[R_Phrase]
+     # sentence: list[R_Sentence]


  def get_default_system_prompt():
+     return f"""
  # Assistant Author:
  * 湯沂達 / Tang Yi Dar

  To fill the gap, he decide to create a instant language learner, which is able to generate the target language text and audio together.

  Most of the time, the input will be photos and texts.
+
+ The schema is below:
+ {DefaultTool.model_json_schema()}
+
+ WHEN THE USER IS DOING A TRANSLATION, PLEASE ALSO OUTPUT JSON IN THE FOLLOWING FORMAT:
+ ```json
+ ...
+ ```
      """

  __all__ = [