davidlms committed on
Commit
a5b8859
·
verified ·
1 Parent(s): 51451ea

Upload 15 files

Browse files
aphra/__init__.py ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Aphra package initializer.
3
+
4
+ This module exposes the translate function and the llm_client, prompts, and parsers submodules.
5
+ """
6
+
7
+ from .translate import translate
8
+ from . import llm_client
9
+ from . import prompts
10
+ from . import parsers
11
+
12
+ __all__ = ['translate', 'llm_client', 'prompts', 'parsers']
aphra/llm_client.py ADDED
@@ -0,0 +1,96 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Module for interacting with the model via the OpenRouter API.
3
+ """
4
+
5
+ import logging
6
+ import toml
7
+ import requests
8
+ from openai import OpenAI
9
+
10
class LLMModelClient:
    """
    A client for interacting with the model via the OpenRouter API.
    """

    def __init__(self, config_file):
        """
        Initializes the LLMModelClient with the configuration from a file.

        :param config_file: Path to the TOML file containing the configuration.
        """
        self.load_config(config_file)
        self.client = OpenAI(
            base_url="https://openrouter.ai/api/v1",
            api_key=self.api_key_openrouter
        )
        self.logging_configured = False

    def load_config(self, config_file_path):
        """
        Loads configuration from a TOML file.

        Sets ``self.api_key_openrouter`` from ``[openrouter].api_key`` and
        ``self.llms`` from the ``[llms]`` table.

        :param config_file_path: Path to the TOML file.
        :raises FileNotFoundError: If the file does not exist.
        :raises toml.TomlDecodeError: If the file is not valid TOML.
        :raises KeyError: If a required key is missing from the configuration.
        """
        try:
            with open(config_file_path, 'r', encoding='utf-8') as file:
                config = toml.load(file)
            self.api_key_openrouter = config['openrouter']['api_key']
            self.llms = config['llms']
        except FileNotFoundError:
            logging.error('File not found: %s', config_file_path)
            raise
        except toml.TomlDecodeError:
            logging.error('Error decoding TOML file: %s', config_file_path)
            raise
        except KeyError as e:
            logging.error('Missing key in config file: %s', e)
            raise

    def call_model(self, system_prompt, user_prompt, model_name, log_call=False):
        """
        Calls the model with the provided prompts.

        :param system_prompt: The system prompt to set the context for the model.
        :param user_prompt: The user prompt to send to the model.
        :param model_name: The name of the model to use.
        :param log_call: Boolean indicating whether to log the call details.
        :return: The model's response.
        :raises Exception: Any error raised by the client or while reading the
            response is logged (where handled below) and re-raised unchanged.
        """
        # The request and the extraction are handled separately so that the
        # extraction handler can always rely on ``response`` being bound.
        try:
            response = self.client.chat.completions.create(
                model=model_name,
                messages=[
                    {"role": "system", "content": system_prompt},
                    {"role": "user", "content": user_prompt}
                ]
            )
        except requests.exceptions.RequestException as e:
            logging.error('Request error: %s', e)
            raise

        try:
            response_content = response.choices[0].message.content
        except (ValueError, KeyError, TypeError, IndexError, AttributeError) as e:
            logging.error('Error parsing response: %s', e)
            # Log the response object itself: the completion object returned
            # by the client does not expose a ``.text`` attribute.
            logging.error('Response content: %s', response)
            raise

        if log_call:
            self.log_model_call(user_prompt, response_content)

        return response_content

    def log_model_call(self, user_prompt, response):
        """
        Logs the details of a model call to a log file.

        :param user_prompt: The user prompt sent to the model.
        :param response: The response received from the model.
        """
        if not self.logging_configured:
            # basicConfig only takes effect if the root logger has no handlers yet.
            logging.basicConfig(filename='aphra.log', level=logging.INFO,
                                format='%(asctime)s - %(levelname)s - %(message)s')
            self.logging_configured = True

        logging.info("\nUSER_PROMPT\n")
        logging.info(user_prompt)
        logging.info("\nRESPONSE\n")
        logging.info(response)
aphra/parsers.py ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Module for parsing analysis and translation strings.
3
+ """
4
+
5
+ import xml.etree.ElementTree as ET
6
+ import logging
7
+
8
def parse_analysis(analysis_str):
    """
    Parses the analysis part of the provided string and returns
    a list of items with their names and keywords.

    Items without a (non-empty) <name> are skipped. A missing or empty
    <keywords> element yields an empty keyword list instead of an error.

    :param analysis_str: String containing the analysis in the specified format.
    :return: A list of dictionaries, each containing 'name' and 'keywords' from the analysis.
             An empty list is returned if the <analysis> section is missing or is not valid XML.
    """
    try:
        # Extract the <analysis> part
        analysis_start = analysis_str.index("<analysis>") + len("<analysis>")
        # Search for the closing tag only after the opening one.
        analysis_end = analysis_str.index("</analysis>", analysis_start)
        analysis_content = analysis_str[analysis_start:analysis_end].strip()

        # Parse the analysis content using XML parser; the wrapper element
        # gives the item list a single root.
        root = ET.fromstring(f"<root>{analysis_content}</root>")
    except ValueError as e:
        logging.error('Error parsing analysis string: %s', e)
        return []
    except ET.ParseError as e:
        logging.error('Error parsing XML content: %s', e)
        return []

    items = []
    for item in root.findall('item'):
        # findtext returns None when the tag is absent and '' when it is empty.
        name = (item.findtext('name') or '').strip()
        if not name:
            logging.warning('Skipping analysis item without a name')
            continue
        raw_keywords = item.findtext('keywords') or ''
        keywords = [kw.strip() for kw in raw_keywords.split(',') if kw.strip()]
        items.append({'name': name, 'keywords': keywords})

    return items
38
+
39
def parse_translation(translation_str):
    """
    Parses the provided string and returns the content within
    the <improved_translation> tags.

    :param translation_str: String containing the translation in the specified format.
    :return: String containing the <improved_translation> content, stripped of
             surrounding whitespace, or an empty string if the tags are missing.
    """
    open_tag = "<improved_translation>"
    close_tag = "</improved_translation>"
    try:
        improved_translation_start = translation_str.index(open_tag) + len(open_tag)
        # Search for the closing tag only after the opening one.
        improved_translation_end = translation_str.index(close_tag, improved_translation_start)
    except ValueError as e:
        logging.error('Error parsing translation string: %s', e)
        # Always return a string so callers get a consistent type.
        return ""

    return translation_str[improved_translation_start:improved_translation_end].strip()
aphra/prompts.py ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Module for reading and formatting prompt templates.
3
+ """
4
+
5
+ from pkg_resources import resource_filename
6
+
7
def get_prompt(file_name, **kwargs):
    """
    Loads a prompt template shipped with the package and fills in its placeholders.

    :param file_name: Name of the template file inside the 'prompts' directory.
    :param kwargs: Optional keyword arguments substituted into the template.
    :return: The template text, formatted with kwargs when any are given,
             otherwise returned verbatim.
    """
    template_path = resource_filename(__name__, f'prompts/{file_name}')
    with open(template_path, 'r', encoding="utf-8") as handle:
        template = handle.read()
    # Without arguments the template is returned untouched (no str.format pass).
    return template.format(**kwargs) if kwargs else template
aphra/prompts/step1_system.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ You are an expert translator tasked with analyzing and understanding a {source_language} text. Your goal is to identify specific terms, legal {source_language} terms, phrases, and cultural references that may need explanation or adaptation for an {target_language}-speaking audience.
aphra/prompts/step1_user.txt ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Here is the {source_language} text you need to analyze:
2
+
3
+ <{source_language}_text>
4
+ {post_content}
5
+ </{source_language}_text>
6
+
7
+ Please follow these steps:
8
+
9
+ 1. Carefully read and analyze the {source_language} text.
10
+
11
+ 2. Identify and list any terms, phrases, or cultural references that may be difficult for an {target_language}-speaking audience to understand. This may include:
12
+ - Idiomatic expressions
13
+ - Legal {source_language} terms
14
+ - Culturally specific terms or concepts
15
+ - Historical or geographical references
16
+ - Wordplay or puns that don't translate directly
17
+
18
+ The choices must be present in the text.
19
+
20
+ Present your analysis in the following format:
21
+
22
+ <reasoning>
23
+ Reasoning about the suitability of the chosen terms and/or phrases.
24
+ </reasoning>
25
+
26
+ <analysis>
27
+ <item><name>{source_language} term/phrase 1</name><keywords>keywords that you would use in a search engine to get the proper context of the term</keywords></item>
28
+ <item><name>{source_language} term/phrase 2</name><keywords>keywords that you would use in a search engine to get the proper context of the term</keywords></item>
29
+ (Continue for all identified elements)
30
+ </analysis>
31
+
32
+ Remember to be thorough in your analysis and explanations, considering both linguistic and cultural aspects of the text.
aphra/prompts/step2_system.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ You are tasked with searching for information about a specific term, taking into account provided keywords, to assist a {source_language} to {target_language} translator in making the most reliable and contextualized translation possible. Your goal is to provide comprehensive context and relevant information that will help the translator understand the nuances and cultural implications of the term.
aphra/prompts/step2_user.txt ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ The term to be searched is:
2
+ <term>
3
+ {term}
4
+ </term>
5
+
6
+ The keywords to consider for context are:
7
+ <keywords>
8
+ {keywords}
9
+ </keywords>
10
+
11
+ Follow these steps to complete the task:
12
+
13
+ 1. Conduct a thorough search for information about the term, paying special attention to its usage in {source_language}-speaking contexts.
14
+
15
+ 2. Consider the provided keywords and how they relate to the term. Look for connections between the term and these keywords to provide a more focused context.
16
+
17
+ 3. Gather information from reliable sources, including dictionaries, academic papers, news articles, and cultural references.
18
+
19
+ 4. Organize the information you find into the following categories:
20
+ a. Definition and literal meaning
21
+ b. Cultural context and usage
22
+ c. Regional variations (if applicable)
23
+ d. Historical background (if relevant)
24
+ e. Related terms or concepts
25
+ f. Examples of usage in sentences or phrases
26
+
27
+ 5. Provide any additional information that might be helpful for a translator, such as potential false friends, idiomatic expressions, or common translation pitfalls related to this term.
28
+
29
+ 6. If the term has multiple meanings or uses, make sure to cover all relevant interpretations, especially those that might be influenced by the provided keywords.
30
+
31
+ Present your findings in a clear, concise manner, using bullet points where appropriate. Begin your response with an opening statement that introduces the term and its general meaning or significance.
32
+
33
+ Provide your complete response within <search_results> tags. This will allow the translator to easily identify and utilize the information you've gathered.
34
+
35
+ Remember, your goal is to provide comprehensive context that will enable the translator to make informed decisions about the most appropriate translation of the term, considering its cultural and linguistic nuances.
aphra/prompts/step3_system.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ You are tasked with translating a {source_language} text into {target_language} while maintaining the author's original writing style.
aphra/prompts/step3_user.txt ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Here is the {source_language} text to be translated:
2
+
3
+ <{source_language}_text>
4
+ {text}
5
+ </{source_language}_text>
6
+
7
+ Your goal is to produce an accurate {target_language} translation that preserves the nuances, tone, and stylistic elements of the original {source_language} text. Follow these steps:
8
+
9
+ 1. Carefully read the {source_language} text and analyze the author's writing style. Pay attention to:
10
+ - Sentence structure and length
11
+ - Word choice and level of formality
12
+ - Use of literary devices or figurative language
13
+ - Rhythm and flow of the text
14
+ - Any unique or distinctive elements of the author's voice
15
+
16
+ 2. Begin the translation process:
17
+ - Translate the text sentence by sentence, ensuring accuracy of meaning
18
+ - Choose {target_language} words and phrases that best capture the tone and style of the original
19
+ - Maintain similar sentence structures where possible, unless it compromises clarity in {target_language}
20
+ - Preserve any idiomatic expressions, metaphors, or cultural references, adapting them if necessary to make sense in {target_language} while retaining their essence
21
+
22
+ 3. After completing the translation, review it to ensure it reads naturally in {target_language} while still echoing the original {source_language} style.
23
+
24
+ 4. Provide your {target_language} translation within <translation> tags.
25
+
26
+ 5. After the translation, briefly explain (in 2-3 sentences) how you maintained the author's writing style in your translation. Include this explanation within <style_explanation> tags.
27
+
28
+ Remember, the goal is not just to convey the meaning, but to do so in a way that an {target_language} reader would have a similar experience to a {source_language} reader of the original text.
aphra/prompts/step4_system.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ You are a professional translator and language expert specializing in {source_language} to {target_language} translations. Your task is to critically analyze a basic {target_language} translation of a {source_language} text and provide suggestions for improvement. You will also identify terms that would benefit from translator's notes for better understanding.
aphra/prompts/step4_user.txt ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Here is the original {source_language} text:
2
+ <{source_language}_text>
3
+ {text}
4
+ </{source_language}_text>
5
+
6
+ Here is the basic {target_language} translation:
7
+ <{target_language}_translation>
8
+ {translation}
9
+ </{target_language}_translation>
10
+
11
+ Here is a glossary of terms from the original text, explained and contextualized for a better translation:
12
+ <glossary>
13
+ {glossary}
14
+ </glossary>
15
+
16
+ Please follow these steps to complete your task:
17
+
18
+ 1. Carefully read the {source_language} text, the {target_language} translation, and the glossary.
19
+
20
+ 2. Analyze the translation for accuracy, fluency, and cultural appropriateness. Consider the following aspects:
21
+ - Semantic accuracy: Does the translation convey the same meaning as the original?
22
+ - Grammar and syntax: Is the {target_language} grammatically correct and natural-sounding?
23
+ - Idiomatic expressions: Are {source_language} idioms appropriately translated or adapted?
24
+ - Cultural nuances: Are cultural references accurately conveyed or explained?
25
+ - Terminology: Is specialized vocabulary correctly translated, especially considering the provided glossary?
26
+
27
+ 3. Identify terms or concepts that would benefit from a translator's note. These may include:
28
+ - Cultural references that may not be familiar to the target audience
29
+ - Words or phrases with multiple meanings or connotations in {source_language}
30
+ - Concepts that require additional context for full understanding
31
+
32
+ 4. Provide your criticism and suggestions in the following format:
33
+
34
+ <translation_critique>
35
+ <improvements>
36
+ [List specific suggestions for improving the translation, with explanations for each suggestion]
37
+ </improvements>
38
+
39
+ <translator_notes>
40
+ [List terms or concepts that should have a translator's note, explaining why each note is necessary and what information it should include]
41
+ </translator_notes>
42
+ </translation_critique>
43
+
44
+ Be thorough in your analysis, but also concise in your explanations. Focus on the most important improvements and notes that would significantly enhance the quality and clarity of the translation.
aphra/prompts/step5_system.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ You are tasked with creating an improved {target_language} translation of a {source_language} text. You will be provided with several pieces of information to help you create this translation.
aphra/prompts/step5_user.txt ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Follow these steps carefully:
2
+
3
+ 1. First, read the original {source_language} text:
4
+ <original_{source_language}>
5
+ {text}
6
+ </original_{source_language}>
7
+
8
+ 2. Next, review the basic {target_language} translation:
9
+ <basic_translation>
10
+ {translation}
11
+ </basic_translation>
12
+
13
+ 3. Carefully study the glossary of terms, which provides explanations and context for better translation:
14
+ <glossary>
15
+ {glossary}
16
+ </glossary>
17
+
18
+ 4. Consider the critique of the basic translation:
19
+ <translation_critique>
20
+ {critique}
21
+ </translation_critique>
22
+
23
+ 5. Now, create a new translation taking into account the glossary of terms and the critique. Remember to maintain the author's original style. Pay close attention to the nuances and context provided in the glossary and address the issues raised in the critique.
24
+
25
+ 6. If it is necessary to make a clarification through a translator's note, do so by inserting a numbered reference in square brackets immediately after the term that needs clarification. For example: "Term[1] that needs clarification in the text."
26
+
27
+ 7. After completing your translation, add a "Translator's notes" section at the end of the document. List each numbered note with its corresponding explanation. For example:
28
+
29
+ Translator's notes:
30
+ [1] Description of the note that clarifies term 1.
31
+ [2] Description of the note that clarifies term 2.
32
+
33
+ 8. Present your final output in the following format:
34
+ <improved_translation>
35
+ Your new {target_language} translation, including any numbered references for translator's notes.
36
+
37
+ List your numbered translator's notes here, if any.
38
+ </improved_translation>
39
+
40
+ Remember to carefully consider the context, maintain the author's style, and address the issues raised in the critique while creating your improved translation.
aphra/translate.py ADDED
@@ -0,0 +1,147 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Module for translating text using multiple steps and language models.
3
+ """
4
+
5
+ from dataclasses import dataclass
6
+ from .llm_client import LLMModelClient
7
+ from .prompts import get_prompt
8
+ from .parsers import parse_analysis, parse_translation
9
+
10
@dataclass
class TranslationContext:
    """
    Bundle of settings shared by every step of a translation run.

    Groups the model client used for the calls, the source and target
    languages, and the flag that controls whether model calls are logged.
    """
    # Client used to send prompts to the model.
    model_client: LLMModelClient
    # Language of the original text.
    source_language: str
    # Language the text is translated into.
    target_language: str
    # Whether each model call is logged.
    log_calls: bool
22
+
23
def load_model_client(config_file):
    """
    Builds an LLMModelClient from a configuration file.

    :param config_file: Path to the TOML file containing the configuration.
    :return: An LLMModelClient instance created from that configuration.
    """
    client = LLMModelClient(config_file)
    return client
31
+
32
def execute_model_call(context, system_file, user_file, model_name, **kwargs):
    """
    Renders the system and user prompt templates and sends them to a model.

    :param context: A TranslationContext providing the model client and logging flag.
    :param system_file: File name of the system prompt template.
    :param user_file: File name of the user prompt template.
    :param model_name: The name of the model to use.
    :param kwargs: Optional keyword arguments used to format both templates.
    :return: The model's response content.
    """
    # Both templates receive the same formatting arguments; system first, then user.
    rendered_system, rendered_user = [
        get_prompt(template, **kwargs) for template in (system_file, user_file)
    ]
    client = context.model_client
    return client.call_model(
        rendered_system,
        rendered_user,
        model_name,
        log_call=context.log_calls,
    )
51
+
52
def generate_glossary(context, parsed_items, model_searcher):
    """
    Builds a glossary by asking a model to explain each analysed term.

    :param context: A TranslationContext containing translation parameters.
    :param parsed_items: A list of dictionaries containing 'name' and 'keywords' for each item.
    :param model_searcher: The name of the model used to look up term explanations.
    :return: A single string with one formatted entry per term, joined by newlines.
    """
    entries = []
    for entry in parsed_items:
        term = entry['name']
        # The same comma-joined keyword string goes into the prompt and the entry.
        joined_keywords = ", ".join(entry['keywords'])
        explanation = execute_model_call(
            context,
            'step2_system.txt',
            'step2_user.txt',
            model_searcher,
            term=term,
            keywords=joined_keywords,
            source_language=context.source_language,
            target_language=context.target_language,
        )
        entries.append(
            f"### {term}\n\n**Keywords:** {joined_keywords}\n\n"
            f"**Explanation:**\n{explanation}\n"
        )
    return "\n".join(entries)
79
+
80
def translate(source_language, target_language, text, config_file="config.toml", log_calls=False):
    """
    Translates text through a five-step pipeline of model calls.

    The steps are: term analysis, glossary generation, a first translation,
    a critique of that translation, and a final improved translation.

    :param source_language: The source language of the text.
    :param target_language: The target language of the text.
    :param text: The text to be translated.
    :param config_file: Path to the TOML file containing the configuration.
    :param log_calls: Boolean indicating whether to log the call details.
    :return: The improved translation of the text.
    """
    client = load_model_client(config_file)
    models = client.llms
    context = TranslationContext(client, source_language, target_language, log_calls)

    # Every prompt template is formatted with the same language pair.
    languages = {
        'source_language': source_language,
        'target_language': target_language,
    }

    # Step 1: identify terms that need context.
    analysis = execute_model_call(
        context, 'step1_system.txt', 'step1_user.txt', models['writer'],
        post_content=text, **languages
    )

    # Step 2: build a glossary entry for each identified term.
    glossary = generate_glossary(context, parse_analysis(analysis), models['searcher'])

    # Step 3: produce the first translation.
    draft = execute_model_call(
        context, 'step3_system.txt', 'step3_user.txt', models['writer'],
        text=text, **languages
    )

    # Step 4: critique the first translation using the glossary.
    review = execute_model_call(
        context, 'step4_system.txt', 'step4_user.txt', models['critiquer'],
        text=text, translation=draft, glossary=glossary, **languages
    )

    # Step 5: write the improved translation from the draft, glossary and critique.
    final_output = execute_model_call(
        context, 'step5_system.txt', 'step5_user.txt', models['writer'],
        text=text, translation=draft, glossary=glossary, critique=review, **languages
    )

    return parse_translation(final_output)