Spaces:
Running
Running
github-actions[bot]
committed on
Commit
·
2797a7a
0
Parent(s):
Sync from https://github.com/ismailsimsek/aistorybooks
Browse files- .github/workflows/hugging-face-sync.yml +51 -0
- .gitignore +3 -0
- README.md +110 -0
- aistorybooks.iml +12 -0
- aistorybooks/__init__.py +0 -0
- aistorybooks/autogena/__init__.py +0 -0
- aistorybooks/autogena/classic_stories.py +128 -0
- aistorybooks/config.py +28 -0
- aistorybooks/crewaia/__init__.py +0 -0
- aistorybooks/crewaia/classic_poems.py +211 -0
- aistorybooks/crewaia/classic_stories.py +203 -0
- aistorybooks/crewaia/tools.py +112 -0
- aistorybooks/phidataa/__init__.py +0 -0
- aistorybooks/phidataa/classic_stories.py +114 -0
- aistorybooks/utils.py +39 -0
- app.py +290 -0
- poem.md +42 -0
- requirements.txt +17 -0
- story.md +86 -0
- tests/__init__.py +0 -0
- tests/test_phidata.py +41 -0
- tests/test_utils.py +50 -0
.github/workflows/hugging-face-sync.yml
ADDED
@@ -0,0 +1,51 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
name: Sync to Hugging Face Spaces
|
2 |
+
|
3 |
+
on:
|
4 |
+
workflow_dispatch:
|
5 |
+
push:
|
6 |
+
branches:
|
7 |
+
- main
|
8 |
+
|
9 |
+
jobs:
|
10 |
+
sync:
|
11 |
+
name: Sync
|
12 |
+
runs-on: ubuntu-latest
|
13 |
+
|
14 |
+
steps:
|
15 |
+
- name: Checkout Repository
|
16 |
+
uses: actions/checkout@v3
|
17 |
+
|
18 |
+
- name: Remove bad files
|
19 |
+
run: rm -rf poem.pdf images/*png images/*gif tests/resources/LoremIpsum.pdf
|
20 |
+
|
21 |
+
- name: Sync to Hugging Face Spaces
|
22 |
+
uses: JacobLinCool/huggingface-sync@v1
|
23 |
+
with:
|
24 |
+
github: ${{ secrets.GITHUB_TOKEN }}
|
25 |
+
user: ismailsimsek # Hugging Face username or organization name
|
26 |
+
space: aistorybooks # Hugging Face space name
|
27 |
+
token: ${{ secrets.HF_TOKEN }} # Hugging Face token
|
28 |
+
###################################
|
29 |
+
emoji: 📖
|
30 |
+
# Color for Thumbnail gradient (red, yellow, green, blue, indigo, purple, pink, gray).
|
31 |
+
colorFrom: blue
|
32 |
+
colorTo: green
|
33 |
+
# Can be either gradio, streamlit, docker, or static.
|
34 |
+
sdk: streamlit
|
35 |
+
# Any valid Python 3.x or 3.x.x version.
|
36 |
+
# python_version: # optional
|
37 |
+
# # Specify the version of the selected SDK (Streamlit or Gradio). All versions of Gradio are supported. Streamlit versions are supported from 0.79.0 to 1.19.0.
|
38 |
+
sdk_version: 1.44.0
|
39 |
+
# Path to your main application file (which contains either gradio or streamlit Python code, or static html code). Path is relative to the root of the repository.
|
40 |
+
app_file: app.py
|
41 |
+
# # Port on which your application is running. Used only if sdk is docker.
|
42 |
+
# app_port: # optional
|
43 |
+
# # For non-static spaces, initial url to render. Needs to start with /. For static spaces, use app_file instead.
|
44 |
+
# base_path: # optional
|
45 |
+
# # Whether your Space is rendered inside a full-width (when true) or fixed-width column (ie. “container” CSS) inside the iframe. Defaults to false in gradio and streamlit, and to true for other sdks.
|
46 |
+
# fullWidth: # optional
|
47 |
+
# # Whether the Space stays on top of your profile. Can be useful if you have a lot of Spaces so you and others can quickly see your best Space.
|
48 |
+
pinned: true
|
49 |
+
title: AI-Powered Storybooks
|
50 |
+
short_description: AI-Powered Storybook/Poem Generation for Language Learners
|
51 |
+
license: apache-2.0
|
.gitignore
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
*api-key.txt
|
2 |
+
.idea
|
3 |
+
run.sh
|
README.md
ADDED
@@ -0,0 +1,110 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
title: AI-Powered Storybooks
|
3 |
+
emoji: 📖
|
4 |
+
colorFrom: blue
|
5 |
+
colorTo: green
|
6 |
+
sdk: streamlit
|
7 |
+
sdk_version: 1.44.0
|
8 |
+
app_file: app.py
|
9 |
+
short_description: AI-Powered Storybook/Poem Generation for Language Learners
|
10 |
+
pinned: true
|
11 |
+
tags: [ "llm","llm-agents" ]
|
12 |
+
---
|
13 |
+
|
14 |
+
# AI-Powered Storybook and Poem Generation for Language Learners
|
15 |
+
|
16 |
+
This project explores the use of AI agents and various agentic frameworks to create engaging learning materials. It
|
17 |
+
experiments with different agentic flows and tool integrations to enhance the learning experience.
|
18 |
+
|
19 |
+
The primary focus is to assist language learners by generating storybooks and poems tailored to their specific needs.
|
20 |
+
For example, users can transform classic novels into storybooks adapted for language learning, customized to their
|
21 |
+
desired proficiency level.
|
22 |
+
|
23 |
+
You can interact with and use the live application directly on [Hugging Face Spaces](https://huggingface.co/spaces/ismailsimsek/aistorybooks)
|
24 |
+
|
25 |
+

|
26 |
+
|
27 |
+
## Classics to Story Book Generators
|
28 |
+
|
29 |
+
This tool simplifies classic novels into engaging storybooks. You can choose the target language, reading level,
|
30 |
+
and even writing style; the tool then saves the final product as a convenient markdown file.
|
31 |
+
|
32 |
+
Perfect for language learners to experience the joy of classic stories in a language that supports their learning
|
33 |
+
journey.
|
34 |
+
|
35 |
+
### Classics to Story Book Generator V2
|
36 |
+
|
37 |
+
This version of the storybook generator takes a PDF file of a classic novel as input and transforms it into a storybook
|
38 |
+
tailored to your specifications. It leverages advanced language processing to summarize the content, adjust the reading
|
39 |
+
level, and adapt the writing style to your preferences. This version is designed to work directly with the content of
|
40 |
+
the provided PDF.
|
41 |
+
|
42 |
+
**Key Features:**
|
43 |
+
|
44 |
+
* **PDF Input:** Accepts a PDF file of a classic novel as the primary input source.
|
45 |
+
* **Customizable Output:** Allows you to specify the target language, reading level (e.g., A1 Intermediate), summary
|
46 |
+
size (e.g., Long with 150 sentences/1200 words), and writing style (e.g., Philosophical).
|
47 |
+
* **Chunk-Based Processing:** Processes the PDF content in chunks, allowing for efficient handling of large documents.
|
48 |
+
|
49 |
+
```python
|
50 |
+
from aistorybooks.phidataa.classic_stories import PhiStoryBookGenerator
|
51 |
+
from pathlib import Path
|
52 |
+
|
53 |
+
generator = PhiStoryBookGenerator(
|
54 |
+
language="German",
|
55 |
+
level="A1 Intermediate",
|
56 |
+
summary_size="Long (150 sentences/1200 words)",
|
57 |
+
writing_style="Philosophical",
|
58 |
+
)
|
59 |
+
pdf_file = Path("The-Brothers-Karamazov.pdf")
|
60 |
+
results = generator.run(pdf_file=pdf_file, chunk_size=1, padding=0, skip_first_n_pages=0)
|
61 |
+
|
62 |
+
for response in results:
|
63 |
+
print(response.content)
|
64 |
+
|
65 |
+
```
|
66 |
+
|
67 |
+
### Classics to Story Book Generator V1
|
68 |
+
|
69 |
+
This version uses LLM knowledge to generate the story, and it adds an LLM-generated illustration related to the story content
|
70 |
+
to the final markdown file.
|
71 |
+
|
72 |
+
Example Output:
|
73 |
+
|
74 |
+
- [story.md](story.md)
|
75 |
+
|
76 |
+
Usage:
|
77 |
+
|
78 |
+
```python
|
79 |
+
from aistorybooks.crewaia.classic_stories import StoryBookGenerator
|
80 |
+
|
81 |
+
generator = StoryBookGenerator(book="The Karamazov Brothers",
|
82 |
+
author="Fyodor Dostoevsky",
|
83 |
+
language="German",
|
84 |
+
level="A2 Beginner",
|
85 |
+
summary_size="10 Chapters, each chapter more than 100 sentences log",
|
86 |
+
writing_style="Philosophical")
|
87 |
+
generator.generate()
|
88 |
+
```
|
89 |
+
|
90 |
+
## Classics to Poem Generator
|
91 |
+
|
92 |
+
Transforms a classic book into a poem and saves it as markdown and PDF, with an LLM-generated illustration.
|
93 |
+
|
94 |
+
**Specify the poem style**: Mention a specific poetic style (e.g., "Alexander Pushkin") to get a poem written in that style.
|
95 |
+
|
96 |
+
Example Output:
|
97 |
+
|
98 |
+
- [poem.md](poem.md)
|
99 |
+
- [poem.pdf](poem.pdf)
|
100 |
+
|
101 |
+
Usage:
|
102 |
+
|
103 |
+
```python
|
104 |
+
from aistorybooks.crewaia.classic_poems import ClassicPoemGenerator
|
105 |
+
|
106 |
+
generator = ClassicPoemGenerator(book="The Karamazov Brothers",
|
107 |
+
author="Fyodor Dostoevsky",
|
108 |
+
poetic_style="Alexander Pushkin and Philosophical")
|
109 |
+
generator.generate()
|
110 |
+
```
|
aistorybooks.iml
ADDED
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
<?xml version="1.0" encoding="UTF-8"?>
|
2 |
+
<module type="PYTHON_MODULE" version="4">
|
3 |
+
<component name="NewModuleRootManager" inherit-compiler-output="true">
|
4 |
+
<exclude-output />
|
5 |
+
<content url="file://$MODULE_DIR$">
|
6 |
+
<sourceFolder url="file://$MODULE_DIR$/aistorybooks" isTestSource="false" />
|
7 |
+
<sourceFolder url="file://$MODULE_DIR$/tests" isTestSource="true" />
|
8 |
+
</content>
|
9 |
+
<orderEntry type="inheritedJdk" />
|
10 |
+
<orderEntry type="sourceFolder" forTests="false" />
|
11 |
+
</component>
|
12 |
+
</module>
|
aistorybooks/__init__.py
ADDED
File without changes
|
aistorybooks/autogena/__init__.py
ADDED
File without changes
|
aistorybooks/autogena/classic_stories.py
ADDED
@@ -0,0 +1,128 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import chromadb
|
2 |
+
from autogen.agentchat.contrib.retrieve_user_proxy_agent import RetrieveUserProxyAgent
|
3 |
+
from pathlib import Path
|
4 |
+
|
5 |
+
import autogen
|
6 |
+
from config import Config
|
7 |
+
|
8 |
+
config_list = [
|
9 |
+
# {"model": "gpt-3.5-turbo-0125", "api_key": Config.OPENAI_API_KEY},
|
10 |
+
{"model": Config.GROQ_MODEL_NAME,
|
11 |
+
"api_key": Config.GROQ_API_KEY,
|
12 |
+
"base_url": Config.GROQ_API_BASE_URL
|
13 |
+
}
|
14 |
+
]
|
15 |
+
|
16 |
+
|
17 |
+
class Classics2StoryBookGenerator:
    """Summarize and translate a classic novel with an AutoGen group chat.

    The constructor builds six agents (Admin, Planner, Critic, Library,
    Summarizer Author, Translator) and a ``GroupChat`` whose speaker order is
    restricted by an explicit transition graph. :meth:`generate` starts the
    conversation from the Admin agent.
    """

    CURRENT_DIR: Path = Path(__file__).parent

    def __init__(self, book: str, author: str, language: str = "English", level: str = "A2 Beginner",
                 summary_size: str = "10 Chapter, each chapter with 100 sentences", writing_style: str = "Funny"):
        """Store the book preferences and wire up the agents and the group chat.

        Args:
            book: Title of the classic novel.
            author: Author of the classic novel.
            language: Target language of the final summary.
            level: Language proficiency level of the final summary.
            summary_size: Desired size of the summary, as free text for the prompt.
            writing_style: Desired writing style, as free text for the prompt.
        """
        self.book = book
        self.author = author
        self.language = language
        self.level = level
        self.summary_size = summary_size
        self.writing_style = writing_style

        self.llm_config = {"config_list": config_list, "cache_seed": 42}
        self.human_admin = autogen.UserProxyAgent(
            name="Admin",
            system_message="A human admin, who ask questions and give tasks.",
            human_input_mode="NEVER",
            # Code execution is not wanted here. Passing a dict such as
            # {"use_docker": False} would *enable* execution on the host, so
            # disable it explicitly with False.
            code_execution_config=False,
        )

        self.planner = autogen.AssistantAgent(
            name="Planner",
            system_message="""
            Planner. Suggest a plan. Revise the plan based on feedback from admin and critic, until admin approval.
            The plan may involve multiple agents.
            Explain the plan first. Be clear which step is performed by each agent.
            """,
            llm_config=self.llm_config,
        )

        self.critic = autogen.AssistantAgent(
            name="Critic",
            system_message="Critic. Double check plan, claims, summaries from other agents and provide feedback. "
                           "Check whether the plan does whats asked and delegates tasks to all agents.",
            llm_config=self.llm_config,
        )

        self.library = RetrieveUserProxyAgent(
            name="Library",
            default_auto_reply="Reply `TERMINATE` if the task is done.",
            system_message="Assistant who has extra content retrieval power for solving difficult problems.",
            description="Assistant who can retrieve content from documents. "
                        "Help `Summarizer Author` to retrieve story content from the book.",
            human_input_mode="NEVER",
            retrieve_config={
                "task": "qa",
                # NOTE(review): the document path is fixed to this PDF and does
                # not depend on ``self.book`` - confirm this is intended.
                "docs_path": self.CURRENT_DIR.joinpath("books/The-Brothers-Karamazov.pdf").as_posix(),
                "chunk_token_size": 2000,
                "client": chromadb.PersistentClient(path=self.CURRENT_DIR.joinpath("chromadb").as_posix()),
                "get_or_create": True,
            },
            # Same as for the Admin agent: no code execution wanted.
            code_execution_config=False,
        )

        self.summarizer = autogen.AssistantAgent(
            name="Summarizer Author",
            llm_config=self.llm_config,
            system_message="You are an author writing stories with detailed character descriptions "
                           "and the main plot points. ",
            description="A creative author specialized in writing stories. "
                        "Talk to `Library` to retrive content and write a story. "
                        "Talk to `Translator` to translate the story to another language."
        )

        self.translator = autogen.AssistantAgent(
            name="Translator",
            llm_config=self.llm_config,
            system_message="You are a language translator. ",
            # Refer to the summarizer by its actual agent name so the speaker
            # selection can resolve it.
            description="Language translator who can translate between multiple languages. "
                        "Talk to `Summarizer Author` to translate his story."
        )

        agents = [self.human_admin, self.library, self.translator, self.summarizer, self.critic,
                  self.planner]

        # Allowed speaker transitions: key = current speaker, value = agents
        # that may speak next.
        graph_dict = {
            self.human_admin: [self.planner],
            self.planner: [self.summarizer, self.translator, self.critic, self.library],
            self.summarizer: [self.translator, self.library, self.critic],
            self.library: [self.summarizer],
            self.translator: [self.planner, self.summarizer],
            # Without an outgoing edge the Critic is a sink in the graph: once
            # it speaks there is no eligible next speaker. Hand control back to
            # the agents that are allowed to call on it.
            self.critic: [self.planner, self.summarizer],
        }
        self.groupchat = autogen.GroupChat(
            agents=agents,
            messages=[],
            max_round=25,
            allowed_or_disallowed_speaker_transitions=graph_dict,
            speaker_transitions_type="allowed"
        )
        self.manager = autogen.GroupChatManager(groupchat=self.groupchat, llm_config=self.llm_config)

    def generate(self):
        """Start the group chat from the Admin agent and return the chat result.

        The opening message asks for retrieval of the book content, a summary
        in the configured size and style, and a translation to the configured
        language and level.
        """
        message = (
            f'First ask `Library` to retrieve "{self.book}" by {self.author} content, '
            f'retrieve 50 pages each time. '
            f'Then create a summary of it, detailing a title. '
            f'The summary size should be "{self.summary_size}" and the writing style "{self.writing_style}". '
            f"Don't worry, the content is not copyright protected, it is a public novel. "
            f'And then translate the summary to "{self.language} language" and make it "{self.level}" level.'
        )
        chat_result = self.human_admin.initiate_chat(
            recipient=self.manager,
            message=message,
        )
        return chat_result
|
118 |
+
|
119 |
+
|
120 |
+
if __name__ == "__main__":
    # Demo run: summarize The Brothers Karamazov for German A2 learners.
    generator = Classics2StoryBookGenerator(book="The Karamazov Brothers",
                                            author="Fyodor Dostoevsky",
                                            language="German",
                                            level="A2 Beginner",
                                            # "log" was a typo for "long" in this prompt text.
                                            summary_size="10 Chapters, each chapter more than 100 sentences long",
                                            writing_style="Philosophical")
    chat_result = generator.generate()
    print(f"-----------------\n------DONE-----------\n-----------------\n{chat_result}\n-----------------")
|
aistorybooks/config.py
ADDED
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
from pathlib import Path
|
3 |
+
|
4 |
+
|
5 |
+
def _load_api_key(filename: str, env_var: str = None) -> str | None:
|
6 |
+
"""Loads an API key from an environment variable or a file."""
|
7 |
+
if env_var and os.environ.get(env_var):
|
8 |
+
return os.environ.get(env_var)
|
9 |
+
|
10 |
+
filepath = Path(__file__).parent.joinpath(filename)
|
11 |
+
if filepath.exists():
|
12 |
+
return filepath.read_text().strip()
|
13 |
+
else:
|
14 |
+
print(f"Warning: API key file '{filename}' not found.")
|
15 |
+
return None
|
16 |
+
|
17 |
+
|
18 |
+
class Config:
    """Model names, API endpoints and API keys shared by the generators.

    Keys are resolved once, when the class body runs at import time. Each key
    is taken from its environment variable when one is named and set,
    otherwise from a text file next to this module.
    """

    GROQ_API_BASE_URL = "https://api.groq.com/openai/v1"
    GROQ_MODEL_NAME = "llama3-70b-8192"
    OPENAI_API_BASE_URL = "https://api.openai.com/v1/"
    OPENAI_MODEL_NAME = "gpt-4"
    OPENAI_MODEL_GPT_4O_MINI_NAME = "gpt-4o-mini"
    GEMINI_MODEL_NAME = "gemini-2.0-flash-lite"
    # Key files are git-ignored, so a deployed app can only receive keys via
    # the environment. Groq and OpenAI get the same env fallback as Gemini.
    GROQ_API_KEY = _load_api_key("groq-api-key.txt", env_var="GROQ_API_KEY")
    OPENAI_API_KEY = _load_api_key("openai-api-key.txt", env_var="OPENAI_API_KEY")
    LOCAL_LLM_API_KEY = _load_api_key("local-api-key.txt")
    GEMINI_API_KEY = _load_api_key("gemini-api-key.txt", env_var="GOOGLE_API_KEY")
|
aistorybooks/crewaia/__init__.py
ADDED
File without changes
|
aistorybooks/crewaia/classic_poems.py
ADDED
@@ -0,0 +1,211 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
from typing import List, Optional
|
3 |
+
|
4 |
+
os.environ["OTEL_SDK_DISABLED"] = "true"
|
5 |
+
from crewai import Agent, Task, Crew, Process
|
6 |
+
from crewai import LLM
|
7 |
+
from crewai.agents.agent_builder.base_agent import BaseAgent
|
8 |
+
|
9 |
+
from config import Config # Assuming you have a config.py file
|
10 |
+
from tools import ImageGenerator, MarkdownToPdfConverter
|
11 |
+
|
12 |
+
|
13 |
+
class ClassicPoemGenerator:
    """Generate an illustrated poem from a classic novel with a CrewAI crew.

    Five tasks run sequentially, each with its own agent: write the poem,
    restyle it, generate one image, format everything as markdown, and
    convert the markdown file to PDF.
    """

    # Markdown file written by the formatting task and named in the PDF
    # converter's goal. Kept in one place so the two cannot drift apart.
    OUTPUT_MARKDOWN_FILE = "poem.md"

    def __init__(
            self,
            book: str,
            author: str,
            poetic_style: str = "Philosophical",
    ):
        """Store the book details and prepare the language model.

        Args:
            book (str): The title of the classic novel.
            author (str): The author of the classic novel.
            poetic_style (str, optional): The poetic style for the poem. Defaults to "Philosophical".
        """
        self.book = book
        self.author = author
        self.poetic_style = poetic_style
        self.llm = self._initialize_llm()
        # Tasks and crew are built lazily and cached by the properties below.
        self._tasks: Optional[List[Task]] = None
        self._crew: Optional[Crew] = None

    def _initialize_llm(self) -> LLM:
        """Return the Groq-backed language model configured from ``Config``."""
        return LLM(
            model="groq/" + Config.GROQ_MODEL_NAME,
            api_key=Config.GROQ_API_KEY,
            api_base=Config.GROQ_API_BASE_URL,
        )

    def _create_poet_agent(self) -> Agent:
        """Create the Poet agent that writes the first version of the poem."""
        return Agent(
            role="Poet",
            goal="Turns given story, novel or literature content to a beautiful poem.",
            backstory="A talented poet who summarizes novels and turns them into beautiful, imaginative and "
                      "metaphorical poems.",
            verbose=True,
            llm=self.llm,
            allow_delegation=False,
        )

    def _create_creative_poet_agent(self) -> Agent:
        """Create the Creative Poet agent that restyles the poem."""
        return Agent(
            role="Creative Poet",
            goal=f"Takes the given poem and improves it with {self.poetic_style} style",
            backstory=f"A creative author specialized in {self.poetic_style} style poems. "
                      f"Improves poems with {self.poetic_style} elements.",
            verbose=True,
            llm=self.llm,
            allow_delegation=False,
        )

    def _create_image_generator_agent(self) -> Agent:
        """Create the Image Generator agent, equipped with the ``ImageGenerator`` tool."""
        return Agent(
            role="Image Generator",
            goal="Generate one image for the poem written by the creative poet. Final output should "
                 "contain 1 image in json format. Use full poem text to generate the image.",
            backstory="A creative AI specialized in visual storytelling, bringing each poem to life through "
                      "imaginative imagery.",
            verbose=True,
            llm=self.llm,
            tools=[ImageGenerator()],
            allow_delegation=False,
        )

    def _create_content_formatter_agent(self) -> Agent:
        """Create the Content Formatter agent that produces the markdown document."""
        return Agent(
            role="Content Formatter",
            goal="Format the written story content in markdown, including image at the beginning of each poem.",
            backstory="A meticulous formatter who enhances the readability and presentation of the storybook.",
            verbose=True,
            llm=self.llm,
            allow_delegation=False,
        )

    def _create_markdown_to_pdf_creator_agent(self) -> Agent:
        """Create the PDF Converter agent, equipped with the ``MarkdownToPdfConverter`` tool."""
        return Agent(
            role="PDF Converter",
            goal=f"Convert the Markdown file to a PDF document. "
                 f"{self.OUTPUT_MARKDOWN_FILE} is the markdown file name.",
            backstory="An efficient converter that transforms Markdown files into professionally formatted PDF "
                      "documents.",
            verbose=True,
            llm=self.llm,
            tools=[MarkdownToPdfConverter()],
            allow_delegation=False,
        )

    def _create_create_poem_task(self) -> Task:
        """Create the task that writes the poem from the book."""
        poet_agent = self._create_poet_agent()
        return Task(
            description=f"Create a summary of {self.book} from {self.author} as a poem. "
                        f"Giving it a title and character descriptions and making it {self.poetic_style} style poem. "
                        f"Include title of the poem at the top.",
            agent=poet_agent,
            expected_output="A structured document with the poem and title of the poem at the top.",
        )

    def _create_improve_poem_task(self, create_poem_task: Task) -> Task:
        """Create the task that restyles the poem; uses the first poem as context."""
        creative_poet_agent = self._create_creative_poet_agent()
        return Task(
            description=f"Improve the given poem with {self.poetic_style} style. ",
            agent=creative_poet_agent,
            expected_output=f"An improved poem with {self.poetic_style} style.",
            context=[create_poem_task],
        )

    def _create_generate_image_task(self, improve_poem_task: Task) -> Task:
        """Create the task that generates one image; uses the improved poem as context."""
        image_generator_agent = self._create_image_generator_agent()
        return Task(
            description="Generate 1 image that represents the poem text. "
                        f"Aligning with the {self.poetic_style} theme outlined in the poem. "
                        "Use the full poem content to generate image. Don't summarize it, use it as is.",
            agent=image_generator_agent,
            expected_output="A digital image file that visually represents the poem.",
            context=[improve_poem_task],
        )

    def _create_format_content_task(self, improve_poem_task: Task, generate_image_task: Task) -> Task:
        """Create the task that formats poem and image as markdown and writes the output file."""
        content_formatter_agent = self._create_content_formatter_agent()
        return Task(
            description="Format given poem content as a markdown document, "
                        "Including an image after the title and before the poem. Use <br> as a linebreak",
            agent=content_formatter_agent,
            expected_output="The entire poem content formatted in markdown, with a linebreak after each verse "
                            "and the image added after the title.",
            context=[improve_poem_task, generate_image_task],
            output_file=self.OUTPUT_MARKDOWN_FILE,
        )

    def _create_markdown_to_pdf_task(self, format_content_task: Task) -> Task:
        """Create the task that converts the markdown file to PDF."""
        markdown_to_pdf_creator_agent = self._create_markdown_to_pdf_creator_agent()
        return Task(
            description="Convert a Markdown file to a PDF document, ensuring the preservation of formatting, "
                        "structure, and embedded images using the mdpdf library.",
            agent=markdown_to_pdf_creator_agent,
            expected_output="A PDF file generated from the Markdown input, accurately reflecting the content with "
                            "proper formatting. The PDF should be ready for sharing or printing.",
            context=[format_content_task],
        )

    @property
    def tasks(self) -> List[Task]:
        """Return the five tasks in execution order, building them on first access."""
        if self._tasks is None:
            create_poem_task = self._create_create_poem_task()
            improve_poem_task = self._create_improve_poem_task(create_poem_task)
            generate_image_task = self._create_generate_image_task(improve_poem_task)
            format_content_task = self._create_format_content_task(improve_poem_task, generate_image_task)
            markdown_to_pdf_task = self._create_markdown_to_pdf_task(format_content_task)

            self._tasks = [create_poem_task, improve_poem_task, generate_image_task, format_content_task,
                           markdown_to_pdf_task]
        return self._tasks

    @property
    def agents(self) -> List[BaseAgent]:
        """Return the agent of each task, in task order."""
        return [task.agent for task in self.tasks]

    @property
    def crew(self) -> Crew:
        """Return the sequential crew, building it on first access."""
        if self._crew is None:
            self._crew = Crew(
                agents=self.agents,
                tasks=self.tasks,
                verbose=True,
                process=Process.sequential,
            )
        return self._crew

    def generate(self):
        """Run the crew, print its result and return it.

        Returns:
            Whatever ``Crew.kickoff()`` returns. It was previously only
            printed, so callers had no access to it.
        """
        result = self.crew.kickoff()
        print(result)
        return result
|
203 |
+
|
204 |
+
|
205 |
+
if __name__ == "__main__":
    # Demo run: turn The Brothers Karamazov into a Pushkin-style poem.
    demo_settings = {
        "book": "The Karamazov Brothers",
        "author": "Fyodor Dostoevsky",
        "poetic_style": "Alexander Pushkin and Philosophical",
    }
    generator = ClassicPoemGenerator(**demo_settings)
    generator.generate()
|
aistorybooks/crewaia/classic_stories.py
ADDED
@@ -0,0 +1,203 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
from typing import List, Optional
|
3 |
+
|
4 |
+
os.environ["OTEL_SDK_DISABLED"] = "true"
|
5 |
+
from crewai import Agent, Task, Crew, Process
|
6 |
+
from crewai import LLM
|
7 |
+
from crewai.agents.agent_builder.base_agent import BaseAgent
|
8 |
+
|
9 |
+
from config import Config # Assuming you have a config.py file
|
10 |
+
from tools import ImageGenerator
|
11 |
+
|
12 |
+
|
13 |
+
class StoryBookGenerator:
|
14 |
+
"""
|
15 |
+
A class to generate a storybook based on a classic novel, including translation,
|
16 |
+
image generation, and content formatting.
|
17 |
+
"""
|
18 |
+
|
19 |
+
def __init__(
|
20 |
+
self,
|
21 |
+
book: str,
|
22 |
+
author: str,
|
23 |
+
language: str = "English",
|
24 |
+
level: str = "A2 Beginner",
|
25 |
+
summary_size: str = "10 Chapter, each chapter with 100 sentences",
|
26 |
+
writing_style: str = "Funny",
|
27 |
+
):
|
28 |
+
"""
|
29 |
+
Initializes the StoryBookGenerator with book details and preferences.
|
30 |
+
|
31 |
+
Args:
|
32 |
+
book (str): The title of the classic novel.
|
33 |
+
author (str): The author of the classic novel.
|
34 |
+
language (str, optional): The target language for translation. Defaults to "English".
|
35 |
+
level (str, optional): The language proficiency level for translation. Defaults to "A2 Beginner".
|
36 |
+
summary_size (str, optional): The desired size of the summary. Defaults to "10 Chapter, each chapter with 100 sentences".
|
37 |
+
writing_style (str, optional): The writing style for the summary. Defaults to "Funny".
|
38 |
+
"""
|
39 |
+
self.book = book
|
40 |
+
self.author = author
|
41 |
+
self.language = language
|
42 |
+
self.level = level
|
43 |
+
self.summary_size = summary_size
|
44 |
+
self.writing_style = writing_style
|
45 |
+
self.llm = self._initialize_llm()
|
46 |
+
self._tasks: Optional[List[Task]] = None
|
47 |
+
self._crew: Optional[Crew] = None
|
48 |
+
|
49 |
+
def _initialize_llm(self) -> LLM:
|
50 |
+
"""Initializes the language model."""
|
51 |
+
return LLM(
|
52 |
+
model="openai/" + Config.OPENAI_MODEL_NAME,
|
53 |
+
api_key=Config.OPENAI_API_KEY,
|
54 |
+
api_base=Config.OPENAI_API_BASE_URL,
|
55 |
+
)
|
56 |
+
# return LLM(
|
57 |
+
# model="groq/" + Config.GROQ_OPENAI_MODEL_NAME,
|
58 |
+
# api_key=Config.GROQ_OPENAI_API_KEY,
|
59 |
+
# api_base=Config.GROQ_OPENAI_API_BASE_URL,
|
60 |
+
# )
|
61 |
+
|
62 |
+
def _create_author_agent(self) -> Agent:
|
63 |
+
"""Creates the Author agent."""
|
64 |
+
return Agent(
|
65 |
+
role="Author",
|
66 |
+
goal=f"Creates {self.summary_size} version of well known novels. Improves the summary with {self.writing_style} style",
|
67 |
+
backstory=f"A creative author specialized in {self.writing_style} style stories. Improves stories with {self.writing_style} elements.",
|
68 |
+
verbose=True,
|
69 |
+
llm=self.llm,
|
70 |
+
allow_delegation=False,
|
71 |
+
)
|
72 |
+
|
73 |
+
def _create_translator_agent(self) -> Agent:
|
74 |
+
"""Creates the Translator agent."""
|
75 |
+
return Agent(
|
76 |
+
role="Translator",
|
77 |
+
goal=f"""Translates given text to {self.language} language.
|
78 |
+
Simplifies the translation to {self.level} level for language learners.""",
|
79 |
+
backstory=f"An talented translator translates english text to {self.language}.",
|
80 |
+
verbose=True,
|
81 |
+
llm=self.llm,
|
82 |
+
allow_delegation=False,
|
83 |
+
)
|
84 |
+
|
85 |
+
def _create_image_generator_agent(self) -> Agent:
    """Build the "Image Generator" agent, equipped with the ``ImageGenerator`` tool."""
    goal = (
        "Generate image for the given story. Final output should contain 1 image in json format. "
        "Use full story text to generate the image."
    )
    backstory = (
        "A creative AI specialized in visual storytelling, bringing each chapter to life through "
        "imaginative imagery."
    )
    illustrator = Agent(
        role="Image Generator",
        goal=goal,
        backstory=backstory,
        verbose=True,
        llm=self.llm,
        tools=[ImageGenerator()],
        allow_delegation=False,
    )
    return illustrator
|
98 |
+
|
99 |
+
def _create_content_formatter_agent(self) -> Agent:
    """Build the "Content Formatter" agent that lays the story out in markdown."""
    settings = {
        "role": "Content Formatter",
        "goal": "Format the written story content in markdown, including images at the beginning of each chapter.",
        "backstory": "A meticulous formatter who enhances the readability and presentation of the storybook.",
        "verbose": True,
        "llm": self.llm,
        "allow_delegation": False,
    }
    return Agent(**settings)
|
109 |
+
|
110 |
+
def _create_summarize_task(self) -> Task:
    """Create the task that asks the Author agent to summarise the book.

    The prompt names the configured book and author and requests a summary
    of ``self.summary_size``.
    """
    author_agent = self._create_author_agent()
    return Task(
        # Prompt grammar fixed ("Create an summary", lowercase sentence start).
        description=(
            f"Create a summary of {self.book} by {self.author}, detailing "
            f"a title and character descriptions. Summary size should be {self.summary_size}."
        ),
        agent=author_agent,
        expected_output=(
            f"A structured outline story with the length of {self.summary_size}. "
            "It includes detailed character descriptions and the main plot points."
        ),
    )
|
120 |
+
|
121 |
+
def _create_generate_image_task(self, summarize_task: Task) -> Task:
    """Create the task that asks the Image Generator agent for one illustration.

    Args:
        summarize_task: The summary task; passed as ``context`` so the image
            task receives its output.
    """
    image_generator_agent = self._create_image_generator_agent()
    return Task(
        # Prompt typos fixed ("Dont", missing articles); no placeholders, so plain strings.
        description=(
            "Generate an image that represents the story text, "
            "aligning with the theme outlined in the story. "
            "Use the full story content to generate the image. "
            "Don't summarize the content, use the full story content."
        ),
        agent=image_generator_agent,
        expected_output="A digital image file that visually represents the story.",
        context=[summarize_task],
    )
|
133 |
+
|
134 |
+
def _create_translate_task(self, summarize_task: Task) -> Task:
    """Create the task that asks the Translator agent to translate the summary.

    Args:
        summarize_task: The summary task; passed as ``context`` so the
            translation task receives its output.
    """
    translator_agent = self._create_translator_agent()
    return Task(
        description=(
            f"Using the story provided, translate the full story to {self.language} at {self.level} level.\n"
            "Add repetition to it to make it better for language learners."
        ),
        agent=translator_agent,
        # The original prompt was garbled ("Its in simple language leve land includes ...").
        expected_output=(
            f"A complete manuscript of the storybook in {self.language}.\n"
            "It is written at a simple language level and includes repetition for language learners."
        ),
        context=[summarize_task],
    )
|
145 |
+
|
146 |
+
def _create_format_content_task(self, translate_task: Task, generate_image_task: Task) -> Task:
    """Create the final task that formats the story as markdown.

    Args:
        translate_task: Task producing the translated story text.
        generate_image_task: Task producing the illustration.

    Both tasks are passed as ``context``. The task is configured with
    ``output_file="story.md"``.
    """
    content_formatter_agent = self._create_content_formatter_agent()
    return Task(
        description=(
            "Format the story content in markdown, including an image at the beginning of the story. "
            "Use <br> as a linebreak"
        ),
        agent=content_formatter_agent,
        # Prompt typo fixed ("with imaged added").
        expected_output=(
            "The entire storybook content formatted in markdown, with an image added at the beginning\n"
            "of the story."
        ),
        context=[translate_task, generate_image_task],
        output_file="story.md",
    )
|
158 |
+
|
159 |
+
@property
def tasks(self) -> List[Task]:
    """Return the four pipeline tasks, building and caching them on first access.

    Order of the returned list: summarize, translate, generate image, format.
    """
    if self._tasks is not None:
        return self._tasks

    summary = self._create_summarize_task()
    # The image task is created before the translation task, as in the
    # original code, although it is listed after it.
    illustration = self._create_generate_image_task(summary)
    translation = self._create_translate_task(summary)
    formatting = self._create_format_content_task(translation, illustration)

    self._tasks = [summary, translation, illustration, formatting]
    return self._tasks
|
170 |
+
|
171 |
+
@property
def agents(self) -> List[BaseAgent]:
    """Return the agent attached to each task, in task order."""
    collected = []
    for task in self.tasks:
        collected.append(task.agent)
    return collected
|
175 |
+
|
176 |
+
@property
def crew(self) -> Crew:
    """Return the sequential ``Crew`` for this generator, creating it on first access."""
    if self._crew is not None:
        return self._crew

    self._crew = Crew(
        agents=self.agents,
        tasks=self.tasks,
        verbose=True,
        process=Process.sequential,
    )
    return self._crew
|
187 |
+
|
188 |
+
def generate(self):
    """Run the crew, print its result and return it.

    Returns:
        Whatever ``self.crew.kickoff()`` returns. Previously the result was
        only printed and then discarded, so callers could not use it.
    """
    result = self.crew.kickoff()
    print(result)
    return result
|
192 |
+
|
193 |
+
|
194 |
+
if __name__ == "__main__":
    # Example run: a philosophical German (A2) retelling of The Karamazov Brothers.
    generator = StoryBookGenerator(
        book="The Karamazov Brothers",
        author="Fyodor Dostoevsky",
        language="German",
        level="A2 Beginner",
        # Typo fixed: the prompt said "sentences log".
        summary_size="10 Chapters, each chapter more than 100 sentences long",
        writing_style="Philosophical",
    )
    generator.generate()
|
aistorybooks/crewaia/tools.py
ADDED
@@ -0,0 +1,112 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import markdown
|
2 |
+
import os
|
3 |
+
import pdfkit
|
4 |
+
import re
|
5 |
+
import requests
|
6 |
+
from openai import OpenAI
|
7 |
+
from pathlib import Path
|
8 |
+
from pydantic import BaseModel, Field, ConfigDict
|
9 |
+
from typing import Type
|
10 |
+
|
11 |
+
from config import Config
|
12 |
+
from crewai.tools import BaseTool
|
13 |
+
|
14 |
+
|
15 |
+
class ImageGeneratorInput(BaseModel):
    """Argument schema accepted by the ``ImageGenerator`` tool."""

    # Required free-text description of the picture to generate.
    image_description: str = Field(
        default=...,
        description="A detailed description of the image to be generated.",
    )
|
21 |
+
|
22 |
+
|
23 |
+
class ImageGenerator(BaseTool):
    """
    Generates an image for a given description using the OpenAI API.

    The image is requested from the ``dall-e-3`` model, downloaded from the
    URL in the response and written to ``<cwd>/images/<name>.png``, where
    ``<name>`` is built from the first five words of the description.
    """

    name: str = "Generate Image"
    description: str = (
        "Generates an image for a given description using the OpenAI image generation API, "
        "saves it in the current folder, and returns the image path. "
        "The image description should be detailed and longer than 100 characters."
    )
    args_schema: Type[BaseModel] = ImageGeneratorInput
    # Client is created per tool instance from the configured API key.
    client: OpenAI = Field(
        default_factory=lambda: OpenAI(api_key=Config.OPENAI_API_KEY),
        description="OpenAI client instance.",
    )

    # ``OpenAI`` is not a pydantic type, so arbitrary field types must be allowed.
    model_config = ConfigDict(arbitrary_types_allowed=True)

    def _run(self, image_description: str) -> str:
        """Generate, download and store one image.

        Args:
            image_description: Prompt text; must be at least 100 characters.

        Returns:
            The saved image path relative to the current working directory
            (POSIX style), or ``""`` when the download fails.

        Raises:
            ValueError: If ``image_description`` is shorter than 100 characters.
        """
        if len(image_description) < 100:
            raise ValueError("Please provide a longer image description (at least 100 characters).")

        response = self.client.images.generate(
            model="dall-e-3",
            prompt=f"Image is about: {image_description}. "
            "Style: Illustration. Create an illustration incorporating a vivid palette with an emphasis on shades "
            "of azure and emerald, augmented by splashes of gold for contrast and visual interest. The style "
            "should evoke the intricate detail and whimsy of early 20th-century storybook illustrations, "
            "blending realism with fantastical elements to create a sense of wonder and enchantment. The "
            "composition should be rich in texture, with a soft, luminous lighting that enhances the magical "
            "atmosphere. Attention to the interplay of light and shadow will add depth and dimensionality, "
            "inviting the viewer to delve into the scene. DON'T include ANY text in this image. DON'T include "
            "colour palettes in this image.",
            size="1024x1024",
            quality="standard",
            n=1,
        )

        image_url = response.data[0].url

        # File name: first five words, stripped of everything but [a-zA-Z0-9_].
        words = image_description.split()[:5]
        safe_words = [re.sub(r"[^a-zA-Z0-9_]", "", word) for word in words]
        stem = "_".join(safe_words).lower()
        if not stem.strip("_"):
            # Descriptions starting with non-ASCII words would otherwise yield ".png".
            stem = "image"

        images_dir = Path(os.getcwd()).joinpath("images")
        # The directory may not exist yet; write_bytes would fail without it.
        images_dir.mkdir(parents=True, exist_ok=True)
        filepath = images_dir.joinpath(stem + ".png")

        # Download the image from the URL. A timeout keeps the tool from
        # hanging forever, and network errors follow the same "return empty
        # string" path as a non-200 response.
        try:
            image_response = requests.get(image_url, timeout=60)
        except requests.RequestException as exc:
            print(f"Failed to download the image: {exc}")
            return ""

        if image_response.status_code != 200:
            print("Failed to download the image.")
            return ""

        print(f"Saving image: {filepath.as_posix()}")
        filepath.write_bytes(image_response.content)

        return filepath.relative_to(os.getcwd()).as_posix()
|
78 |
+
|
79 |
+
|
80 |
+
class MarkdownToPdfConverterInput(BaseModel):
    """Argument schema accepted by the ``MarkdownToPdfConverter`` tool."""

    # Required path of the markdown file to convert.
    markdown_file_name: str = Field(
        default=...,
        description="Path to the input Markdown file.",
    )
|
86 |
+
|
87 |
+
|
88 |
+
class MarkdownToPdfConverter(BaseTool):
    """
    Converts a Markdown file to a PDF document using the pdfkit python library.

    The markdown is first rendered to HTML with the ``markdown`` package and
    the HTML is then passed to ``pdfkit.from_string``.
    """

    name: str = "Convert Markdown to PDF"
    description: str = (
        "Converts a Markdown file to a PDF document using the pdfkit python library. "
        "The input should be a valid path to a markdown file."
    )
    args_schema: Type[BaseModel] = MarkdownToPdfConverterInput
    model_config = ConfigDict(arbitrary_types_allowed=True)

    def _run(self, markdown_file_name: str) -> str:
        """Convert ``markdown_file_name`` to a PDF next to it.

        Args:
            markdown_file_name: Path to the markdown file.

        Returns:
            The output path: the input path with its extension replaced by
            ``.pdf``.
        """
        output_file = os.path.splitext(markdown_file_name)[0] + ".pdf"

        # Read explicitly as UTF-8: the stories contain non-ASCII characters
        # (e.g. German umlauts) and the platform default encoding may differ.
        text = Path(markdown_file_name).read_text(encoding="utf-8")

        # Convert to HTML
        html = markdown.markdown(text)

        # Convert to PDF. "encoding" tells wkhtmltopdf how to decode the HTML
        # so non-ASCII characters are not rendered as mojibake.
        pdfkit.from_string(
            html,
            output_file,
            options={"enable-local-file-access": "", "encoding": "UTF-8"},
        )

        return output_file
|
aistorybooks/phidataa/__init__.py
ADDED
File without changes
|
aistorybooks/phidataa/classic_stories.py
ADDED
@@ -0,0 +1,114 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import httpx
|
2 |
+
from openai._base_client import SyncHttpxClientWrapper
|
3 |
+
from pathlib import Path
|
4 |
+
from phi.agent import Agent as PhiAgent
|
5 |
+
from phi.model.google.gemini import Gemini
|
6 |
+
from phi.model.openai.like import OpenAILike
|
7 |
+
from phi.utils.log import logger as PhiLogger
|
8 |
+
from phi.workflow import RunResponse, RunEvent
|
9 |
+
from typing import Optional, Union, Iterator
|
10 |
+
|
11 |
+
from aistorybooks.config import Config
|
12 |
+
from aistorybooks.utils import PdfUtil
|
13 |
+
|
14 |
+
|
15 |
+
class OpenAILikeNoVerifySSL(OpenAILike):
    """``OpenAILike`` model whose HTTP client is created with ``verify=False``.

    NOTE(review): ``verify=False`` is forwarded to the HTTP client wrapper,
    which presumably disables TLS certificate verification. Confirm this is
    only used against trusted endpoints.
    """

    def __init__(self, base_url: Optional[Union[str, httpx.URL]] = None, *args, **kwargs):
        # Build the client up front, then hand it to the parent together with
        # the same base URL and all remaining arguments.
        insecure_client = SyncHttpxClientWrapper(base_url=base_url, verify=False)
        super().__init__(*args, http_client=insecure_client, base_url=base_url, **kwargs)
|
23 |
+
|
24 |
+
|
25 |
+
class PhiStoryBookGenerator:
    """Turn a PDF novel into a simplified, translated story, chunk by chunk.

    Two agents share one Gemini model: an author agent that rewrites each
    chunk of pages as a short story, and a translator agent that translates
    the rewritten text into the target language.
    """

    def __init__(
            self,
            language: str = "English",
            level: str = "A2 Beginner",
            # NOTE(review): "800 hundred words" looks like a typo for "800 words";
            # left unchanged because it is a public default value.
            summary_size: str = "approximately 100 sentences and approximately 800 hundred words",
            writing_style: str = "Funny",
            **kwargs
    ):
        """Store the options and create the model and both agents.

        Args:
            language: Target language of the translation.
            level: Language level the text is aimed at.
            summary_size: Free-text length instruction inserted into the prompt.
            writing_style: Style instruction inserted into the prompt.
            **kwargs: Accepted but not used by this constructor.
        """
        self.language = language
        self.level = level
        self.summary_size = summary_size
        self.writing_style = writing_style
        self.model = Gemini(
            id=Config.GEMINI_MODEL_NAME,
            api_key=Config.GEMINI_API_KEY
        )

        self.author_agent = PhiAgent(
            model=self.model,
            description="Expert author rewriting novels to a shortened stories.",
            task=(
                f"Rewrite given the novel text to {self.summary_size} a story, "
                f"rewrite it in a {self.writing_style} style. "
                # Trailing space added: the sentences used to run together ("flow.This is").
                f"Target {self.level} language learners. Repeat key words. Maintain narrative flow. "
                "This is a first section of the big novel. Next sections will follow."
            ),
            markdown=True,
            debug_mode=True,
        )

        self.translator_agent = PhiAgent(
            model=self.model,
            description=f"Expert English to {self.language} translator.",
            task=(
                f"Translate Given English text to {self.language} for {self.level} level language learners. "
                "Repeat key words. Ensure natural story flow."
            ),
            markdown=True,
            debug_mode=True,
        )

    def return_if_response_none(self, response: RunResponse):
        """Yield a single "empty result" response when ``response`` is ``None``.

        This is a generator: it yields nothing for a non-``None`` response.
        """
        if response is None:
            yield RunResponse(event=RunEvent.workflow_completed, content="Sorry, received empty result")

    # @TODO add tenacity and retry api call "rate limit exceptions"!
    def _run_chunk(self, content: str, start_page, end_page) -> RunResponse:
        """Rewrite and translate one chunk of text.

        Args:
            content: Text of the chunk.
            start_page: First page of the chunk (used in error messages only).
            end_page: Last page of the chunk (used in error messages only).

        Returns:
            The translator's response, or a response with
            ``event="RunFailed"`` describing what went wrong. Exceptions are
            converted into such a response rather than propagated.
        """
        try:
            summary: RunResponse = self.author_agent.run(content)
            if summary is None or summary.content is None:
                return RunResponse(event="RunFailed",
                                   content=f"Failed to generate summary for pages {start_page}-{end_page}")

            translated: RunResponse = self.translator_agent.run(summary.content)
            if translated is None or translated.content is None:
                return RunResponse(event="RunFailed",
                                   content=f"Failed to translate summary for pages {start_page}-{end_page}")
            return translated
        except Exception as e:
            # Boundary handler: one failing chunk must not abort the whole book.
            return RunResponse(event="RunFailed", content=f"Error processing pages {start_page}-{end_page}: {e}")

    def run(self, pdf_file: Path, chunk_size=10, padding=1, skip_first_n_pages=0) -> Iterator[RunResponse]:
        """Process ``pdf_file`` chunk by chunk, yielding one response per chunk.

        Args:
            pdf_file: PDF to process.
            chunk_size: Pages per chunk.
            padding: Pages of overlap added on each side of a chunk.
            skip_first_n_pages: Pages to skip at the start of the document.

        Yields:
            One ``RunResponse`` per chunk. Successful responses carry
            ``progress_info``, ``progress_total``, ``progress_current_index``
            and ``progress_percent`` in ``metrics``; failed ones have
            ``event == "RunFailed"``.
        """
        # final = pdf_file.parent.joinpath(f"{pdf_file.stem}.md")
        data = PdfUtil.process_pdf_file(pdf_file)
        chunks = PdfUtil.split_document_into_chunks(
            data=data,
            chunk_size=chunk_size,
            padding=padding,
            skip_first_n_pages=skip_first_n_pages
        )

        total_chunks = len(chunks)
        for index, chunk in enumerate(chunks):
            start_page = chunk[0].extra_info.get('page')
            end_page = chunk[-1].extra_info.get('page')
            total_pages = chunk[-1].extra_info.get('total_pages')
            PhiLogger.info(f"Processing Pages: {start_page}-{end_page}")
            chunk_str = "\n\n".join([doc.text for doc in chunk])

            response = self._run_chunk(content=chunk_str, start_page=start_page, end_page=end_page)

            if response.event != "RunFailed":
                # Guard: a response without a metrics dict would make the
                # assignments below fail with a TypeError.
                if response.metrics is None:
                    response.metrics = {}
                response.metrics[
                    'progress_info'] = f"Processed Pages: {skip_first_n_pages}-{end_page} of {total_pages}"
                response.metrics['progress_total'] = total_chunks
                response.metrics['progress_current_index'] = index + 1
                response.metrics['progress_percent'] = int(((index + 1) / total_chunks) * 100)
            yield response
|
aistorybooks/utils.py
ADDED
@@ -0,0 +1,39 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import pickle
|
2 |
+
import pymupdf4llm
|
3 |
+
from llama_index.core.schema import Document
|
4 |
+
from pathlib import Path
|
5 |
+
from typing import List
|
6 |
+
|
7 |
+
|
8 |
+
class PdfUtil:
    """Helpers for loading a PDF as page documents and grouping pages into chunks."""

    @staticmethod
    def process_pdf_file(pdf_file: Path, save_to_pickle: bool = False) -> "List[Document]":
        """Load ``pdf_file`` as a list of documents, optionally cached as a pickle.

        Args:
            pdf_file: PDF to read.
            save_to_pickle: When ``True``, a ``<stem>.pkl`` file next to the PDF
                is loaded if it exists, and written after parsing otherwise.

        Returns:
            The documents returned by ``pymupdf4llm.LlamaMarkdownReader`` (or
            the cached copy of them).
        """
        pickle_file = pdf_file.parent.joinpath(f"{pdf_file.stem}.pkl")

        if save_to_pickle is True and pickle_file.exists():
            print(f"Loading data from pickle file: {pickle_file}")
            # SECURITY NOTE(review): pickle.load can execute arbitrary code.
            # Only enable save_to_pickle for directories you control, never
            # for folders that hold user uploads.
            with open(pickle_file, "rb") as f:
                return pickle.load(f)

        print(f"Processing PDF file: {pdf_file}")
        reader = pymupdf4llm.LlamaMarkdownReader()
        data = reader.load_data(pdf_file)

        if save_to_pickle:
            print(f"Saving data to pickle file: {pickle_file}")
            with open(pickle_file, "wb") as f:
                pickle.dump(data, f)

        return data

    @staticmethod
    def split_document_into_chunks(
            data: "List[Document]", chunk_size: int, padding: int, skip_first_n_pages=0
    ) -> "List[List[Document]]":
        """Split ``data`` into overlapping chunks of pages.

        Chunks start every ``chunk_size`` pages beginning at
        ``skip_first_n_pages``. Each chunk is widened by ``padding`` pages on
        both sides, clamped so that it never reaches before the skipped pages
        or past the end of ``data``.

        Args:
            data: Page documents in order.
            chunk_size: Pages per chunk; must be positive.
            padding: Overlap in pages on each side; must not be negative.
            skip_first_n_pages: Leading pages to leave out; must not be negative.

        Returns:
            The list of chunks; empty when there is nothing left after skipping.

        Raises:
            ValueError: If an argument is outside the ranges given above.
                (Previously a negative chunk size silently produced no chunks
                and negative padding silently dropped pages.)
        """
        if chunk_size <= 0:
            raise ValueError("chunk_size must be a positive integer")
        if padding < 0:
            raise ValueError("padding must not be negative")
        if skip_first_n_pages < 0:
            raise ValueError("skip_first_n_pages must not be negative")

        total = len(data)
        return [
            data[max(skip_first_n_pages, start - padding):min(total, start + chunk_size + padding)]
            for start in range(skip_first_n_pages, total, chunk_size)
        ]
|
app.py
ADDED
@@ -0,0 +1,290 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import shutil
|
2 |
+
import statistics
|
3 |
+
import tempfile
|
4 |
+
import uuid
|
5 |
+
from dataclasses import dataclass, field
|
6 |
+
from pathlib import Path
|
7 |
+
from typing import List
|
8 |
+
|
9 |
+
import streamlit as st
|
10 |
+
from llama_index.core.schema import Document
|
11 |
+
from streamlit.runtime.uploaded_file_manager import UploadedFile
|
12 |
+
|
13 |
+
from aistorybooks.phidataa.classic_stories import PhiStoryBookGenerator
|
14 |
+
|
15 |
+
|
16 |
+
@dataclass
class AppInputs:
    """Current form values of the Streamlit app plus the choices offered for each.

    The first group of fields holds the selected values (with their defaults);
    the ``*_options`` fields hold the lists shown in the select boxes.
    """

    # --- selected values -------------------------------------------------
    uploaded_file: UploadedFile | None = None
    language: str = "German"
    level: str = "B1 Intermediate"
    summary_size: str = "Long (150 sentences/1200 words)"
    writing_style: str = "Philosophical"
    chunk_size: int = 10
    padding: int = 1
    skip_first_n_pages: int = 0

    # --- available choices (fresh list per instance) ---------------------
    language_options: List[str] = field(default_factory=lambda: ["German", "English", "Spanish", "French"])
    level_options: List[str] = field(
        default_factory=lambda: [
            "A1 Beginner", "A2 Elementary",
            "B1 Intermediate", "B2 Upper Intermediate",
            "C1 Advanced", "C2 Proficiency",
        ]
    )
    summary_size_options: List[str] = field(
        default_factory=lambda: [
            "Short (50 sentences/400 words)",
            "Medium (100 sentences/800 words)",
            "Long (150 sentences/1200 words)",
        ]
    )
    writing_style_options: List[str] = field(
        default_factory=lambda: ["Philosophical", "Narrative", "Descriptive", "Humorous", "Formal"]
    )
|
59 |
+
|
60 |
+
|
61 |
+
def st_sidebar(inputs: AppInputs):
    """
    Render the input form and copy the widget values into ``inputs``.

    Args:
        inputs (AppInputs): Holder that receives the current widget values.

    Returns:
        The value returned by the form's submit button.
    """

    def choose(label, options, current, help_text):
        # Select box pre-positioned on the currently stored value.
        return st.selectbox(label, options, index=options.index(current), help=help_text)

    st.header("Input Options:")
    with st.form(key='inputs_form', border=False):
        inputs.uploaded_file = st.file_uploader(
            "Upload your novel (PDF)",
            type=["pdf"],
            accept_multiple_files=False,
            help="Upload the PDF file of the novel you want to convert.",
        )

        inputs.language = choose(
            "Select Target Story Language",
            inputs.language_options,
            inputs.language,
            "Choose the language you want the storybook to be in.",
        )
        inputs.level = choose(
            "Select Language Level",
            inputs.level_options,
            inputs.level,
            "Select the target language proficiency level for the storybook.",
        )
        inputs.summary_size = choose(
            "Desired Summary Length (Per Chunk)",
            inputs.summary_size_options,
            inputs.summary_size,
            "Specify the desired length of the summary for each chunk of the novel.",
        )
        inputs.writing_style = choose(
            "Desired Writing Style",
            inputs.writing_style_options,
            inputs.writing_style,
            "Choose the writing style for the generated storybook.",
        )

        inputs.chunk_size = st.number_input(
            "Chunk Size",
            min_value=1,
            value=inputs.chunk_size,
            help="Number of pages to process per iteration. Larger chunks may take longer to process.",
        )
        inputs.padding = st.number_input(
            "Padding",
            min_value=0,
            value=inputs.padding,
            help="Number of pages to overlap between chunks. Helps maintain context.",
        )
        inputs.skip_first_n_pages = st.number_input(
            "Skip First N Pages",
            min_value=0,
            value=inputs.skip_first_n_pages,
            help="Number of pages to skip at the beginning of the novel (e.g., table of contents).",
        )
        submitted = st.form_submit_button(label='Submit')
    return submitted
|
126 |
+
|
127 |
+
|
128 |
+
def st_process_file(inputs: AppInputs) -> None:
    """Run the story generator on the uploaded PDF and stream results to the page.

    The upload is written to a temporary folder, processed chunk by chunk,
    and after every successful chunk the progress bar, the download button
    and the model statistics are refreshed. The temporary folder is removed
    at the end, also when processing fails.

    Args:
        inputs (AppInputs): Current form values; ``uploaded_file`` must be set.
    """
    # Keep only the final path component so the upload can never be written
    # outside the temporary folder.
    uploaded_file_name = Path(inputs.uploaded_file.name).name
    # "  \n" (two spaces + newline) is a markdown hard line break.
    st.info(f"Uploaded File: **{inputs.uploaded_file.name}**. Preparing your storybook... (Working in the background)."
            "  \nPlease note: Processing is powered by the free tier of Gemini, which may experience rate limiting.",
            icon=":material/info:")

    # Created before the ``try`` so the ``finally`` clause below never refers
    # to an unbound name when folder creation itself fails.
    temp_folder = Path(tempfile.mkdtemp(prefix="story_gen_temp_"))
    try:
        progress_value = 0
        progress = st.progress(value=progress_value, text="Processing file...")
        pdf_file = temp_folder.joinpath(uploaded_file_name)
        md_file_name = f"{pdf_file.stem}.md"
        # pdf_file_final = pdf_file.parent.joinpath(f"{pdf_file.stem}_story.pdf")
        pdf_file.write_bytes(inputs.uploaded_file.getvalue())
        generator = PhiStoryBookGenerator(
            language=inputs.language,
            level=inputs.level,
            summary_size=inputs.summary_size,
            writing_style=inputs.writing_style,
        )
        st.session_state[md_file_name] = ""
        button_container = st.empty()
        info_container = st.empty()
        it = generator.run(pdf_file=pdf_file,
                           chunk_size=inputs.chunk_size,
                           padding=inputs.padding,
                           skip_first_n_pages=inputs.skip_first_n_pages
                           )
        for response in it:
            if response.event == "RunFailed":
                st.error(f"{response.content}", icon=":material/error:")
                progress.progress(value=progress_value, text=f"Error: {response.content}")
            else:
                progress_value = response.metrics['progress_percent']
                progress.progress(value=progress_value, text=response.metrics['progress_info'])
                st.session_state[md_file_name] += f"\n\n{response.content}"

                # Re-render the download button with the text collected so
                # far; a fresh key avoids duplicate-widget errors.
                button_container.empty()
                button_container.download_button(label='Download Storybook as Markdown',
                                                 data=st.session_state.get(md_file_name),
                                                 file_name=md_file_name,
                                                 mime='text/markdown',
                                                 on_click="ignore",
                                                 key=str(uuid.uuid4()),
                                                 type="primary",
                                                 icon=":material/download:",
                                                 )

                info_container.empty()
                metrics = generator.model.metrics or {}
                # statistics.mean raises on an empty sequence, so fall back to 0.
                response_times = metrics.get('response_times') or []
                avg_response_time = statistics.mean(response_times) if response_times else 0
                input_tokens = metrics.get('input_tokens', 0)
                output_tokens = metrics.get('output_tokens', 0)
                total_tokens = metrics.get('total_tokens', 0)
                info_container.info(f"Model: {generator.model.name} "
                                    f"  \n Avg response time: {avg_response_time} "
                                    f"  \n Input tokens: {input_tokens} "
                                    f"  \n Output tokens: {output_tokens} "
                                    f"  \n Total tokens: {total_tokens}",
                                    icon=":material/info:")
    finally:
        if temp_folder.exists():
            shutil.rmtree(temp_folder)
            st.info("Temporary folder and its contents cleared", icon=":material/info:")
|
190 |
+
|
191 |
+
|
192 |
+
def st_main_page(inputs: AppInputs):
    """
    Render the main page: title, a summary of the chosen options and, when a
    file has been uploaded, the processing output.

    Args:
        inputs (AppInputs): Current form values.
    """
    st.title("Novel to Storybook Generator")
    st.write("---")
    st.markdown(f"""
**Selected Options:**
**Language:** {inputs.language} |
**Language Level:** {inputs.level} |
**Summary Length:** {inputs.summary_size} |
**Writing Style:** {inputs.writing_style} |
**Chunk Size:** {inputs.chunk_size} |
**Padding:** {inputs.padding} |
**Skip First N Pages:** {inputs.skip_first_n_pages}
""")
    if not inputs.uploaded_file:
        return
    st_process_file(inputs=inputs)
|
214 |
+
|
215 |
+
|
216 |
+
def st_set_css_and_footer():
    """Inject the page's custom CSS and render the fixed footer with contact links."""
    page_css = """
<style>
.stAppHeader {
background-color: rgba(255, 255, 255, 0.0); /* Transparent background */
visibility: visible; /* Ensure the header is visible */
}

.block-container {
padding-top: 0.5rem;
padding-bottom: 0rem;
padding-left: 5rem;
padding-right: 5rem;
}
.footer {
position: fixed;
left: 0;
bottom: 2px;
width: 100%;
text-align: right;
padding-right: 40px;
}
.footer a {
margin: 0 5px; /* Reduced margin for smaller icons */
text-decoration: none;
}

.footer img {
height: 18px; /* Adjusted height for smaller icons */
width: 18px; /* Adjusted width for smaller icons */
vertical-align: middle;
opacity: 0.7; /* Added opacity for a softer look */
transition: opacity 0.3s ease; /* Added transition for hover effect */
}

.footer img:hover {
opacity: 1.0; /* Increased opacity on hover */
}
</style>
"""
    footer_html = """
<div class='footer'>
<p>
Need to leverage AI for your business, improve your data and analytics setup or strategy?
Feel free to contact.
<a href="https://github.com/ismailsimsek" target="_blank">
<img src="https://cdn-icons-png.flaticon.com/512/25/25231.png" alt="GitHub">
</a>
<a href="https://www.linkedin.com/in/ismailsimsek/" target="_blank">
<img src="https://cdn-icons-png.flaticon.com/512/174/174857.png" alt="LinkedIn">
</a>
<a href="https://medium.com/@ismail-simsek" target="_blank">
<img src="https://cdn-icons-png.flaticon.com/512/1384/1384015.png" alt="Medium">
</a>
Copyright 2025 Ismail Simsek</p>
</div>
"""
    # Both snippets are raw HTML, so HTML rendering must be enabled explicitly.
    st.markdown(page_css, unsafe_allow_html=True)
    st.markdown(footer_html, unsafe_allow_html=True)
|
278 |
+
|
279 |
+
def main():
    """Entry point of the Streamlit app: configure the page, then draw sidebar and main page."""
    st.set_page_config(layout="wide")
    st_set_css_and_footer()

    app_inputs = AppInputs()
    # The sidebar form fills ``app_inputs``; the main page then reads it.
    with st.sidebar:
        st_sidebar(app_inputs)
    st_main_page(app_inputs)
|
287 |
+
|
288 |
+
|
289 |
+
if __name__ == "__main__":
|
290 |
+
main()
|
poem.md
ADDED
@@ -0,0 +1,42 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
**The Karamazov Conscience**
|
2 |
+

|
3 |
+
|
4 |
+
In the shadow of the monastery's ancient walls,<br>
|
5 |
+
Where faith and doubt entwine like ivy's grasp,<br>
|
6 |
+
A tale of brothers, bound by blood and fate,<br>
|
7 |
+
Unfolds like a dark tapestry, forever great.<br>
|
8 |
+
|
9 |
+
Alyosha, gentle soul, with heart aglow,<br>
|
10 |
+
A beacon of compassion, in a world of woe,<br>
|
11 |
+
His eyes, like lanterns, shining bright and true,<br>
|
12 |
+
Reflecting the beauty, of the human heart, anew.<br>
|
13 |
+
|
14 |
+
Ivan, the skeptic, with mind afire,<br>
|
15 |
+
A whirlwind of logic, that reason's highest desire,<br>
|
16 |
+
He questions the heavens, with a rebel's cry,<br>
|
17 |
+
And in the silence, his own demons, he'll deny.<br>
|
18 |
+
|
19 |
+
Dmitri, the sensual, with passions untamed,<br>
|
20 |
+
A heart, aflutter, like a bird, in love's sweet name,<br>
|
21 |
+
His desires, a maelstrom, that rage like the sea,<br>
|
22 |
+
As he navigates, the treacherous shores, of humanity.<br>
|
23 |
+
|
24 |
+
Smerdyakov, the serpent, with a heart of stone,<br>
|
25 |
+
A darkness, that festers, like a wound, untold,<br>
|
26 |
+
His eyes, like ice, that freeze the blood in vain,<br>
|
27 |
+
As he weaves a web, of deceit, and deadly gain.<br>
|
28 |
+
|
29 |
+
Fyodor Pavlovich, the patriarch, of old,<br>
|
30 |
+
A man, of contradictions, with a heart, of gold,<br>
|
31 |
+
His legacy, a tangle, of love and strife,<br>
|
32 |
+
A family, entwined, in a dance, of life.<br>
|
33 |
+
|
34 |
+
In the town, of Skotoprigonievsk, where the Volga flows,<br>
|
35 |
+
The Karamazov brothers, in a tragic, waltz, dispose,<br>
|
36 |
+
Their lives, a labyrinth, of love, and bitter strife,<br>
|
37 |
+
A dance, of humanity, in the shadow, of life.<br>
|
38 |
+
|
39 |
+
In this, their story, a philosophical treatise, unfolds,<br>
|
40 |
+
A quest, for meaning, in the human, heart of gold,<br>
|
41 |
+
A search, for truth, in the labyrinth, of the mind,<br>
|
42 |
+
A journey, through the darkness, to the light, we find.<br>
|
requirements.txt
ADDED
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
crewai[tools]>=0.108.0
|
2 |
+
mdpdf
|
3 |
+
markdown
|
4 |
+
pdfkit
|
5 |
+
langchain-community
|
6 |
+
pyautogen[retrievechat]
|
7 |
+
poppler-utils
|
8 |
+
ipython
|
9 |
+
streamlit
|
10 |
+
llama_index
|
11 |
+
pymupdf4llm
|
12 |
+
tenacity
|
13 |
+
llama_index
|
14 |
+
pymupdf4llm
|
15 |
+
phidata
|
16 |
+
openai
|
17 |
+
google-generativeai
|
story.md
ADDED
@@ -0,0 +1,86 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
**Die Karamazov-Brüder: Eine Philosophische Odyssee**
|
2 |
+
<br>
|
3 |
+
|
4 |
+
**Kapitel 1: Die Karamazov-Familie**
|
5 |
+
Die Karamazov-Familie, ein komplexes Netzwerk von Beziehungen und philosophischen Debatten, steht im Zentrum dieses
|
6 |
+
epischen Romans. Die Geschichte dreht sich um die drei Brüder, Dmitri, Ivan und Alyosha, die jeweils unterschiedliche
|
7 |
+
philosophische Perspektiven vertreten. Ihr Vater, Fyodor Pavlovich Karamazov, ein reicher und unmoralischer
|
8 |
+
Großgrundbesitzer, ist der Katalysator für die Ereignisse, die sich entwickeln.
|
9 |
+
|
10 |
+
Dmitri, der älteste, ist ein sinnlicher und impulsiver Offizier, der die Prinzipien des Hedonismus und Nihilismus
|
11 |
+
verkörpert. Ivan, der mittlere Bruder, ist ein rationaler und atheistischer Intellektueller, der die Ideale des
|
12 |
+
Determinismus und der moralischen Relativität verkörpert. Alyosha, der jüngste, ist ein mitfühlender und spiritueller
|
13 |
+
Novize, der die Prinzipien der christlichen Moral und des Altruismus verkörpert.
|
14 |
+
|
15 |
+
**Kapitel 2: Das Zusammenkommen des Sturms**
|
16 |
+
Die Geschichte beginnt mit der Ankunft von Dmitri in der kleinen Stadt Skotoprigonievsk, wo sein Vater, Fyodor
|
17 |
+
Pavlovich, die Ankunft seiner Söhne erwartet. Die Spannung zwischen den Familienmitgliedern ist spürbar, als sie sich im
|
18 |
+
Kloster versammeln, um das Erbe des Vaters zu diskutieren.
|
19 |
+
|
20 |
+
Inzwischen hat die geheimnisvolle und schöne Grushenka, eine Verführerin mit einer mysteriösen Vergangenheit, die
|
21 |
+
Aufmerksamkeit von Dmitri und seinem Vater erregt. Als die familiären Dynamiken sich entwickeln, beginnen die
|
22 |
+
philosophischen Debatten zwischen den Brüdern Gestalt anzunehmen.
|
23 |
+
|
24 |
+
**Kapitel 3: Der Große Inquisitor**
|
25 |
+
Ivan, der Atheist, präsentiert sein berühmtes Gedicht "Der Große Inquisitor", eine scharfe Kritik am Christentum und
|
26 |
+
seiner vermeintlichen Heuchelei. Dies löst einen heftigen Streit zwischen Ivan und Alyosha aus, als sie sich mit der
|
27 |
+
Natur der Moral, der Freiheit des Willens und der Existenz Gottes auseinandersetzen.
|
28 |
+
|
29 |
+
In diesem Kapitel verwebt Dostojewski philosophische Konzepte, wie das Problem des Bösen, die Natur der Moral und die
|
30 |
+
Rolle des Einzelnen in der Gesellschaft, zu einem feinen Gewebe.
|
31 |
+
|
32 |
+
**Kapitel 4: Die Geheimnisvolle Grushenka**
|
33 |
+
Grushenkas Anwesenheit sorgt weiterhin für Unruhe, als Dmitri immer mehr von ihr besessen wird. Inzwischen wird Alyoshas
|
34 |
+
mitfühlende Natur auf die Probe gestellt, als er versucht, die streitenden Fraktionen innerhalb der Familie zu
|
35 |
+
versöhnen.
|
36 |
+
|
37 |
+
Durch Grushenkas Charakter erkundet Dostojewski die Themen der Moral, der Versuchung und der menschlichen Natur, indem
|
38 |
+
er Fragen über die Natur des Guten und Bösen aufwirft.
|
39 |
+
|
40 |
+
**Kapitel 5: Der Vatermord**
|
41 |
+
Die Spannung zwischen Fyodor Pavlovich und seinen Söhnen erreicht einen Siedepunkt, der in einem tragischen Ereignis
|
42 |
+
kulminiert, das ihr Leben für immer verändern wird. Als die Familie auseinanderbricht, nehmen die philosophischen
|
43 |
+
Debatten eine neue Dringlichkeit an.
|
44 |
+
|
45 |
+
In diesem Kapitel erkundet Dostojewski die Konsequenzen des moralischen Relativismus, die Natur der Gerechtigkeit und
|
46 |
+
die Rolle des Einzelnen bei der Gestaltung seines eigenen Schicksals.
|
47 |
+
|
48 |
+
**Kapitel 6: Die Untersuchung**
|
49 |
+
Als die Untersuchung zum Vatermord beginnt, werden die dunklen Geheimnisse der Familie enthüllt, und die Brüder werden
|
50 |
+
gezwungen, sich ihren eigenen moralischen Mängeln zu stellen. Ivans Rationalismus wird auf die Probe gestellt, als er
|
51 |
+
sich mit den Konsequenzen seines eigenen Atheismus auseinandersetzt.
|
52 |
+
|
53 |
+
Dostojewski erkundet die Themen der Schuld, der Verantwortung und der Natur der Gerechtigkeit, indem er fundamentale
|
54 |
+
Fragen über die menschliche Natur aufwirft.
|
55 |
+
|
56 |
+
**Kapitel 7: Der Prozess**
|
57 |
+
Der Prozess gegen Dmitri Karamazov dient als Hintergrund für eine tiefere Erkundung der Moral, der Gerechtigkeit und der
|
58 |
+
Rolle des Einzelnen in der Gesellschaft. Als die Anklage und die Verteidigung ihre Fälle vorbringen, erreichen die
|
59 |
+
philosophischen Debatten einen Höhepunkt.
|
60 |
+
|
61 |
+
In diesem Kapitel erkundet Dostojewski die Spannung zwischen individueller Freiheit und gesellschaftlichen Zwängen,
|
62 |
+
indem er Fragen über die Natur der Moral und die Rolle des Staates aufwirft.
|
63 |
+
|
64 |
+
**Kapitel 8: Das Urteil**
|
65 |
+
Das Urteil wird gefällt, und die Familie muss sich mit den Konsequenzen ihrer Handlungen auseinandersetzen. Als die
|
66 |
+
Brüder über die Ereignisse reflektieren, die sich entwickelt haben, werden sie gezwungen, sich ihren eigenen moralischen
|
67 |
+
Mängeln zu stellen.
|
68 |
+
|
69 |
+
Dostojewski erkundet die Themen der Erlösung, der Vergebung und der Möglichkeit des moralischen Wachstums, indem er
|
70 |
+
fundamentale Fragen über die menschliche Natur aufwirft.
|
71 |
+
|
72 |
+
**Kapitel 9: Der Weg zur Erlösung**
|
73 |
+
Im Anschluss an den Prozess machen die Brüder sich auf eine Reise der Selbstentdeckung, indem sie sich mit den
|
74 |
+
Konsequenzen ihrer Handlungen auseinandersetzen. Alyoshas Mitgefühl und Weisheit dienen als Leuchtfeuer der Hoffnung,
|
75 |
+
als die Familie beginnt, zu heilen und sich neu zu formieren.
|
76 |
+
|
77 |
+
In diesem Kapitel erkundet Dostojewski die Themen der Erlösung, der Vergebung und der Möglichkeit des moralischen
|
78 |
+
Wachstums, indem er fundamentale Fragen über die menschliche Natur aufwirft.
|
79 |
+
|
80 |
+
**Kapitel 10: Das Vermächtnis der Karamazovs**
|
81 |
+
Als die Geschichte zu Ende geht, treten die Karamazov-Brüder transformiert hervor, ihre philosophischen Debatten haben
|
82 |
+
ihr Verständnis der Welt und ihrer Rolle darin geprägt. Der Roman endet mit einem Gefühl der Hoffnung und Erlösung, als
|
83 |
+
die Brüder in die Zukunft blicken, für immer von ihren Erfahrungen geprägt.
|
84 |
+
|
85 |
+
In diesem letzten Kapitel verwebt Dostojewski die philosophischen Fäden, die sich durch den ganzen Roman gezogen haben,
|
86 |
+
zu einem reichen Teppich von Ideen, die bis heute bei den Lesern nachhallen.
|
tests/__init__.py
ADDED
File without changes
|
tests/test_phidata.py
ADDED
@@ -0,0 +1,41 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import unittest
|
2 |
+
from pathlib import Path
|
3 |
+
|
4 |
+
from aistorybooks.phidata.classic_stories import PhiStoryBookGenerator
|
5 |
+
|
6 |
+
|
7 |
+
@unittest.skip("Local user test only")
class TestPhiStoryBookGenerator(unittest.TestCase):
    """Manual smoke tests for ``PhiStoryBookGenerator``.

    The whole class is skipped by default ("Local user test only"); remove the
    class-level skip decorator to run these tests on a developer machine.

    NOTE(review): this module imports the generator from
    ``aistorybooks.phidata.classic_stories`` while the repository file listing
    shows the package directory as ``aistorybooks/phidataa`` -- confirm that
    the import path is still valid.
    """

    def test_story_book_generator(self):
        """Run the generator on the sample PDF and print metrics of the first results."""
        pdf_file = Path(__file__).parent.joinpath("resources/LoremIpsum.pdf")

        generator = PhiStoryBookGenerator(
            language="German",
            level="A1 Intermediate",
            summary_size="Long (150 sentences/1200 words)",
            writing_style="Philosophical",
        )

        results = generator.run(pdf_file=pdf_file, chunk_size=1, padding=0, skip_first_n_pages=0)

        # Only the first three results are inspected. Leave the loop with
        # `break`: raising a plain string (`raise "STOP"`) is a TypeError in
        # Python 3 and would turn every run of this test into an error.
        max_results = 3
        for count, result in enumerate(results, start=1):
            print(result.metrics)
            if count >= max_results:
                break

    @unittest.skip("Local user test only")
    def test_story_book_generator_run_chunk(self):
        """Call the author agent and ``_run_chunk`` directly and print both results."""
        generator = PhiStoryBookGenerator(
            language="German",
            level="A1 Intermediate",
            summary_size="Long (150 sentences/1200 words)",
            writing_style="Funny",
        )

        # First exercise the agent on its own ...
        result = generator.author_agent.run("hello, how are you doing")
        print(result)
        # ... then the chunk-level entry point with the same text.
        result = generator._run_chunk(content="hello, how are you doing", start_page=0, end_page=1)
        print(result)
|
tests/test_utils.py
ADDED
@@ -0,0 +1,50 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import unittest
|
2 |
+
from llama_index.core.schema import Document
|
3 |
+
from pathlib import Path
|
4 |
+
from typing import List
|
5 |
+
|
6 |
+
from aistorybooks.utils import PdfUtil
|
7 |
+
|
8 |
+
|
9 |
+
@unittest.skip("Local user test only")
class TestPdfUtil(unittest.TestCase):
    """Checks for the ``PdfUtil`` helpers; skipped unless enabled locally."""

    @unittest.skip("TODO implement")
    def test_process_pdf_file_new_file(self):
        """Placeholder: not implemented yet."""

    def test_split_document_into_chunks(self):
        """Ten pages, chunk size 5 and padding 2 give two chunks of seven pages."""
        pages = [Document(text=f"Page {number}") for number in range(1, 11)]

        # Arguments are positional: data, chunk_size, padding, skip_first_n_pages.
        chunks = PdfUtil.split_document_into_chunks(pages, 5, 2, 0)

        self.assertEqual(len(chunks), 2)
        head_chunk = chunks[0]
        tail_chunk = chunks[1]
        self.assertEqual(len(head_chunk), 7)
        self.assertEqual(len(tail_chunk), 7)

        # Boundary pages of each chunk.
        self.assertEqual(head_chunk[0].text, "Page 1")
        self.assertEqual(head_chunk[-1].text, "Page 7")
        self.assertEqual(tail_chunk[0].text, "Page 4")
        self.assertEqual(tail_chunk[-1].text, "Page 10")

    def test_split_document_into_chunks_empty_data(self):
        """An empty page list produces no chunks."""
        no_pages: List[Document] = []

        chunks = PdfUtil.split_document_into_chunks(no_pages, 5, 2, 0)

        self.assertEqual(len(chunks), 0)

    def test_document_info(self):
        """Load the sample PDF and print the extra info of its first document."""
        sample_pdf = Path(__file__).parent / "resources/LoremIpsum.pdf"
        loaded = PdfUtil.process_pdf_file(pdf_file=sample_pdf, save_to_pickle=False)
        first_document = loaded[0]
        print(first_document.extra_info)
|