xcx0902 committed on
Commit
b51c975
·
verified ·
1 Parent(s): 74f1e9e

Upload folder using huggingface_hub

Browse files
Files changed (6) hide show
  1. .gitignore +1 -0
  2. generate_data.py +41 -0
  3. tiny_llm.pth +3 -0
  4. train.py +108 -0
  5. train_data.txt +193 -0
  6. vocab.json +1 -0
.gitignore ADDED
@@ -0,0 +1 @@
 
 
1
+ *config*
generate_data.py ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import openai
2
+ import json
3
+ import threading
4
+
5
# Read the API settings ("key", "url", "model", optional "cookie") from
# config.json. A context manager closes the handle deterministically, and the
# encoding is pinned because JSON text is UTF-8 regardless of the OS locale.
with open("config.json", encoding="utf-8") as config_file:
    config: dict = json.load(config_file)

# Shared result list; every successful request appends one reply to it.
dataset = []
8
+
9
def gen():
    """Request one batch of synthetic training text and append it to ``dataset``.

    Builds an OpenAI-compatible client from the module-level ``config``
    (``key``, ``url``, ``model`` and the optional ``cookie``), sends a single
    non-streaming chat completion and stores the reply text in the shared
    ``dataset`` list.

    Any exception is printed and otherwise ignored (best effort), so a failed
    request simply contributes nothing to ``dataset``.
    """
    try:
        client = openai.OpenAI(
            api_key=config["key"],
            base_url=config["url"],
            default_headers={
                "User-Agent": "OpenAI SDK",
                "Cookie": config.get("cookie", ""),
            },
        )
        response = client.chat.completions.create(
            model=config["model"],
            extra_body={"model_id": config["model"]},
            messages=[
                {"role": "user", "content": "Generate some texts for training LLM. Do not add any other elements to your response."}
            ],
            stream=False,
        )
        content = response.choices[0].message.content
        # The SDK types ``content`` as optional. A None entry would make the
        # later "\n".join(dataset) raise TypeError and throw away every reply,
        # so only keep real, non-empty text.
        if content:
            dataset.append(content)
    except Exception as e:
        print(e)
29
+
30
# Fire 100 requests concurrently; each thread runs gen() once.
thpool = []

for _ in range(100):
    th = threading.Thread(target=gen)
    th.start()
    thpool.append(th)

# Wait for every request to finish before touching the results.
for th in thpool:
    th.join()

# The corpus file is opened in append mode, so every sample is terminated with
# a newline; without it the first sample of the next run would be glued onto
# the last line written by this run.
with open("train_data.txt", "a", encoding="utf-8") as f:
    f.writelines(f"{text}\n" for text in dataset)
tiny_llm.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e667fcebb47365f15110285118cf5941aeca57b08cea65da785850c1e3f282da
3
+ size 24274
train.py ADDED
@@ -0,0 +1,108 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import torch.nn as nn
3
+ import torch.optim as optim
4
+ import os
5
+ import json
6
+ from tqdm import tqdm, trange
7
+ import time
8
+
9
# Load the training corpus and derive the character vocabulary from it
with open("train_data.txt", encoding="utf-8") as corpus_file:
    training_text = corpus_file.read()
chars = sorted(set(training_text))  # Unique characters, in a stable order
char_to_idx = {ch: i for i, ch in enumerate(chars)}
idx_to_char = {i: ch for i, ch in enumerate(chars)}

# Model parameters
input_size = len(chars)
hidden_size = 32
output_size = len(chars)
sequence_length = 5
epochs = 1000
learning_rate = 0.0001
model_path = "tiny_llm.pth"

# Create training data (input-output pairs): every window of
# `sequence_length` characters is paired with the character that follows it.
train_data = []
for i in range(len(training_text) - sequence_length):
    input_seq = training_text[i : i + sequence_length]
    target_char = training_text[i + sequence_length]
    train_data.append((torch.tensor([char_to_idx[ch] for ch in input_seq]), char_to_idx[target_char]))
30
+
31
+ # Define the simple RNN model
32
class SimpleRNN(nn.Module):
    """Character-level RNN: one-hot input -> ``nn.RNN`` -> linear layer.

    Args:
        input_size: Number of distinct character indices (one-hot width).
        hidden_size: Width of the recurrent hidden state.
        output_size: Number of logits produced per prediction.
    """

    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        self.hidden_size = hidden_size
        self.rnn = nn.RNN(input_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x, hidden):
        """Run one unbatched sequence of character indices through the network.

        Args:
            x: 1-D integer tensor of character indices.
            hidden: Initial hidden state of shape ``(1, 1, hidden_size)``.

        Returns:
            A tuple ``(logits, hidden)``: logits of shape ``(1, output_size)``
            for the last time step, and the updated hidden state.
        """
        # Use the width this layer was built with, not a module-level global:
        # a model restored from disk must keep encoding with its own
        # vocabulary size even if the current corpus has a different one.
        x = torch.nn.functional.one_hot(x, num_classes=self.rnn.input_size).float()
        # unsqueeze(0) adds the batch dimension expected by batch_first=True.
        out, hidden = self.rnn(x.unsqueeze(0), hidden)
        out = self.fc(out[:, -1, :])  # Take last time step's output
        return out, hidden
44
+
45
# Load model if available
if os.path.exists(model_path):
    # NOTE(security): weights_only=False unpickles arbitrary Python objects.
    # Only load checkpoint files you created yourself or otherwise trust.
    model = torch.load(model_path, weights_only=False)
    # The saved vocabulary replaces the one derived from the current corpus so
    # that indices match what the checkpoint was trained with.
    with open("vocab.json", "r") as f:
        chars = json.loads(f.read())
    char_to_idx = {ch: i for i, ch in enumerate(chars)}
    idx_to_char = {i: ch for i, ch in enumerate(chars)}
    print("Loaded pre-trained model.")
else:
    print("Training new model...")
    # Initialize the model
    model = SimpleRNN(input_size, hidden_size, output_size)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    for epoch in range(epochs):
        try:
            total_loss = 0
            hidden = torch.zeros(1, 1, hidden_size)

            pbar = tqdm(train_data, desc=f"Epoch={epoch}, Loss=N/A")
            count = 0
            for input_seq, target in pbar:
                count += 1
                optimizer.zero_grad()
                # detach() cuts the graph at the window boundary so backward()
                # only covers the current sample.
                output, hidden = model(input_seq, hidden.detach())
                loss = criterion(output, torch.tensor([target]))
                loss.backward()
                optimizer.step()
                total_loss += loss.item()
                # Running mean loss for the epoch so far.
                pbar.desc = f"Epoch={epoch}, Loss={total_loss / count:.12f}"

            pbar.close()
            time.sleep(1)  # NOTE(review): presumably lets the progress bar output settle - confirm
        except KeyboardInterrupt:
            # Ctrl-C stops training early; whatever was learned is still saved.
            break

    # (A forward pass whose result was discarded used to sit here; it raised
    # NameError whenever train_data was empty, so it has been removed.)

    # Save the trained model
    torch.save(model, model_path)
    with open("vocab.json", "w") as f:
        f.write(json.dumps(chars))
    print("Model saved.")
89
+
90
# Text generation function
def generate_text(start_text, length=10000):
    """Greedily extend ``start_text`` by ``length`` characters.

    The prompt is encoded with ``char_to_idx`` and fed to the module-level
    ``model``. At each step the highest-scoring character is appended, and the
    input window slides forward by one position while the hidden state is
    carried over.

    Args:
        start_text: Prompt; every character must be present in ``char_to_idx``.
        length: Number of characters to generate.

    Returns:
        ``start_text`` followed by the generated characters. An empty prompt
        is returned unchanged because there is nothing to condition on.

    Raises:
        KeyError: If ``start_text`` contains a character outside the vocabulary.
    """
    if not start_text:
        # An empty prompt would produce an empty index tensor that the
        # model's one-hot encoding rejects.
        return start_text

    model.eval()
    hidden = torch.zeros(1, 1, hidden_size)
    input_seq = torch.tensor([char_to_idx[ch] for ch in start_text])

    pieces = [start_text]
    # Inference only: without no_grad() the autograd graph would keep growing
    # through the carried hidden state for every generated character.
    with torch.no_grad():
        for _ in trange(length):
            output, hidden = model(input_seq, hidden)
            predicted_idx = output.argmax().item()
            pieces.append(idx_to_char[predicted_idx])
            # Drop the oldest character and append the prediction.
            input_seq = torch.cat((input_seq[1:], torch.tensor([predicted_idx])))

    return "".join(pieces)
104
+
105
+ # Generate some text
106
+
107
while True:
    try:
        prompt = input("Ask LLM: ")
    except (EOFError, KeyboardInterrupt):
        # Ctrl-D / Ctrl-C at the prompt ends the session without a traceback.
        print()
        break
    print("LLM Output: ", generate_text(prompt))
train_data.txt ADDED
@@ -0,0 +1,193 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ The quick brown fox jumps over the lazy dog. This sentence contains every letter of the English alphabet.
2
+
3
+ Artificial intelligence is a wonderful field that combines computer science and large datasets to create systems capable of performing complex tasks.
4
+
5
+ In the heart of the city, a small coffee shop thrives, attracting customers with its aromatic brews and cozy atmosphere.
6
+
7
+ Quantum mechanics is a fundamental theory in physics that describes nature at the smallest scales, such as atoms and subatomic particles.
8
+
9
+ Exploring space has always been a dream of humanity, with missions to the Moon, Mars, and beyond capturing our imagination.
10
+
11
+ Learning new languages opens up opportunities for communication and understanding across different cultures and communities.
12
+
13
+ The history of ancient civilizations reveals how humans have evolved socially, politically, and technologically over thousands of years.
14
+
15
+ Cooking is both an art and a science, requiring creativity, precision, and a deep understanding of flavors and ingredients.
16
+
17
+ Climate change poses significant challenges to our planet, influencing weather patterns, ecosystems, and biodiversity worldwide.
18
+
19
+ Books are portals to other worlds, offering knowledge, entertainment, and inspiration through stories and ideas crafted by authors.
20
+ The quick brown fox jumps over the lazy dog. This sentence contains every letter of the English alphabet.
21
+
22
+ Artificial intelligence is a wonderful field that combines computer science and data to create intelligent systems.
23
+
24
+ In the heart of the city, a small café serves the best coffee, attracting visitors from all around the neighborhood.
25
+
26
+ Exploring space has always been a dream of humanity, with countless scientists working tirelessly to make it a reality.
27
+
28
+ Books open doors to new worlds, allowing readers to experience lives and cultures different from their own.
29
+
30
+ Climate change poses significant challenges, but innovative solutions are being developed to combat its effects worldwide.
31
+
32
+ Learning new skills can be both challenging and rewarding, often leading to personal and professional growth.
33
+
34
+ History is filled with fascinating stories of individuals who changed the world with their ideas and determination.
35
+
36
+ Music has the power to evoke deep emotions, bringing people together across different cultures and languages.
37
+
38
+ Technology continues to evolve at a rapid pace, transforming the way we live, work, and communicate with each other.
39
+ The quick brown fox jumps over the lazy dog. This sentence contains every letter of the English alphabet.
40
+
41
+ In the heart of the city, a small café serves the best coffee, attracting visitors from all around the neighborhood.
42
+
43
+ Autumn leaves fall gently to the ground, painting the earth in shades of red, orange, and yellow.
44
+
45
+ A journey of a thousand miles begins with a single step, as the ancient philosopher once said.
46
+
47
+ The scientist conducted numerous experiments to test her hypothesis about the behavior of light in different mediums.
48
+
49
+ History books are filled with stories of great leaders who shaped the world through their vision and determination.
50
+
51
+ Under the starlit sky, a group of friends gathered around a campfire, sharing stories and laughter late into the night.
52
+
53
+ Deep in the forest, a hidden cabin provided shelter for travelers seeking refuge from the storm.
54
+
55
+ Every morning, the fisherman sets out to sea, hoping for a bountiful catch to bring back to his family.
56
+
57
+ Artificial intelligence is transforming industries, offering new possibilities for innovation and efficiency in everyday tasks.
58
+ The quick brown fox jumps over the lazy dog.
59
+ A journey of a thousand miles begins with a single step.
60
+ The sun sets beautifully over the horizon, painting the sky in hues of orange and pink.
61
+ Learning is a lifelong process that enriches our minds and souls.
62
+ In the heart of the forest, a gentle stream winds its way through ancient trees.
63
+ Books are windows to countless worlds, waiting to be explored.
64
+ Every challenge presents an opportunity for growth and self-discovery.
65
+ The aroma of freshly brewed coffee filled the cozy kitchen on a rainy morning.
66
+ Friendship is built on trust, understanding, and shared experiences.
67
+ The stars twinkled like diamonds scattered across the velvet night sky.
68
+
69
+ Technology continues to evolve at an unprecedented pace, shaping the future of humanity.
70
+ A small act of kindness can have a ripple effect, touching lives in unexpected ways.
71
+ Mountains stood tall against the clear blue sky, their peaks dusted with snow.
72
+ Music has the power to evoke emotions words alone cannot express.
73
+ Curiosity drives us to ask questions, seek answers, and expand our horizons.
74
+ The ocean waves crashed against the shore, a symphony of nature's rhythm.
75
+ History teaches us valuable lessons if we take the time to listen and reflect.
76
+ Children laughed as they played under the warm glow of the afternoon sun.
77
+ Dreams inspire us to reach beyond our limits and create something extraordinary.
78
+ Life is a tapestry woven from moments of joy, struggle, love, and resilience.
79
+ The quick brown fox jumps over the lazy dog. This sentence contains every letter of the English alphabet, making it a pangram often used for typing practice and font displays.
80
+
81
+ Artificial intelligence is a wonderful field that combines computer science and robust datasets to enable problem-solving. It encompasses various subfields such as machine learning, neural networks, and natural language processing.
82
+
83
+ In the heart of the city, there is a small café known for its aromatic coffee and freshly baked pastries. Every morning, people from all walks of life gather here to enjoy a warm cup of coffee and share stories.
84
+
85
+ The sun sets behind the mountains, painting the sky with hues of orange and pink. As night falls, the stars begin to twinkle, creating a mesmerizing view that captivates everyone who gazes upon it.
86
+
87
+ Books are a great source of knowledge and entertainment. They transport readers to different worlds, introduce them to diverse characters, and offer insights into various aspects of life, history, and culture.
88
+
89
+ Walking through the forest, one can hear the rustling of leaves and the chirping of birds. The air is fresh, filled with the scent of pine and earth, providing a peaceful escape from the hustle and bustle of daily life.
90
+
91
+ Technology continues to evolve at a rapid pace, transforming industries and reshaping the way we live and work. From smartphones to autonomous vehicles, innovations are constantly emerging to enhance convenience and efficiency.
92
+
93
+ Music has the power to evoke emotions and bring people together. Whether it’s a classical symphony, a lively jazz tune, or a modern pop song, melodies have a unique way of resonating with listeners across the globe.
94
+
95
+ Exploring new places can be an exhilarating experience. Each destination offers its own charm, from historical landmarks and vibrant cultures to stunning landscapes and hidden gems waiting to be discovered.
96
+
97
+ Science fiction novels often explore futuristic concepts and imaginative worlds. These stories challenge readers to think beyond the present and consider possibilities about space exploration, time travel, and advanced technologies.
98
+ The quick brown fox jumps over the lazy dog. This sentence contains every letter of the English alphabet.
99
+
100
+ Artificial intelligence is a wonderful field that combines computer science and data to create intelligent systems.
101
+
102
+ The sun sets in the west, painting the sky with hues of orange and pink, marking the end of another day.
103
+
104
+ Reading books is an excellent way to expand your knowledge and improve your vocabulary.
105
+
106
+ In the heart of the city, a small café serves the best coffee, attracting visitors from all around.
107
+
108
+ Learning new skills can be challenging, but it is also incredibly rewarding and fulfilling.
109
+
110
+ The mountain trail was steep and rocky, but the view from the top was absolutely breathtaking.
111
+
112
+ Music has the power to evoke deep emotions and bring back memories from the past.
113
+
114
+ Scientists are constantly exploring new ways to harness renewable energy sources like wind and solar power.
115
+
116
+ A journey of a thousand miles begins with a single step, highlighting the importance of starting small.
117
+ The quick brown fox jumps over the lazy dog.
118
+ A journey of a thousand miles begins with a single step.
119
+ The sun rises in the east and sets in the west.
120
+ Water is essential for all forms of life on Earth.
121
+ Reading books can open doors to new worlds and ideas.
122
+ The cat slept on the couch while the rain poured outside.
123
+ Technology continues to evolve at a rapid pace.
124
+ Healthy eating habits contribute to a longer life.
125
+ Artificial intelligence is transforming various industries.
126
+ The mountain peak was covered in snow all year round.
127
+ Friendship is built on trust, respect, and shared experiences.
128
+ Learning a new language can be both challenging and rewarding.
129
+ The river flows gently through the lush green valley.
130
+ Music has the power to evoke deep emotions in people.
131
+ Innovation often arises from solving everyday problems.
132
+ Children played happily in the park under the warm sun.
133
+ History teaches us valuable lessons about the past.
134
+ The scientist conducted numerous experiments to test the theory.
135
+ Walking in nature helps reduce stress and improve mental health.
136
+ Good communication skills are vital for personal and professional success.
137
+ The quick brown fox jumps over the lazy dog. This sentence contains every letter of the English alphabet.
138
+
139
+ In the heart of the city, skyscrapers tower above the bustling streets, where people from all walks of life go about their daily routines.
140
+
141
+ A gentle breeze rustled the leaves of the old oak tree, casting dappled shadows on the ground below. Birds chirped melodiously, adding to the serene ambiance of the park.
142
+
143
+ History is filled with tales of great explorers who ventured into the unknown, driven by curiosity and the desire to uncover new lands and cultures.
144
+
145
+ Quantum physics challenges our understanding of reality, introducing concepts that seem to defy common sense, such as entanglement and superposition.
146
+
147
+ Artificial intelligence has rapidly evolved over the past decade, transforming industries and reshaping the way we interact with technology in our everyday lives.
148
+
149
+ The aroma of freshly brewed coffee wafted through the air, enticing customers to step into the cozy café tucked away on a quiet street corner.
150
+
151
+ Music has the power to evoke deep emotions, transporting listeners to different times and places with just a single chord or melody.
152
+
153
+ Ancient civilizations built remarkable structures that continue to astonish us today, from the pyramids of Egypt to the temples of Angkor Wat.
154
+
155
+ Climate change poses one of the greatest challenges humanity has ever faced, requiring urgent action to mitigate its impacts and protect future generations.
156
+ The quick brown fox jumps over the lazy dog. This sentence contains every letter of the English alphabet, making it a pangram often used for typing practice and font displays.
157
+
158
+ Artificial intelligence is transforming industries by enabling machines to learn from data and make decisions with minimal human intervention. From healthcare to finance, AI applications are expanding rapidly.
159
+
160
+ The history of the Roman Empire spans centuries, beginning with the founding of Rome in 753 BC and ending with the fall of Constantinople in 1453 AD. It was one of the most powerful empires in world history.
161
+
162
+ Photosynthesis is the process by which green plants and some other organisms use sunlight to synthesize foods with the help of chlorophyll. This natural phenomenon is essential for life on Earth.
163
+
164
+ Quantum mechanics describes the behavior of matter and energy at very small scales, such as atoms and subatomic particles. It has led to groundbreaking technologies like semiconductors and lasers.
165
+
166
+ Mount Everest, located in the Himalayan mountain range, is the highest peak on Earth above sea level. Climbers from around the world attempt to summit it each year, despite its challenging conditions.
167
+
168
+ In literature, metaphors are figures of speech that compare two unlike things by stating that one thing is another. They add depth and creativity to writing, allowing readers to see connections in new ways.
169
+
170
+ Renewable energy sources, such as solar, wind, and hydroelectric power, are becoming increasingly important as the world seeks sustainable alternatives to fossil fuels. These technologies aim to reduce carbon emissions.
171
+
172
+ The periodic table organizes chemical elements based on their atomic number and properties. Created by Dmitri Mendeleev in 1869, it remains a cornerstone of chemistry education and research.
173
+
174
+ Cultural diversity enriches societies by bringing together people with different traditions, languages, and perspectives. Embracing this diversity fosters mutual understanding and innovation across communities.
175
+ The quick brown fox jumps over the lazy dog. This sentence contains every letter of the English alphabet, making it a popular choice for typing practice and font displays.
176
+
177
+ Artificial intelligence is transforming industries by automating repetitive tasks, analyzing vast datasets, and enabling smarter decision-making. Its applications range from healthcare to finance, and its potential continues to grow as technology advances.
178
+
179
+ In a small village nestled at the foot of towering mountains, life moved at a slower pace. Farmers tended to their fields, children played by the river, and elders shared stories under the ancient oak tree that had stood there for centuries.
180
+
181
+ Scientific discoveries often begin with curiosity and persistence. Marie Curie’s groundbreaking work on radioactivity not only earned her two Nobel Prizes but also paved the way for advancements in medicine and energy production.
182
+
183
+ Rain poured down in sheets, drenching everything in its path. Despite the storm, she walked briskly through the empty streets, her mind racing with thoughts of what lay ahead. Each step brought her closer to answers—and more questions.
184
+
185
+ Books have the power to transport readers to different worlds, offering insights into lives and cultures far removed from their own. Whether fiction or nonfiction, they inspire imagination and deepen understanding.
186
+
187
+ Technology has revolutionized communication, allowing people across the globe to connect instantly. From handwritten letters to emails and video calls, the evolution of how we share ideas reflects humanity's relentless pursuit of progress.
188
+
189
+ A chef carefully plated the final dish, garnishing it with delicate herbs and edible flowers. The aroma filled the kitchen, promising a feast that would delight both the palate and the eyes. Culinary artistry turns simple ingredients into extraordinary experiences.
190
+
191
+ History teaches us valuable lessons about resilience, innovation, and human nature. By studying past civilizations, we gain perspective on our present challenges and inspiration for shaping a better future.
192
+
193
+ Music transcends language barriers, evoking emotions that words alone cannot express. From classical symphonies to modern pop songs, melodies resonate deeply within us, creating connections that span generations and geographies.
vocab.json ADDED
@@ -0,0 +1 @@
 
 
1
+ ["\n", " ", "'", ",", "-", ".", "1", "3", "4", "5", "6", "7", "8", "9", "A", "B", "C", "D", "E", "F", "G", "H", "I", "L", "M", "N", "P", "Q", "R", "S", "T", "U", "W", "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z", "\u00e9", "\u2014", "\u2019"]