xcx0902 committed
Commit b9d1833 · verified · 1 Parent(s): f8e2e3c

Upload folder using huggingface_hub

Files changed (3)
  1. parameter.json +6 -0
  2. run.py +54 -0
  3. train.py +36 -60
parameter.json ADDED
@@ -0,0 +1,6 @@
+{
+    "hidden_size": 2048,
+    "sequence_length": 5,
+    "learning_rate": 0.0001,
+    "model_path": "tiny_llm_hidden2048.pth"
+}
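
For reference, a minimal sketch of how these keys are consumed; the context-manager form and the .get() fallbacks are editorial suggestions, not code from this commit (both scripts actually use json.loads(open("parameter.json").read())):

    import json

    # Load hyperparameters; the fallback values mirror the committed
    # defaults and are an assumption, not part of the repository.
    with open("parameter.json") as f:
        parameters = json.load(f)

    hidden_size = parameters.get("hidden_size", 2048)
    sequence_length = parameters.get("sequence_length", 5)
    learning_rate = parameters.get("learning_rate", 0.0001)
    model_path = parameters.get("model_path", "tiny_llm_hidden2048.pth")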
run.py ADDED
@@ -0,0 +1,54 @@
+import torch
+import torch.nn as nn
+import json
+from tqdm import tqdm, trange
+
+# Model parameters
+parameters = json.loads(open("parameter.json").read())
+model_path = parameters["model_path"]
+
+# Define the simple RNN model
+class SimpleRNN(nn.Module):
+    def __init__(self, input_size, hidden_size, output_size):
+        super(SimpleRNN, self).__init__()
+        self.hidden_size = hidden_size
+        self.rnn = nn.RNN(input_size, hidden_size, batch_first=True)
+        self.fc = nn.Linear(hidden_size, output_size)
+
+    def forward(self, x, hidden):
+        x = torch.nn.functional.one_hot(x, num_classes=input_size).float()
+        out, hidden = self.rnn(x.unsqueeze(0), hidden)
+        out = self.fc(out[:, -1, :])  # Take last time step's output
+        return out, hidden
+
+model = torch.load(model_path, weights_only=False)
+with open("vocab.json", "r") as f:
+    chars = json.loads(f.read())
+char_to_idx = {ch: i for i, ch in enumerate(chars)}
+idx_to_char = {i: ch for i, ch in enumerate(chars)}
+print("Loaded pre-trained model.")
+
+input_size = len(chars)
+hidden_size = parameters["hidden_size"]
+output_size = len(chars)
+
+# Text generation function
+def generate_text(start_text, length):
+    model.eval()
+    hidden = torch.zeros(1, 1, hidden_size)
+    input_seq = torch.tensor([char_to_idx[ch] for ch in start_text])
+
+    generated_text = start_text
+    for _ in trange(length):
+        output, hidden = model(input_seq, hidden)
+        predicted_idx = output.argmax().item()
+        generated_text += idx_to_char[predicted_idx]
+        input_seq = torch.cat((input_seq[1:], torch.tensor([predicted_idx])))
+
+    return generated_text
+
+# Generate some text
+while True:
+    prompt = input("Ask LLM: ")
+    length = int(input("Length of text: "))
+    print("LLM Output: ", generate_text(prompt, length))
train.py CHANGED
@@ -13,13 +13,14 @@ char_to_idx = {ch: i for i, ch in enumerate(chars)}
 idx_to_char = {i: ch for i, ch in enumerate(chars)}
 
 # Model parameters
+parameters = json.loads(open("parameter.json").read())
 input_size = len(chars)
-hidden_size = 2048
+hidden_size = parameters["hidden_size"]
 output_size = len(chars)
-sequence_length = 5
+sequence_length = parameters["sequence_length"]
 epochs = 1000
-learning_rate = 0.0001
-model_path = "tiny_llm_hidden2048.pth"
+learning_rate = parameters["learning_rate"]
+model_path = parameters["model_path"]
 
 # Create training data (input-output pairs)
 train_data = []
@@ -42,67 +43,42 @@ class SimpleRNN(nn.Module):
         out = self.fc(out[:, -1, :])  # Take last time step's output
         return out, hidden
 
-# Load model if available
 if os.path.exists(model_path):
     model = torch.load(model_path, weights_only=False)
-    with open("vocab.json", "r") as f:
-        chars = json.loads(f.read())
-    char_to_idx = {ch: i for i, ch in enumerate(chars)}
-    idx_to_char = {i: ch for i, ch in enumerate(chars)}
-    print("Loaded pre-trained model.")
+    print("Loaded pre-trained model. Continue training...")
 else:
     print("Training new model...")
-    # Initialize the model
     model = SimpleRNN(input_size, hidden_size, output_size)
-    criterion = nn.CrossEntropyLoss()
-    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
-    for epoch in range(epochs):
-        try:
-            total_loss = 0
-            hidden = torch.zeros(1, 1, hidden_size)
-
-            pbar = tqdm(train_data, desc=f"Epoch={epoch}, Loss=N/A")
-            count = 0
-            for input_seq, target in pbar:
-                count += 1
-                optimizer.zero_grad()
-                output, hidden = model(input_seq, hidden.detach())
-                loss = criterion(output, torch.tensor([target]))
-                loss.backward()
-                optimizer.step()
-                total_loss += loss.item()
-                pbar.desc = f"Epoch={epoch}, Loss={total_loss / count:.12f}"
-
-            pbar.close()
-            time.sleep(1)
-        except KeyboardInterrupt:
-            break
-
-    hidden = torch.zeros(1, 1, hidden_size)
-    output, hidden = model(input_seq, hidden.detach())
 
-    # Save the trained model
-    torch.save(model, model_path)
-    with open("vocab.json", "w") as f:
-        f.write(json.dumps(chars))
-    print("Model saved.")
+criterion = nn.CrossEntropyLoss()
+optimizer = optim.Adam(model.parameters(), lr=learning_rate)
+for epoch in range(epochs):
+    try:
+        total_loss = 0
+        hidden = torch.zeros(1, 1, hidden_size)
+
+        pbar = tqdm(train_data, desc=f"Epoch={epoch}, Loss=N/A")
+        count = 0
+        for input_seq, target in pbar:
+            count += 1
+            optimizer.zero_grad()
+            output, hidden = model(input_seq, hidden.detach())
+            loss = criterion(output, torch.tensor([target]))
+            loss.backward()
+            optimizer.step()
+            total_loss += loss.item()
+            pbar.desc = f"Epoch={epoch}, Loss={total_loss / count:.12f}"
+
+        pbar.close()
+        time.sleep(1)
+    except KeyboardInterrupt:
+        break
 
-# Text generation function
-def generate_text(start_text, length=10000):
-    model.eval()
-    hidden = torch.zeros(1, 1, hidden_size)
-    input_seq = torch.tensor([char_to_idx[ch] for ch in start_text])
+hidden = torch.zeros(1, 1, hidden_size)
+output, hidden = model(input_seq, hidden.detach())
 
-    generated_text = start_text
-    for _ in trange(length):
-        output, hidden = model(input_seq, hidden)
-        predicted_idx = output.argmax().item()
-        generated_text += idx_to_char[predicted_idx]
-        input_seq = torch.cat((input_seq[1:], torch.tensor([predicted_idx])))
-
-    return generated_text
-
-# Generate some text
-
-while True:
-    print("LLM Output: ", generate_text(input("Ask LLM: ")))
+# Save the trained model
+torch.save(model, model_path)
+with open("vocab.json", "w") as f:
+    f.write(json.dumps(chars))
+print("Model saved.")