ayyuce committed on
Commit
cec140f
·
verified ·
1 Parent(s): e3e4dd3

Create README.md

Browse files
Files changed (1) hide show
  1. README.md +87 -3
README.md CHANGED
@@ -1,3 +1,87 @@
1
- ---
2
- license: gpl-3.0
3
- ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: gpl-3.0
3
+ datasets:
4
+ - karpathy/tiny_shakespeare
5
+ language:
6
+ - en
7
+ pipeline_tag: text-generation
8
+ ---
9
+
10
+ ## Usage
11
+
12
+ ```python
+ seq_length = 32
13
+ batch_size = 16
14
+ embed_dim = 256
15
+ num_heads = 4
16
+ ff_dim = 512
17
+ num_layers = 2
18
+ noise_prob = 0.3
19
+ device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
20
+
21
+ class PositionalEncoding(nn.Module):
22
+ def __init__(self, d_model, max_len=5000):
23
+ super().__init__()
24
+ pe = torch.zeros(max_len, d_model)
25
+ position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
26
+ div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-np.log(10000.0) / d_model))
27
+ pe[:, 0::2] = torch.sin(position * div_term)
28
+ pe[:, 1::2] = torch.cos(position * div_term)
29
+ self.register_buffer('pe', pe.unsqueeze(0))
30
+
31
+ def forward(self, x):
32
+ return x + self.pe[:, :x.size(1)]
33
+
34
+ class TransformerBlock(nn.Module):
35
+ def __init__(self, embed_dim, num_heads, ff_dim):
36
+ super().__init__()
37
+ self.attention = nn.MultiheadAttention(embed_dim, num_heads)
38
+ self.norm1 = nn.LayerNorm(embed_dim)
39
+ self.ff = nn.Sequential(
40
+ nn.Linear(embed_dim, ff_dim),
41
+ nn.ReLU(),
42
+ nn.Linear(ff_dim, embed_dim)
43
+ )
44
+ self.norm2 = nn.LayerNorm(embed_dim)
45
+
46
+ def forward(self, x):
47
+ attn_output, _ = self.attention(x, x, x)
48
+ x = self.norm1(x + attn_output)
49
+ ff_output = self.ff(x)
50
+ return self.norm2(x + ff_output)
51
+
52
+ class DenoisingTransformer(nn.Module):
53
+ def __init__(self, vocab_size, embed_dim, num_heads, ff_dim, num_layers):
54
+ super().__init__()
55
+ self.embedding = nn.Embedding(vocab_size, embed_dim)
56
+ self.positional_encoding = PositionalEncoding(embed_dim)
57
+ self.transformer_blocks = nn.ModuleList([
58
+ TransformerBlock(embed_dim, num_heads, ff_dim) for _ in range(num_layers)
59
+ ])
60
+ self.fc = nn.Linear(embed_dim, vocab_size)
61
+
62
+ def forward(self, x):
63
+ x = self.embedding(x)
64
+ x = self.positional_encoding(x)
65
+ for block in self.transformer_blocks:
66
+ x = block(x)
67
+ return self.fc(x)
68
+
69
+ def load_model(path, device='cpu'):
70
+ checkpoint = torch.load(path, map_location=device)
71
+ hp = checkpoint['hyperparameters']
72
+
73
+ model = DenoisingTransformer(
74
+ hp['vocab_size'],
75
+ hp['embed_dim'],
76
+ hp['num_heads'],
77
+ hp['ff_dim'],
78
+ hp['num_layers']
79
+ ).to(device)
80
+
81
+ model.load_state_dict(checkpoint['model_state_dict'])
82
+ return model, checkpoint['word2idx'], checkpoint['idx2word']
83
+
84
+ loaded_model, word2idx, idx2word = load_model('denoising_transformer.pth', device=device)
85
+
86
+ print("Model loaded successfully!")
87
+ print(f"Model device: {next(loaded_model.parameters()).device}")
+ ```