danieldk (HF Staff) committed
Commit 28b4d27 · Parent: a1925ca

Add layers

Files changed (27):
  1. build/torch25-cxx11-cu118-x86_64-linux/activation/__init__.py +14 -9
  2. build/torch25-cxx11-cu118-x86_64-linux/activation/layers.py +65 -0
  3. build/torch25-cxx11-cu121-x86_64-linux/activation/__init__.py +14 -9
  4. build/torch25-cxx11-cu121-x86_64-linux/activation/layers.py +65 -0
  5. build/torch25-cxx11-cu124-x86_64-linux/activation/__init__.py +14 -9
  6. build/torch25-cxx11-cu124-x86_64-linux/activation/layers.py +65 -0
  7. build/torch25-cxx98-cu118-x86_64-linux/activation/__init__.py +14 -9
  8. build/torch25-cxx98-cu118-x86_64-linux/activation/layers.py +65 -0
  9. build/torch25-cxx98-cu121-x86_64-linux/activation/__init__.py +14 -9
  10. build/torch25-cxx98-cu121-x86_64-linux/activation/layers.py +65 -0
  11. build/torch25-cxx98-cu124-x86_64-linux/activation/__init__.py +14 -9
  12. build/torch25-cxx98-cu124-x86_64-linux/activation/layers.py +65 -0
  13. build/torch26-cxx11-cu118-x86_64-linux/activation/__init__.py +14 -9
  14. build/torch26-cxx11-cu118-x86_64-linux/activation/layers.py +65 -0
  15. build/torch26-cxx11-cu124-x86_64-linux/activation/__init__.py +14 -9
  16. build/torch26-cxx11-cu124-x86_64-linux/activation/layers.py +65 -0
  17. build/torch26-cxx11-cu126-x86_64-linux/activation/__init__.py +14 -9
  18. build/torch26-cxx11-cu126-x86_64-linux/activation/layers.py +65 -0
  19. build/torch26-cxx98-cu118-x86_64-linux/activation/__init__.py +14 -9
  20. build/torch26-cxx98-cu118-x86_64-linux/activation/layers.py +65 -0
  21. build/torch26-cxx98-cu124-x86_64-linux/activation/__init__.py +14 -9
  22. build/torch26-cxx98-cu124-x86_64-linux/activation/layers.py +65 -0
  23. build/torch26-cxx98-cu126-x86_64-linux/activation/__init__.py +14 -9
  24. build/torch26-cxx98-cu126-x86_64-linux/activation/layers.py +65 -0
  25. tests/kernels/test_activation.py +30 -4
  26. torch-ext/activation/__init__.py +14 -9
  27. torch-ext/activation/layers.py +65 -0
build/torch25-cxx11-cu118-x86_64-linux/activation/__init__.py CHANGED
@@ -1,15 +1,8 @@
 import torch
 
-try:
-    from ._ops import ops
-except ImportError as e:
-    # Fallback for local development.
-    try:
-        import _activation
-
-        ops = torch.ops._activition
-    except ImportError:
-        raise e
+from ._ops import ops
+
+from . import layers
 
 
 def silu_and_mul(out: torch.Tensor, x: torch.Tensor) -> None:
@@ -45,3 +38,15 @@ def gelu_new(out: torch.Tensor, x: torch.Tensor) -> None:
 def gelu_quick(out: torch.Tensor, x: torch.Tensor) -> None:
     ops.gelu_quick(out, x)
     return out
+
+
+__all__ = [
+    "silu_and_mul",
+    "gelu_and_mul",
+    "gelu_tanh_and_mul",
+    "fatrelu_and_mul",
+    "gelu_fast",
+    "gelu_new",
+    "gelu_quick",
+    "layers",
+]
build/torch25-cxx11-cu118-x86_64-linux/activation/layers.py ADDED
@@ -0,0 +1,65 @@
+import torch
+import torch.nn as nn
+
+from ._ops import ops
+
+
+class SiluAndMul(nn.Module):
+    def forward(self, x: torch.Tensor):
+        d = x.shape[-1] // 2
+        output_shape = x.shape[:-1] + (d,)
+        out = torch.empty(output_shape, dtype=x.dtype, device=x.device)
+        ops.silu_and_mul(out, x)
+        return out
+
+
+class GeluAndMul(nn.Module):
+    def forward(self, x: torch.Tensor):
+        d = x.shape[-1] // 2
+        output_shape = x.shape[:-1] + (d,)
+        out = torch.empty(output_shape, dtype=x.dtype, device=x.device)
+        ops.gelu_and_mul(out, x)
+        return out
+
+
+class GeluTanhAndMul(nn.Module):
+    def forward(self, x: torch.Tensor):
+        d = x.shape[-1] // 2
+        output_shape = x.shape[:-1] + (d,)
+        out = torch.empty(output_shape, dtype=x.dtype, device=x.device)
+        ops.gelu_tanh_and_mul(out, x)
+        return out
+
+
+class FatreluAndMul(nn.Module):
+    def __init__(self, threshold: float = 0.0):
+        super().__init__()
+        self.threshold = threshold
+
+    def forward(self, x: torch.Tensor):
+        d = x.shape[-1] // 2
+        output_shape = x.shape[:-1] + (d,)
+        out = torch.empty(output_shape, dtype=x.dtype, device=x.device)
+        ops.fatrelu_and_mul(out, x, self.threshold)
+        return out
+
+
+class FastGELU(nn.Module):
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        out = torch.empty_like(x)
+        ops.gelu_fast(out, x)
+        return out
+
+
+class NewGELU(nn.Module):
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        out = torch.empty_like(x)
+        ops.gelu_new(out, x)
+        return out
+
+
+class QuickGELU(nn.Module):
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        out = torch.empty_like(x)
+        ops.gelu_quick(out, x)
+        return out
build/torch25-cxx11-cu121-x86_64-linux/activation/__init__.py CHANGED
build/torch25-cxx11-cu121-x86_64-linux/activation/layers.py ADDED
build/torch25-cxx11-cu124-x86_64-linux/activation/__init__.py CHANGED
build/torch25-cxx11-cu124-x86_64-linux/activation/layers.py ADDED
build/torch25-cxx98-cu118-x86_64-linux/activation/__init__.py CHANGED
build/torch25-cxx98-cu118-x86_64-linux/activation/layers.py ADDED
build/torch25-cxx98-cu121-x86_64-linux/activation/__init__.py CHANGED
build/torch25-cxx98-cu121-x86_64-linux/activation/layers.py ADDED
build/torch25-cxx98-cu124-x86_64-linux/activation/__init__.py CHANGED
build/torch25-cxx98-cu124-x86_64-linux/activation/layers.py ADDED
build/torch26-cxx11-cu118-x86_64-linux/activation/__init__.py CHANGED
build/torch26-cxx11-cu118-x86_64-linux/activation/layers.py ADDED
build/torch26-cxx11-cu124-x86_64-linux/activation/__init__.py CHANGED
build/torch26-cxx11-cu124-x86_64-linux/activation/layers.py ADDED
build/torch26-cxx11-cu126-x86_64-linux/activation/__init__.py CHANGED
build/torch26-cxx11-cu126-x86_64-linux/activation/layers.py ADDED
build/torch26-cxx98-cu118-x86_64-linux/activation/__init__.py CHANGED
build/torch26-cxx98-cu118-x86_64-linux/activation/layers.py ADDED
build/torch26-cxx98-cu124-x86_64-linux/activation/__init__.py CHANGED
build/torch26-cxx98-cu124-x86_64-linux/activation/layers.py ADDED
build/torch26-cxx98-cu126-x86_64-linux/activation/__init__.py CHANGED
build/torch26-cxx98-cu126-x86_64-linux/activation/layers.py ADDED
tests/kernels/test_activation.py CHANGED
@@ -71,28 +71,34 @@ def test_act_and_mul(
         torch_fn = silu_and_mul
         fn = activation.silu_and_mul
         op = activation.ops.silu_and_mul
+        layer = activation.layers.SiluAndMul()
     elif activation_name == "gelu":
         torch_fn = lambda x: gelu_and_mul(x, "none")
         fn = activation.gelu_and_mul
         op = activation.ops.gelu_and_mul
+        layer = activation.layers.GeluAndMul()
     elif activation_name == "gelu_tanh":
         torch_fn = lambda x: gelu_and_mul(x, "tanh")
         fn = activation.gelu_tanh_and_mul
         op = activation.ops.gelu_tanh_and_mul
+        layer = activation.layers.GeluTanhAndMul()
     elif activation_name == "fatrelu":
         threshold = random.uniform(0, 1)
         torch_fn = lambda x: fatrelu_and_mul(x, threshold)
         fn = lambda out, x: activation.fatrelu_and_mul(out, x, threshold)
         op = activation.ops.fatrelu_and_mul
+        layer = activation.layers.FatreluAndMul(threshold)
 
     out_shape = x.shape[:-1] + (x.shape[-1] // 2,)
     out = torch.empty(out_shape, dtype=x.dtype, device=x.device)
     out = fn(out, x)
+    mod_out = layer(x)
     ref_out = torch_fn(x)
 
     # The SiLU, GELU and FatReLU implementations are equivalent to the native
     # PyTorch implementations, so we can do exact comparison.
     torch.testing.assert_close(out, ref_out, atol=0.0, rtol=0.0)
+    torch.testing.assert_close(mod_out, ref_out, atol=0.0, rtol=0.0)
 
     d = x.shape[-1] // 2
     output_shape = x.shape[:-1] + (d,)
@@ -106,9 +112,24 @@ def test_act_and_mul(
 @pytest.mark.parametrize(
     "activation_fns",
     [
-        (gelu_fast, activation.gelu_fast, activation.ops.gelu_fast),
-        (gelu_new, activation.gelu_new, activation.ops.gelu_new),
-        (gelu_quick, activation.gelu_quick, activation.ops.gelu_quick),
+        (
+            gelu_fast,
+            activation.gelu_fast,
+            activation.ops.gelu_fast,
+            activation.layers.FastGELU,
+        ),
+        (
+            gelu_new,
+            activation.gelu_new,
+            activation.ops.gelu_new,
+            activation.layers.NewGELU,
+        ),
+        (
+            gelu_quick,
+            activation.gelu_quick,
+            activation.ops.gelu_quick,
+            activation.layers.QuickGELU,
+        ),
     ],
 )
 @pytest.mark.parametrize("num_tokens", NUM_TOKENS)
@@ -128,12 +149,17 @@ def test_activation(
     torch.manual_seed(seed)
     torch.set_default_device(device)
     x = torch.randn(num_tokens, d, dtype=dtype)
-    torch_fn, fn, op = activation_fns
+    torch_fn, fn, op, cls = activation_fns
+    layer = cls()
     out = fn(torch.empty_like(x), x)
+    layer_out = layer(x)
     ref_out = torch_fn(x)
     torch.testing.assert_close(
         out, ref_out, atol=get_default_atol(out), rtol=get_default_rtol(out)
     )
+    torch.testing.assert_close(
+        out, layer_out, atol=get_default_atol(out), rtol=get_default_rtol(out)
+    )
 
     out = torch.empty_like(x)
     opcheck(op, (out, x))
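The new assertions pin each module wrapper to the existing functional path: layer(x) has to match what fn(out, x) writes into the preallocated buffer, and for the *_and_mul kernels the comparison against the native PyTorch reference stays exact. A minimal standalone sketch of the same check for SiluAndMul, assuming the built activation package is importable (as in this test) and a CUDA device is available, and assuming the test's silu_and_mul reference follows the usual convention of applying SiLU to the first half of the last dimension and multiplying by the second half:

import torch
import torch.nn.functional as F

import activation  # the built package under test; assumed importable

x = torch.randn(16, 2 * 256, dtype=torch.float16, device="cuda")
d = x.shape[-1] // 2

# Hypothetical native-PyTorch reference, mirroring what the test helper is
# assumed to compute: silu(gate) * up over a split of the last dimension.
ref = F.silu(x[..., :d]) * x[..., d:]

layer_out = activation.layers.SiluAndMul()(x)
torch.testing.assert_close(layer_out, ref)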
torch-ext/activation/__init__.py CHANGED
@@ -1,15 +1,8 @@
 import torch
 
-try:
-    from ._ops import ops
-except ImportError as e:
-    # Fallback for local development.
-    try:
-        import _activation
-
-        ops = torch.ops._activition
-    except ImportError:
-        raise e
+from ._ops import ops
+
+from . import layers
 
 
 def silu_and_mul(out: torch.Tensor, x: torch.Tensor) -> None:
@@ -45,3 +38,15 @@ def gelu_new(out: torch.Tensor, x: torch.Tensor) -> None:
 def gelu_quick(out: torch.Tensor, x: torch.Tensor) -> None:
     ops.gelu_quick(out, x)
     return out
+
+
+__all__ = [
+    "silu_and_mul",
+    "gelu_and_mul",
+    "gelu_tanh_and_mul",
+    "fatrelu_and_mul",
+    "gelu_fast",
+    "gelu_new",
+    "gelu_quick",
+    "layers",
+]
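The functions defined here keep their out-parameter convention: the caller allocates the output tensor and the op writes into it (and also returns it). A minimal sketch of that calling pattern, assuming the built activation package is importable and the tensors live on a CUDA device, as in the tests:

import torch

import activation  # assumed importable, as in tests/kernels/test_activation.py

x = torch.randn(8, 2 * 64, dtype=torch.float16, device="cuda")

# The *_and_mul kernels halve the last dimension, so the caller sizes `out`
# accordingly before passing it in.
out = torch.empty(8, 64, dtype=x.dtype, device=x.device)
activation.silu_and_mul(out, x)

# The elementwise GELU variants write into a tensor of the same shape.
y = torch.empty_like(x)
activation.gelu_quick(y, x)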
torch-ext/activation/layers.py ADDED
@@ -0,0 +1,65 @@
+import torch
+import torch.nn as nn
+
+from ._ops import ops
+
+
+class SiluAndMul(nn.Module):
+    def forward(self, x: torch.Tensor):
+        d = x.shape[-1] // 2
+        output_shape = x.shape[:-1] + (d,)
+        out = torch.empty(output_shape, dtype=x.dtype, device=x.device)
+        ops.silu_and_mul(out, x)
+        return out
+
+
+class GeluAndMul(nn.Module):
+    def forward(self, x: torch.Tensor):
+        d = x.shape[-1] // 2
+        output_shape = x.shape[:-1] + (d,)
+        out = torch.empty(output_shape, dtype=x.dtype, device=x.device)
+        ops.gelu_and_mul(out, x)
+        return out
+
+
+class GeluTanhAndMul(nn.Module):
+    def forward(self, x: torch.Tensor):
+        d = x.shape[-1] // 2
+        output_shape = x.shape[:-1] + (d,)
+        out = torch.empty(output_shape, dtype=x.dtype, device=x.device)
+        ops.gelu_tanh_and_mul(out, x)
+        return out
+
+
+class FatreluAndMul(nn.Module):
+    def __init__(self, threshold: float = 0.0):
+        super().__init__()
+        self.threshold = threshold
+
+    def forward(self, x: torch.Tensor):
+        d = x.shape[-1] // 2
+        output_shape = x.shape[:-1] + (d,)
+        out = torch.empty(output_shape, dtype=x.dtype, device=x.device)
+        ops.fatrelu_and_mul(out, x, self.threshold)
+        return out
+
+
+class FastGELU(nn.Module):
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        out = torch.empty_like(x)
+        ops.gelu_fast(out, x)
+        return out
+
+
+class NewGELU(nn.Module):
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        out = torch.empty_like(x)
+        ops.gelu_new(out, x)
+        return out
+
+
+class QuickGELU(nn.Module):
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        out = torch.empty_like(x)
+        ops.gelu_quick(out, x)
+        return out
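The layer classes are thin nn.Module wrappers around the same ops that allocate their own output, so they can be dropped straight into a model definition. A usage sketch under the same assumptions (built activation package, CUDA tensors); the output shapes follow directly from the forward implementations above:

import torch

import activation  # assumed importable, as in the tests

x = torch.randn(4, 2 * 128, dtype=torch.float16, device="cuda")

silu_mul = activation.layers.SiluAndMul()
y = silu_mul(x)  # shape (4, 128): the last dimension is halved

fatrelu_mul = activation.layers.FatreluAndMul(threshold=0.5)
z = fatrelu_mul(x)  # shape (4, 128)

quick_gelu = activation.layers.QuickGELU()
w = quick_gelu(y)  # elementwise, same shape as its input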