Mirror of https://github.com/kashifulhaque/smoltorch.git (synced 2025-12-06 07:02:51 +00:00)

Commit: nanotorch implementation
tests/test_activations.py | 37 (new file)
@@ -0,0 +1,37 @@
from nanotorch.tensor import Tensor

# Test 1: ReLU
print("Test 1 - ReLU:")
x = Tensor([-2.0, -1.0, 0.0, 1.0, 2.0])
y = x.relu()

y.backward()

print(f"x.data: {x.data}")
print(f"y.data: {y.data}")  # Should be [0, 0, 0, 1, 2]
print(f"x.grad: {x.grad}")  # Should be [0, 0, 0, 1, 1]

# Test 2: Tanh
print("\nTest 2 - Tanh:")
x = Tensor([0.0, 1.0, 2.0])
y = x.tanh()

y.backward()

print(f"x.data: {x.data}")
print(f"y.data: {y.data}")  # Should be [0, 0.76, 0.96] approx
print(f"x.grad: {x.grad}")  # Should be [1, 0.42, 0.07] approx (1 - tanh²)

# Test 3: ReLU in a computation graph
print("\nTest 3 - ReLU in computation:")
x = Tensor([[-1.0, 2.0],
            [3.0, -4.0]])
w = Tensor([[0.5, 0.5],
            [0.5, 0.5]])
z = (x @ w).relu()  # Linear layer + ReLU

z.backward()

print(f"z.data:\n{z.data}")
print(f"x.grad:\n{x.grad}")
print(f"w.grad:\n{w.grad}")
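Note: the expected gradients in this file follow from the elementwise backward rules for ReLU and tanh, dL/dx = dL/dy · 1[x > 0] and dL/dx = dL/dy · (1 − tanh²x). A minimal NumPy check of those rules, purely illustrative and not part of this commit:

import numpy as np

def relu_backward(x, upstream):
    # Gradient flows only where the input was positive
    return upstream * (x > 0).astype(x.dtype)

def tanh_backward(x, upstream):
    # d/dx tanh(x) = 1 - tanh(x)^2
    return upstream * (1.0 - np.tanh(x) ** 2)

x = np.array([-2.0, -1.0, 0.0, 1.0, 2.0])
print(relu_backward(x, np.ones_like(x)))                # [0. 0. 0. 1. 1.]

x = np.array([0.0, 1.0, 2.0])
print(np.round(tanh_backward(x, np.ones_like(x)), 2))   # [1.   0.42 0.07]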
tests/test_add.py | 23 (new file)
@@ -0,0 +1,23 @@
from nanotorch.tensor import Tensor

# Test 1: Simple addition (no broadcasting)
a = Tensor([1.0, 2.0, 3.0])
b = Tensor([4.0, 5.0, 6.0])
c = a + b

c.backward()

print("Test 1 - No broadcasting:")
print(f"a.grad: {a.grad}")  # Should be [1, 1, 1]
print(f"b.grad: {b.grad}")  # Should be [1, 1, 1]

# Test 2: Broadcasting
a = Tensor([[1.0, 2.0]])    # shape (1, 2)
b = Tensor([[3.0], [4.0]])  # shape (2, 1)
c = a + b                   # shape (2, 2)

c.backward()

print("\nTest 2 - Broadcasting:")
print(f"a.grad shape: {a.grad.shape}, values: {a.grad}")  # Should be (1,2) with [[2, 2]]
print(f"b.grad shape: {b.grad.shape}, values: {b.grad}")  # Should be (2,1) with [[2], [2]]
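Note: the broadcast expectations ([[2, 2]] and [[2], [2]]) come from reducing the upstream gradient back to each operand's shape: every axis that was broadcast gets summed. A small NumPy sketch of that un-broadcasting step, shown only to illustrate the rule (not nanotorch's actual code):

import numpy as np

def unbroadcast(grad, shape):
    # Sum out leading axes that were added by broadcasting
    while grad.ndim > len(shape):
        grad = grad.sum(axis=0)
    # Sum over axes that were stretched from size 1
    for axis, size in enumerate(shape):
        if size == 1:
            grad = grad.sum(axis=axis, keepdims=True)
    return grad

upstream = np.ones((2, 2))            # dL/dc for c = a + b
print(unbroadcast(upstream, (1, 2)))  # [[2. 2.]]      -> a.grad
print(unbroadcast(upstream, (2, 1)))  # [[2.] [2.]]    -> b.grad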
tests/test_linear.py | 38 (new file)
@@ -0,0 +1,38 @@
from nanotorch.nn import Linear
from nanotorch.tensor import Tensor

# Test 1: Single sample forward pass
print("Test 1 - Single forward pass:")
layer = Linear(3, 2)  # 3 inputs -> 2 outputs

x = Tensor([1.0, 2.0, 3.0])  # shape (3,)
y = layer(x)                 # shape (2,)

print(f"x.shape: {x.data.shape}")
print(f"y.shape: {y.data.shape}")
print(f"y.data: {y.data}")

# Test 2: Batch forward pass
print("\nTest 2 - Batch forward pass:")
x_batch = Tensor([[1.0, 2.0, 3.0],
                  [4.0, 5.0, 6.0]])  # shape (2, 3) - batch of 2
y_batch = layer(x_batch)             # shape (2, 2)

print(f"x_batch.shape: {x_batch.data.shape}")
print(f"y_batch.shape: {y_batch.data.shape}")

# Test 3: Backward pass
print("\nTest 3 - Backward pass:")
x = Tensor([[1.0, 2.0]])  # shape (1, 2)
layer = Linear(2, 3)      # 2 -> 3

y = layer(x)
loss = y.sum()  # Simple loss for testing

loss.backward()

print(f"W.grad shape: {layer.W.grad.shape}")  # Should be (2, 3)
print(f"b.grad shape: {layer.b.grad.shape}")  # Should be (3,)
print(f"x.grad shape: {x.grad.shape}")        # Should be (1, 2)
print(f"W.grad:\n{layer.W.grad}")
print(f"b.grad: {layer.b.grad}")
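Note: with loss = y.sum(), the upstream gradient is a matrix of ones, so the expected parameter gradients reduce to dW = xᵀ·dy, db = Σ_batch dy, dx = dy·Wᵀ. A plain NumPy sketch of those shapes; the weight layout y = x @ W + b is an assumption inferred from the shapes printed above, not necessarily the layer's exact implementation:

import numpy as np

x = np.array([[1.0, 2.0]])    # (1, 2)
W = np.random.randn(2, 3)     # assumed layout: y = x @ W + b
b = np.zeros(3)

y = x @ W + b                 # (1, 3)
dy = np.ones_like(y)          # gradient of y.sum() w.r.t. y

dW = x.T @ dy                 # (2, 3)
db = dy.sum(axis=0)           # (3,)
dx = dy @ W.T                 # (1, 2)
print(dW.shape, db.shape, dx.shape)  # (2, 3) (3,) (1, 2)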
tests/test_matmul.py | 30 (new file)
@@ -0,0 +1,30 @@
from nanotorch.tensor import Tensor

# Test 1: Simple 2D matmul
print("Test 1 - Simple 2D matmul:")
x = Tensor([[1.0, 2.0, 3.0],
            [4.0, 5.0, 6.0]])  # (2, 3)
y = Tensor([[7.0, 8.0],
            [9.0, 10.0],
            [11.0, 12.0]])     # (3, 2)
z = x @ y                      # (2, 2)

z.backward()

print(f"z.data:\n{z.data}")
print(f"x.grad:\n{x.grad}")  # Should be z.grad @ y.T
print(f"y.grad:\n{y.grad}")  # Should be x.T @ z.grad

# Test 2: Vector-matrix multiplication
print("\nTest 2 - Vector @ Matrix:")
x = Tensor([1.0, 2.0, 3.0])  # (3,)
y = Tensor([[4.0, 5.0],
            [6.0, 7.0],
            [8.0, 9.0]])     # (3, 2)
z = x @ y                    # (2,)

z.backward()

print(f"z.data: {z.data}")
print(f"x.grad: {x.grad}")
print(f"y.grad:\n{y.grad}")
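Note: the comments above state the matmul backward rule directly: for z = x @ y with upstream gradient g, x.grad = g @ yᵀ and y.grad = xᵀ @ g. A quick NumPy check of the 2D case, illustrative only:

import numpy as np

x = np.array([[1.0, 2.0, 3.0],
              [4.0, 5.0, 6.0]])   # (2, 3)
y = np.array([[7.0, 8.0],
              [9.0, 10.0],
              [11.0, 12.0]])      # (3, 2)
g = np.ones((2, 2))               # upstream gradient for z = x @ y

print(x @ y)     # [[ 58.  64.] [139. 154.]]
print(g @ y.T)   # x.grad: row sums of y -> [[15. 19. 23.] [15. 19. 23.]]
print(x.T @ g)   # y.grad: column sums of x -> [[5. 5.] [7. 7.] [9. 9.]]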
tests/test_mlp.py | 45 (new file)
@@ -0,0 +1,45 @@
import numpy as np
from nanotorch.tensor import Tensor
from nanotorch.nn import MLP

# Test 1: MLP forward pass
print("Test 1 - MLP forward pass:")
model = MLP([2, 4, 3, 1])  # 2 inputs -> 4 hidden -> 3 hidden -> 1 output

x = Tensor([[1.0, 2.0]])  # Single sample, shape (1, 2)
y = model(x)              # shape (1, 1)

print(f"Input shape: {x.data.shape}")
print(f"Output shape: {y.data.shape}")
print(f"Output value: {y.data}")

# Test 2: Batch processing
print("\nTest 2 - Batch processing:")
x_batch = Tensor([[1.0, 2.0],
                  [3.0, 4.0],
                  [5.0, 6.0]])  # 3 samples, shape (3, 2)
y_batch = model(x_batch)        # shape (3, 1)

print(f"Batch input shape: {x_batch.data.shape}")
print(f"Batch output shape: {y_batch.data.shape}")

# Test 3: Backward pass through entire network
print("\nTest 3 - Full backward pass:")
x = Tensor([[1.0, 2.0]])
y_pred = model(x)
y_true = Tensor([[5.0]])

# MSE loss
loss = ((y_pred - y_true) ** 2).mean()

loss.backward()

print(f"Loss: {loss.data}")
print(f"Number of parameters: {len(model.parameters())}")
print(f"First layer W.grad shape: {model.layers[0].W.grad.shape}")
print(f"Last layer W.grad shape: {model.layers[-1].W.grad.shape}")

# Verify gradients exist for all parameters
all_have_grads = all(np.any(p.grad != 0) or p.grad.shape == ()
                     for p in model.parameters())
print(f"All parameters have gradients: {all_have_grads}")
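Note: Test 3 only checks that gradients are populated; the usual way to validate backprop end to end is a finite-difference comparison. The sketch below does that in plain NumPy for a tiny [2, 4, 1] tanh network with an MSE loss. It is a standalone illustration of the technique, not a test of the MLP class added in this commit:

import numpy as np

rng = np.random.default_rng(0)
W1, b1 = rng.normal(size=(2, 4)), np.zeros(4)   # tiny [2, 4, 1] tanh net
W2, b2 = rng.normal(size=(4, 1)), np.zeros(1)
x, y_true = np.array([[1.0, 2.0]]), np.array([[5.0]])

def loss_fn(W1):
    h = np.tanh(x @ W1 + b1)
    y = h @ W2 + b2
    return ((y - y_true) ** 2).mean()

# Manual backprop for dL/dW1
h = np.tanh(x @ W1 + b1)
y = h @ W2 + b2
dy = 2 * (y - y_true) / y.size        # MSE gradient
dh = (dy @ W2.T) * (1 - h ** 2)       # through the output layer, then tanh
dW1 = x.T @ dh

# Finite-difference check on one entry of W1
eps, i, j = 1e-5, 0, 0
Wp, Wm = W1.copy(), W1.copy()
Wp[i, j] += eps
Wm[i, j] -= eps
numeric = (loss_fn(Wp) - loss_fn(Wm)) / (2 * eps)
print(np.isclose(dW1[i, j], numeric))  # True if backprop matches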
tests/test_mul.py | 38 (new file)
@@ -0,0 +1,38 @@
from nanotorch.tensor import Tensor

# Test 1: Simple multiplication (no broadcasting)
print("Test 1 - No broadcasting:")
a = Tensor([2.0, 3.0, 4.0])
b = Tensor([5.0, 6.0, 7.0])
c = a * b  # [10, 18, 28]

c.backward()

print(f"c.data: {c.data}")
print(f"a.grad: {a.grad}")  # Should be [5, 6, 7] (b's values)
print(f"b.grad: {b.grad}")  # Should be [2, 3, 4] (a's values)

# Test 2: Broadcasting case
print("\nTest 2 - Broadcasting:")
a = Tensor([[1.0, 2.0, 3.0]])  # shape (1, 3)
b = Tensor([[2.0], [3.0]])     # shape (2, 1)
c = a * b                      # shape (2, 3)

c.backward()

print(f"c.data:\n{c.data}")
print(f"a.grad shape: {a.grad.shape}, values: {a.grad}")  # Should be (1,3) with [[5, 5, 5]]
print(f"b.grad shape: {b.grad.shape}, values: {b.grad}")  # Should be (2,1) with [[6], [6]]

# Test 3: Chain rule test (addition + multiplication)
print("\nTest 3 - Chain rule:")
x = Tensor([2.0, 3.0])
y = Tensor([4.0, 5.0])
z = x * y  # [8, 15]
w = z + z  # [16, 30]

w.backward()

print(f"w.data: {w.data}")
print(f"x.grad: {x.grad}")  # Should be [8, 10] (2 * y, because w = 2*x*y)
print(f"y.grad: {y.grad}")  # Should be [4, 6] (2 * x)
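Note: Test 3's expected values rely on gradient accumulation: z is used twice in w = z + z, so the two upstream contributions must be summed, giving w = 2·x·y and hence x.grad = 2·y and y.grad = 2·x. A minimal NumPy sketch of the accumulation pattern a reverse-mode autograd typically uses (illustrative, not nanotorch's code):

import numpy as np

x = np.array([2.0, 3.0])
y = np.array([4.0, 5.0])
x_grad = np.zeros_like(x)
y_grad = np.zeros_like(y)

# Forward: z = x * y, w = z + z
upstream_w = np.ones_like(x)   # dL/dw, seeded with ones

# Backward through w = z + z: z receives a contribution from each use
z_grad = np.zeros_like(x)
z_grad += upstream_w           # first use of z
z_grad += upstream_w           # second use of z

# Backward through z = x * y (product rule)
x_grad += z_grad * y           # [ 8. 10.]
y_grad += z_grad * x           # [4. 6.]
print(x_grad, y_grad)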
tests/test_reductions.py | 58 (new file)
@@ -0,0 +1,58 @@
from nanotorch.tensor import Tensor

# Test 1: Sum all elements
print("Test 1 - Sum (all elements):")
x = Tensor([[1.0, 2.0, 3.0],
            [4.0, 5.0, 6.0]])  # shape (2, 3)
y = x.sum()                    # 21.0, shape ()

y.backward()

print(f"y.data: {y.data}")
print(f"x.grad:\n{x.grad}")  # Should be all 1s

# Test 2: Sum along axis
print("\nTest 2 - Sum (axis=1):")
x = Tensor([[1.0, 2.0, 3.0],
            [4.0, 5.0, 6.0]])  # shape (2, 3)
y = x.sum(axis=1)              # [6, 15], shape (2,)

y.backward()

print(f"y.data: {y.data}")
print(f"x.grad:\n{x.grad}")  # Should be all 1s

# Test 3: Mean all elements
print("\nTest 3 - Mean (all elements):")
x = Tensor([[2.0, 4.0],
            [6.0, 8.0]])  # shape (2, 2)
y = x.mean()              # 5.0, shape ()

y.backward()

print(f"y.data: {y.data}")
print(f"x.grad:\n{x.grad}")  # Should be all 0.25 (1/4)

# Test 4: Mean along axis
print("\nTest 4 - Mean (axis=0):")
x = Tensor([[1.0, 2.0],
            [3.0, 4.0]])  # shape (2, 2)
y = x.mean(axis=0)        # [2, 3], shape (2,)

y.backward()

print(f"y.data: {y.data}")
print(f"x.grad:\n{x.grad}")  # Should be all 0.5 (1/2)

# Test 5: Chain rule with operations
print("\nTest 5 - MSE Loss simulation:")
pred = Tensor([1.0, 2.0, 3.0])
target = Tensor([1.5, 2.5, 2.0])
diff = pred - target
squared = diff * diff
loss = squared.mean()

loss.backward()

print(f"loss.data: {loss.data}")
print(f"pred.grad: {pred.grad}")  # Should be 2*(pred - target)/3 ≈ [-0.33, -0.33, 0.67]
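Note: the reduction backward rules checked here are: for sum, the upstream gradient is broadcast back over the reduced axis unchanged; for mean, it is additionally divided by the number of elements averaged (hence the 1/4 and 1/2 above). A short NumPy illustration of the rule, not the library's implementation:

import numpy as np

x = np.array([[2.0, 4.0],
              [6.0, 8.0]])

# mean over all elements: each input contributes 1/N to the output
n = x.size
print(np.full_like(x, 1.0 / n))     # all 0.25

# mean over axis=0: each column averages 2 elements
upstream = np.ones(x.shape[1])      # dL/dy for y = x.mean(axis=0)
grad = np.broadcast_to(upstream / x.shape[0], x.shape)
print(grad)                         # all 0.5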