nanotorch implementation

2025-11-17 21:32:28 +05:30
parent 618423e8d2
commit a6e32fcc71
19 changed files with 809 additions and 2 deletions

tests/test_activations.py Normal file

@@ -0,0 +1,37 @@
from nanotorch.tensor import Tensor
# Test 1: ReLU
print("Test 1 - ReLU:")
x = Tensor([-2.0, -1.0, 0.0, 1.0, 2.0])
y = x.relu()
y.backward()
print(f"x.data: {x.data}")
print(f"y.data: {y.data}") # Should be [0, 0, 0, 1, 2]
print(f"x.grad: {x.grad}") # Should be [0, 0, 0, 1, 1]
# Test 2: Tanh
print("\nTest 2 - Tanh:")
x = Tensor([0.0, 1.0, 2.0])
y = x.tanh()
y.backward()
print(f"x.data: {x.data}")
print(f"y.data: {y.data}") # Should be [0, 0.76, 0.96] approx
print(f"x.grad: {x.grad}") # Should be [1, 0.42, 0.07] approx (1 - tanh²)
# Test 3: ReLU in a computation graph
print("\nTest 3 - ReLU in computation:")
x = Tensor([[-1.0, 2.0],
            [3.0, -4.0]])
w = Tensor([[0.5, 0.5],
            [0.5, 0.5]])
z = (x @ w).relu() # Linear layer + ReLU
z.backward()
print(f"z.data:\n{z.data}")
print(f"x.grad:\n{x.grad}")
print(f"w.grad:\n{w.grad}")

tests/test_add.py Normal file

@@ -0,0 +1,23 @@
from nanotorch.tensor import Tensor
# Test 1: Simple addition (no broadcasting)
a = Tensor([1.0, 2.0, 3.0])
b = Tensor([4.0, 5.0, 6.0])
c = a + b
c.backward()
print("Test 1 - No broadcasting:")
print(f"a.grad: {a.grad}") # Should be [1, 1, 1]
print(f"b.grad: {b.grad}") # Should be [1, 1, 1]
# Test 2: Broadcasting
a = Tensor([[1.0, 2.0]]) # shape (1, 2)
b = Tensor([[3.0], [4.0]]) # shape (2, 1)
c = a + b # shape (2, 2)
c.backward()
print("\nTest 2 - Broadcasting:")
print(f"a.grad shape: {a.grad.shape}, values: {a.grad}") # Should be (1,2) with [[2, 2]]
print(f"b.grad shape: {b.grad.shape}, values: {b.grad}") # Should be (2,1) with [[2], [2]]

tests/test_linear.py Normal file

@@ -0,0 +1,38 @@
from nanotorch.nn import Linear
from nanotorch.tensor import Tensor
# Test 1: Single sample forward pass
print("Test 1 - Single forward pass:")
layer = Linear(3, 2) # 3 inputs -> 2 outputs
x = Tensor([1.0, 2.0, 3.0]) # shape (3,)
y = layer(x) # shape (2,)
print(f"x.shape: {x.data.shape}")
print(f"y.shape: {y.data.shape}")
print(f"y.data: {y.data}")
# Test 2: Batch forward pass
print("\nTest 2 - Batch forward pass:")
x_batch = Tensor([[1.0, 2.0, 3.0],
                  [4.0, 5.0, 6.0]]) # shape (2, 3) - batch of 2
y_batch = layer(x_batch) # shape (2, 2)
print(f"x_batch.shape: {x_batch.data.shape}")
print(f"y_batch.shape: {y_batch.data.shape}")
# Test 3: Backward pass
print("\nTest 3 - Backward pass:")
x = Tensor([[1.0, 2.0]]) # shape (1, 2)
layer = Linear(2, 3) # 2 -> 3
y = layer(x)
loss = y.sum() # Simple loss for testing
loss.backward()
print(f"W.grad shape: {layer.W.grad.shape}") # Should be (2, 3)
print(f"b.grad shape: {layer.b.grad.shape}") # Should be (3,)
print(f"x.grad shape: {x.grad.shape}") # Should be (1, 2)
print(f"W.grad:\n{layer.W.grad}")
print(f"b.grad: {layer.b.grad}")

tests/test_matmul.py Normal file

@@ -0,0 +1,30 @@
from nanotorch.tensor import Tensor
# Test 1: Simple 2D matmul
print("Test 1 - Simple 2D matmul:")
x = Tensor([[1.0, 2.0, 3.0],
            [4.0, 5.0, 6.0]]) # (2, 3)
y = Tensor([[7.0, 8.0],
            [9.0, 10.0],
            [11.0, 12.0]]) # (3, 2)
z = x @ y # (2, 2)
z.backward()
print(f"z.data:\n{z.data}")
print(f"x.grad:\n{x.grad}") # Should be z.grad @ y.T
print(f"y.grad:\n{y.grad}") # Should be x.T @ z.grad
# Test 2: Vector-matrix multiplication
print("\nTest 2 - Vector @ Matrix:")
x = Tensor([1.0, 2.0, 3.0]) # (3,)
y = Tensor([[4.0, 5.0],
            [6.0, 7.0],
            [8.0, 9.0]]) # (3, 2)
z = x @ y # (2,)
z.backward()
print(f"z.data: {z.data}")
print(f"x.grad: {x.grad}")
print(f"y.grad:\n{y.grad}")

tests/test_mlp.py Normal file

@@ -0,0 +1,45 @@
import numpy as np
from nanotorch.tensor import Tensor
from nanotorch.nn import MLP
# Test 1: MLP forward pass
print("Test 1 - MLP forward pass:")
model = MLP([2, 4, 3, 1]) # 2 inputs -> 4 hidden -> 3 hidden -> 1 output
x = Tensor([[1.0, 2.0]]) # Single sample, shape (1, 2)
y = model(x) # shape (1, 1)
print(f"Input shape: {x.data.shape}")
print(f"Output shape: {y.data.shape}")
print(f"Output value: {y.data}")
# Test 2: Batch processing
print("\nTest 2 - Batch processing:")
x_batch = Tensor([[1.0, 2.0],
                  [3.0, 4.0],
                  [5.0, 6.0]]) # 3 samples, shape (3, 2)
y_batch = model(x_batch) # shape (3, 1)
print(f"Batch input shape: {x_batch.data.shape}")
print(f"Batch output shape: {y_batch.data.shape}")
# Test 3: Backward pass through entire network
print("\nTest 3 - Full backward pass:")
x = Tensor([[1.0, 2.0]])
y_pred = model(x)
y_true = Tensor([[5.0]])
# MSE loss
loss = ((y_pred - y_true) ** 2).mean()
loss.backward()
print(f"Loss: {loss.data}")
print(f"Number of parameters: {len(model.parameters())}")
print(f"First layer W.grad shape: {model.layers[0].W.grad.shape}")
print(f"Last layer W.grad shape: {model.layers[-1].W.grad.shape}")
# Verify gradients exist for all parameters
all_have_grads = all(np.any(p.grad != 0) or p.grad.shape == ()
                     for p in model.parameters())
print(f"All parameters have gradients: {all_have_grads}")

tests/test_mul.py Normal file

@@ -0,0 +1,38 @@
from nanotorch.tensor import Tensor
# Test 1: Simple multiplication (no broadcasting)
print("Test 1 - No broadcasting:")
a = Tensor([2.0, 3.0, 4.0])
b = Tensor([5.0, 6.0, 7.0])
c = a * b # [10, 18, 28]
c.backward()
print(f"c.data: {c.data}")
print(f"a.grad: {a.grad}") # Should be [5, 6, 7] (b's values)
print(f"b.grad: {b.grad}") # Should be [2, 3, 4] (a's values)
# Test 2: Broadcasting case
print("\nTest 2 - Broadcasting:")
a = Tensor([[1.0, 2.0, 3.0]]) # shape (1, 3)
b = Tensor([[2.0], [3.0]]) # shape (2, 1)
c = a * b # shape (2, 3)
c.backward()
print(f"c.data:\n{c.data}")
print(f"a.grad shape: {a.grad.shape}, values: {a.grad}") # Should be (1,3) with [[5, 5, 5]]
print(f"b.grad shape: {b.grad.shape}, values: {b.grad}") # Should be (2,1) with [[6], [6]]
# Test 3: Chain rule test (addition + multiplication)
print("\nTest 3 - Chain rule:")
x = Tensor([2.0, 3.0])
y = Tensor([4.0, 5.0])
z = x * y # [8, 15]
w = z + z # [16, 30]
w.backward()
print(f"w.data: {w.data}")
print(f"x.grad: {x.grad}") # Should be [8, 10] (2 * y, because w = 2*x*y)
print(f"y.grad: {y.grad}") # Should be [4, 6] (2 * x)

tests/test_reductions.py Normal file

@@ -0,0 +1,58 @@
from nanotorch.tensor import Tensor
# Test 1: Sum all elements
print("Test 1 - Sum (all elements):")
x = Tensor([[1.0, 2.0, 3.0],
            [4.0, 5.0, 6.0]]) # shape (2, 3)
y = x.sum() # 21.0, shape ()
y.backward()
print(f"y.data: {y.data}")
print(f"x.grad:\n{x.grad}") # Should be all 1s
# Test 2: Sum along axis
print("\nTest 2 - Sum (axis=1):")
x = Tensor([[1.0, 2.0, 3.0],
            [4.0, 5.0, 6.0]]) # shape (2, 3)
y = x.sum(axis=1) # [6, 15], shape (2,)
y.backward()
print(f"y.data: {y.data}")
print(f"x.grad:\n{x.grad}") # Should be all 1s
# Test 3: Mean all elements
print("\nTest 3 - Mean (all elements):")
x = Tensor([[2.0, 4.0],
            [6.0, 8.0]]) # shape (2, 2)
y = x.mean() # 5.0, shape ()
y.backward()
print(f"y.data: {y.data}")
print(f"x.grad:\n{x.grad}") # Should be all 0.25 (1/4)
# Test 4: Mean along axis
print("\nTest 4 - Mean (axis=0):")
x = Tensor([[1.0, 2.0],
            [3.0, 4.0]]) # shape (2, 2)
y = x.mean(axis=0) # [2, 3], shape (2,)
y.backward()
print(f"y.data: {y.data}")
print(f"x.grad:\n{x.grad}") # Should be all 0.5 (1/2)
# Test 5: Chain rule with operations
print("\nTest 5 - MSE Loss simulation:")
pred = Tensor([1.0, 2.0, 3.0])
target = Tensor([1.5, 2.5, 2.0])
diff = pred - target
squared = diff * diff
loss = squared.mean()
loss.backward()
print(f"loss.data: {loss.data}")
print(f"pred.grad: {pred.grad}") # Should show gradient for each prediction