nanotorch implementation

2025-11-17 21:32:28 +05:30
parent 618423e8d2
commit a6e32fcc71
19 changed files with 809 additions and 2 deletions

tests/test_activations.py Normal file

@@ -0,0 +1,37 @@
from nanotorch.tensor import Tensor
# Test 1: ReLU
print("Test 1 - ReLU:")
x = Tensor([-2.0, -1.0, 0.0, 1.0, 2.0])
y = x.relu()
y.backward()
print(f"x.data: {x.data}")
print(f"y.data: {y.data}") # Should be [0, 0, 0, 1, 2]
print(f"x.grad: {x.grad}") # Should be [0, 0, 0, 1, 1]
# Test 2: Tanh
print("\nTest 2 - Tanh:")
x = Tensor([0.0, 1.0, 2.0])
y = x.tanh()
y.backward()
print(f"x.data: {x.data}")
print(f"y.data: {y.data}") # Should be [0, 0.76, 0.96] approx
print(f"x.grad: {x.grad}") # Should be [1, 0.42, 0.07] approx (1 - tanh²)
# Test 3: ReLU in a computation graph
print("\nTest 3 - ReLU in computation:")
x = Tensor([[-1.0, 2.0],
            [3.0, -4.0]])
w = Tensor([[0.5, 0.5],
            [0.5, 0.5]])
z = (x @ w).relu() # Linear layer + ReLU
z.backward()
print(f"z.data:\n{z.data}")
print(f"x.grad:\n{x.grad}")
print(f"w.grad:\n{w.grad}")

tests/test_add.py Normal file

@@ -0,0 +1,23 @@
from nanotorch.tensor import Tensor
# Test 1: Simple addition (no broadcasting)
a = Tensor([1.0, 2.0, 3.0])
b = Tensor([4.0, 5.0, 6.0])
c = a + b
c.backward()
print("Test 1 - No broadcasting:")
print(f"a.grad: {a.grad}") # Should be [1, 1, 1]
print(f"b.grad: {b.grad}") # Should be [1, 1, 1]
# Test 2: Broadcasting
a = Tensor([[1.0, 2.0]]) # shape (1, 2)
b = Tensor([[3.0], [4.0]]) # shape (2, 1)
c = a + b # shape (2, 2)
c.backward()
print("\nTest 2 - Broadcasting:")
print(f"a.grad shape: {a.grad.shape}, values: {a.grad}") # Should be (1,2) with [[2, 2]]
print(f"b.grad shape: {b.grad.shape}, values: {b.grad}") # Should be (2,1) with [[2], [2]]

tests/test_linear.py Normal file

@@ -0,0 +1,38 @@
from nanotorch.nn import Linear
from nanotorch.tensor import Tensor
# Test 1: Single sample forward pass
print("Test 1 - Single forward pass:")
layer = Linear(3, 2) # 3 inputs -> 2 outputs
x = Tensor([1.0, 2.0, 3.0]) # shape (3,)
y = layer(x) # shape (2,)
print(f"x.shape: {x.data.shape}")
print(f"y.shape: {y.data.shape}")
print(f"y.data: {y.data}")
# Test 2: Batch forward pass
print("\nTest 2 - Batch forward pass:")
x_batch = Tensor([[1.0, 2.0, 3.0],
                  [4.0, 5.0, 6.0]]) # shape (2, 3) - batch of 2
y_batch = layer(x_batch) # shape (2, 2)
print(f"x_batch.shape: {x_batch.data.shape}")
print(f"y_batch.shape: {y_batch.data.shape}")
# Test 3: Backward pass
print("\nTest 3 - Backward pass:")
x = Tensor([[1.0, 2.0]]) # shape (1, 2)
layer = Linear(2, 3) # 2 -> 3
y = layer(x)
loss = y.sum() # Simple loss for testing
loss.backward()
print(f"W.grad shape: {layer.W.grad.shape}") # Should be (2, 3)
print(f"b.grad shape: {layer.b.grad.shape}") # Should be (3,)
print(f"x.grad shape: {x.grad.shape}") # Should be (1, 2)
print(f"W.grad:\n{layer.W.grad}")
print(f"b.grad: {layer.b.grad}")

tests/test_matmul.py Normal file

@@ -0,0 +1,30 @@
from nanotorch.tensor import Tensor
# Test 1: Simple 2D matmul
print("Test 1 - Simple 2D matmul:")
x = Tensor([[1.0, 2.0, 3.0],
            [4.0, 5.0, 6.0]]) # (2, 3)
y = Tensor([[7.0, 8.0],
            [9.0, 10.0],
            [11.0, 12.0]]) # (3, 2)
z = x @ y # (2, 2)
z.backward()
print(f"z.data:\n{z.data}")
print(f"x.grad:\n{x.grad}") # Should be z.grad @ y.T
print(f"y.grad:\n{y.grad}") # Should be x.T @ z.grad
# Test 2: Vector-matrix multiplication
print("\nTest 2 - Vector @ Matrix:")
x = Tensor([1.0, 2.0, 3.0]) # (3,)
y = Tensor([[4.0, 5.0],
            [6.0, 7.0],
            [8.0, 9.0]]) # (3, 2)
z = x @ y # (2,)
z.backward()
print(f"z.data: {z.data}")
print(f"x.grad: {x.grad}")
print(f"y.grad:\n{y.grad}")

tests/test_mlp.py Normal file

@@ -0,0 +1,45 @@
import numpy as np
from nanotorch.tensor import Tensor
from nanotorch.nn import MLP
# Test 1: MLP forward pass
print("Test 1 - MLP forward pass:")
model = MLP([2, 4, 3, 1]) # 2 inputs -> 4 hidden -> 3 hidden -> 1 output
x = Tensor([[1.0, 2.0]]) # Single sample, shape (1, 2)
y = model(x) # shape (1, 1)
print(f"Input shape: {x.data.shape}")
print(f"Output shape: {y.data.shape}")
print(f"Output value: {y.data}")
# Test 2: Batch processing
print("\nTest 2 - Batch processing:")
x_batch = Tensor([[1.0, 2.0],
                  [3.0, 4.0],
                  [5.0, 6.0]]) # 3 samples, shape (3, 2)
y_batch = model(x_batch) # shape (3, 1)
print(f"Batch input shape: {x_batch.data.shape}")
print(f"Batch output shape: {y_batch.data.shape}")
# Test 3: Backward pass through entire network
print("\nTest 3 - Full backward pass:")
x = Tensor([[1.0, 2.0]])
y_pred = model(x)
y_true = Tensor([[5.0]])
# MSE loss
loss = ((y_pred - y_true) ** 2).mean()
loss.backward()
print(f"Loss: {loss.data}")
print(f"Number of parameters: {len(model.parameters())}")
print(f"First layer W.grad shape: {model.layers[0].W.grad.shape}")
print(f"Last layer W.grad shape: {model.layers[-1].W.grad.shape}")
# Verify gradients exist for all parameters
all_have_grads = all(np.any(p.grad != 0) or p.grad.shape == ()
                     for p in model.parameters())
print(f"All parameters have gradients: {all_have_grads}")

tests/test_mul.py Normal file

@@ -0,0 +1,38 @@
from nanotorch.tensor import Tensor
# Test 1: Simple multiplication (no broadcasting)
print("Test 1 - No broadcasting:")
a = Tensor([2.0, 3.0, 4.0])
b = Tensor([5.0, 6.0, 7.0])
c = a * b # [10, 18, 28]
c.backward()
print(f"c.data: {c.data}")
print(f"a.grad: {a.grad}") # Should be [5, 6, 7] (b's values)
print(f"b.grad: {b.grad}") # Should be [2, 3, 4] (a's values)
# Test 2: Broadcasting case
print("\nTest 2 - Broadcasting:")
a = Tensor([[1.0, 2.0, 3.0]]) # shape (1, 3)
b = Tensor([[2.0], [3.0]]) # shape (2, 1)
c = a * b # shape (2, 3)
c.backward()
print(f"c.data:\n{c.data}")
print(f"a.grad shape: {a.grad.shape}, values: {a.grad}") # Should be (1,3) with [[5, 5, 5]]
print(f"b.grad shape: {b.grad.shape}, values: {b.grad}") # Should be (2,1) with [[6], [6]]
# Test 3: Chain rule test (addition + multiplication)
print("\nTest 3 - Chain rule:")
x = Tensor([2.0, 3.0])
y = Tensor([4.0, 5.0])
z = x * y # [8, 15]
w = z + z # [16, 30]
w.backward()
print(f"w.data: {w.data}")
print(f"x.grad: {x.grad}") # Should be [8, 10] (2 * y, because w = 2*x*y)
print(f"y.grad: {y.grad}") # Should be [4, 6] (2 * x)

tests/test_reductions.py Normal file

@@ -0,0 +1,58 @@
from nanotorch.tensor import Tensor
# Test 1: Sum all elements
print("Test 1 - Sum (all elements):")
x = Tensor([[1.0, 2.0, 3.0],
            [4.0, 5.0, 6.0]]) # shape (2, 3)
y = x.sum() # 21.0, shape ()
y.backward()
print(f"y.data: {y.data}")
print(f"x.grad:\n{x.grad}") # Should be all 1s
# Test 2: Sum along axis
print("\nTest 2 - Sum (axis=1):")
x = Tensor([[1.0, 2.0, 3.0],
            [4.0, 5.0, 6.0]]) # shape (2, 3)
y = x.sum(axis=1) # [6, 15], shape (2,)
y.backward()
print(f"y.data: {y.data}")
print(f"x.grad:\n{x.grad}") # Should be all 1s
# Test 3: Mean all elements
print("\nTest 3 - Mean (all elements):")
x = Tensor([[2.0, 4.0],
            [6.0, 8.0]]) # shape (2, 2)
y = x.mean() # 5.0, shape ()
y.backward()
print(f"y.data: {y.data}")
print(f"x.grad:\n{x.grad}") # Should be all 0.25 (1/4)
# Test 4: Mean along axis
print("\nTest 4 - Mean (axis=0):")
x = Tensor([[1.0, 2.0],
            [3.0, 4.0]]) # shape (2, 2)
y = x.mean(axis=0) # [2, 3], shape (2,)
y.backward()
print(f"y.data: {y.data}")
print(f"x.grad:\n{x.grad}") # Should be all 0.5 (1/2)
# Test 5: Chain rule with operations
print("\nTest 5 - MSE Loss simulation:")
pred = Tensor([1.0, 2.0, 3.0])
target = Tensor([1.5, 2.5, 2.0])
diff = pred - target
squared = diff * diff
loss = squared.mean()
loss.backward()
print(f"loss.data: {loss.data}")
print(f"pred.grad: {pred.grad}") # Should show gradient for each prediction