diff --git a/README.md b/README.md
index e19d622..af6738f 100644
--- a/README.md
+++ b/README.md
@@ -1,28 +1 @@
-
-
-
-
-Let's tidy up any small things
-
-
-
-Write a really nice README on what it does, how it does and how can one use it
-
-
-
-Make it ready for release (will push to pypi)
-
-
-
-
-
-Make a GitHub action for this actually, so I don't have to do this manually
-
-
-
-There's already a pkg on pypi named "nanotorch", so we might need to name it something else
-
-
-
-
-
+# **smoltorch**
diff --git a/examples/train_classification.py b/examples/train_classification.py
new file mode 100644
index 0000000..be5ab8e
--- /dev/null
+++ b/examples/train_classification.py
@@ -0,0 +1,88 @@
+import numpy as np
+from sklearn.datasets import load_breast_cancer
+from sklearn.model_selection import train_test_split
+from sklearn.preprocessing import StandardScaler
+from nanotorch.tensor import Tensor
+from nanotorch.nn import MLP, SGD, binary_cross_entropy
+
+# Load the breast cancer dataset (binary classification)
+print("Loading breast cancer dataset...")
+data = load_breast_cancer()
+X, y = data.data, data.target.reshape(-1, 1)
+
+# Normalize features (important for neural networks!)
+scaler = StandardScaler()
+X = scaler.fit_transform(X)
+
+# Split into train and test sets
+X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
+
+print(f"Training samples: {X_train.shape[0]}")
+print(f"Test samples: {X_test.shape[0]}")
+print(f"Features: {X_train.shape[1]}")
+
+# Create the model (note: output goes through sigmoid)
+class BinaryClassifier(MLP):
+    def __call__(self, x):
+        # Forward pass through the MLP
+        x = super().__call__(x)
+        # Apply sigmoid for probabilities
+        return x.sigmoid()
+
+model = BinaryClassifier([30, 16, 8, 1])  # 30 features -> 1 output probability
+optimizer = SGD(model.parameters(), lr=0.1)
+
+# Training loop
+epochs = 200
+print("\nTraining...")
+
+for epoch in range(epochs):
+    # Convert to tensors
+    X_tensor = Tensor(X_train)
+    y_tensor = Tensor(y_train)
+
+    # Forward pass
+    y_pred = model(X_tensor)
+
+    # Binary cross-entropy loss
+    loss = binary_cross_entropy(y_pred, y_tensor)
+
+    # Backward pass
+    optimizer.zero_grad()
+    loss.backward()
+
+    # Gradient diagnostics: runs after loss.backward(), before optimizer.step()
+    if (epoch + 1) % 20 == 0:
+        # L2 norm of all parameter gradients
+        grad_norm = np.sqrt(sum(np.sum(p.grad**2) for p in model.parameters()))
+        print(f"Epoch {epoch + 1}, Loss: {loss.data:.4f}, Grad norm: {grad_norm:.6f}")
+
+    # Update weights
+    optimizer.step()
+
+    # Calculate training accuracy
+    if (epoch + 1) % 20 == 0:
+        predictions = (y_pred.data > 0.5).astype(float)
+        accuracy = (predictions == y_train).mean()
+        print(f"Epoch {epoch + 1}/{epochs}, Loss: {loss.data:.4f}, Accuracy: {accuracy:.4f}")
+
+# Evaluate on the test set
+print("\nEvaluating on test set...")
+X_test_tensor = Tensor(X_test)
+y_test_tensor = Tensor(y_test)
+
+y_pred_test = model(X_test_tensor)
+test_loss = binary_cross_entropy(y_pred_test, y_test_tensor)
+
+predictions = (y_pred_test.data > 0.5).astype(float)
+test_accuracy = (predictions == y_test).mean()
+
+print(f"Test Loss: {test_loss.data:.4f}")
+print(f"Test Accuracy: {test_accuracy:.4f}")
+
+print("\nSample predictions:")
+for i in range(5):
+    prob = y_pred_test.data[i, 0]
+    pred = "Benign" if prob > 0.5 else "Malignant"  # sklearn convention: 0 = malignant, 1 = benign
+    true = "Benign" if y_test[i, 0] == 1 else "Malignant"
+    print(f"True: {true}, Predicted: {pred} (prob: {prob:.3f})")
\ No newline at end of file
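Note (not part of the diff): for readers skimming the example, here is the quantity the training loop minimizes, sketched in plain NumPy. The toy values are made up; the eps clamp mirrors the binary_cross_entropy helper added to nanotorch/nn.py below.

import numpy as np

# Hypothetical toy batch: three samples, binary labels and predicted probabilities
y_true = np.array([[1.0], [0.0], [1.0]])
y_pred = np.array([[0.9], [0.2], [0.6]])

# bce: -[y*log(p) + (1-y)*log(1-p)], averaged over the batch
eps = 1e-7  # keeps both log arguments away from log(0)
bce = -np.mean(y_true * np.log(y_pred + eps)
               + (1 - y_true) * np.log(1 - y_pred + eps))
print(f"BCE: {bce:.4f}")  # ~0.2798: mostly-confident correct predictions -> low loss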
diff --git a/nanotorch/nn.py b/nanotorch/nn.py
index b48d8d1..bb790f5 100644
--- a/nanotorch/nn.py
+++ b/nanotorch/nn.py
@@ -1,6 +1,26 @@
 import numpy as np
 from nanotorch.tensor import Tensor
 
+# helper functions
+def binary_cross_entropy(y_pred, y_true):
+    """
+    Binary cross-entropy loss with numerical stability.
+
+    Args:
+        y_pred: predicted probabilities, shape (batch_size, 1)
+        y_true: true labels (0 or 1), shape (batch_size, 1)
+
+    Returns:
+        scalar loss
+    """
+    # nudge both log arguments away from zero to avoid log(0)
+    epsilon = 1e-7
+
+    # bce: -[y*log(p) + (1-y)*log(1-p)]
+    term1 = y_true * (y_pred + epsilon).log()
+    term2 = (Tensor(1.0) - y_true) * (Tensor(1.0) - y_pred + epsilon).log()
+    return -(term1 + term2).mean()
+
 class Linear:
     def __init__(self, in_features, out_features):
         """
@@ -10,7 +30,9 @@ class Linear:
         in_features: input dims
         out_features: output dims
         """
-        self.W = Tensor(np.random.randn(in_features, out_features) * 0.1)
+        # xavier/glorot initialization keeps activation variance stable across layers
+        limit = np.sqrt(6 / (in_features + out_features))
+        self.W = Tensor(np.random.uniform(-limit, limit, (in_features, out_features)))
         self.b = Tensor(np.zeros(out_features))
 
     def __call__(self, x):
diff --git a/nanotorch/tensor.py b/nanotorch/tensor.py
index a72917b..a906403 100644
--- a/nanotorch/tensor.py
+++ b/nanotorch/tensor.py
@@ -147,6 +147,15 @@ class Tensor:
         out._backward = _backward
         return out
 
+    def log(self) -> 'Tensor':
+        out = Tensor(np.log(self.data), (self, ), 'log')
+
+        def _backward():
+            self.grad += (1 / self.data) * out.grad  # d/dx log(x) = 1/x
+
+        out._backward = _backward
+        return out
+
     def backward(self):
         # build topological order
         topo = []
@@ -187,3 +196,14 @@
 
         out._backward = _backward
         return out
+
+    def sigmoid(self) -> 'Tensor':
+        sig = 1 / (1 + np.exp(-self.data))
+        out = Tensor(sig, (self, ), 'sigmoid')
+
+        def _backward():
+            self.grad += sig * (1 - sig) * out.grad  # d/dx sigmoid(x) = sig * (1 - sig)
+
+        out._backward = _backward
+        return out
+
\ No newline at end of file
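Note (not part of the diff): a minimal sanity check for the new autograd ops, assuming only the Tensor API visible in this diff (Tensor, sigmoid, mean, backward, .grad). It compares the analytic gradient of mean(sigmoid(x)) against a central finite difference; log can be checked the same way.

import numpy as np
from nanotorch.tensor import Tensor

def f(a):
    # mean(sigmoid(a)), computed in plain NumPy for the numeric reference
    return np.mean(1.0 / (1.0 + np.exp(-a)))

x = np.array([[0.3, -1.2]])
eps = 1e-5

t = Tensor(x)
out = t.sigmoid().mean()  # scalar output, so backward() seeds it like the training loss
out.backward()

# Central finite differences, one coordinate at a time
num = np.zeros_like(x)
for idx in np.ndindex(*x.shape):
    xp, xm = x.copy(), x.copy()
    xp[idx] += eps
    xm[idx] -= eps
    num[idx] = (f(xp) - f(xm)) / (2 * eps)

print("analytic:", t.grad)  # filled in by Tensor.backward()
print("numeric: ", num)     # should agree to ~1e-8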