mirror of
https://github.com/kashifulhaque/smoltorch.git
synced 2025-12-06 07:02:51 +00:00
642 lines
36 KiB
Plaintext
642 lines
36 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 1,
|
|
"id": "1742e66e",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"import math\n",
|
|
"import mlx.core as mx\n",
|
|
"import matplotlib.pyplot as plt\n",
|
|
"\n",
|
|
"%matplotlib inline"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 2,
|
|
"id": "e010d996",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"def f(x):\n",
|
|
" return 3*x**2 - 4*x + 5"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 3,
|
|
"id": "257a1b89",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"20.0"
|
|
]
|
|
},
|
|
"execution_count": 3,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"f(3.0)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 4,
|
|
"id": "17691e6d",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"[<matplotlib.lines.Line2D at 0x118cd58b0>]"
|
|
]
|
|
},
|
|
"execution_count": 4,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
},
|
|
{
|
|
"data": {
|
|
"image/png": "",
|
|
"text/plain": [
|
|
"<Figure size 640x480 with 1 Axes>"
|
|
]
|
|
},
|
|
"metadata": {},
|
|
"output_type": "display_data"
|
|
}
|
|
],
|
|
"source": [
|
|
"xs = mx.arange(-5, 5, 0.25)\n",
|
|
"ys = f(xs)\n",
|
|
"\n",
|
|
"plt.plot(xs, ys)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "3a6e6a0c",
|
|
"metadata": {},
|
|
"source": [
|
|
"**Simple refresher on differentiation**\n",
|
|
"$$\n",
|
|
"f'(x) = \\lim_{h \\rightarrow 0}\\frac{f(x + h) - f(x)}{h}\n",
|
|
"$$"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 5,
|
|
"id": "152355b0",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"h = 0.0001\n",
|
|
"x = 3.0"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 6,
|
|
"id": "cbb72ce5",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"(20.0, 20.001400030000006)"
|
|
]
|
|
},
|
|
"execution_count": 6,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"f(x), f(x + h)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 7,
|
|
"id": "1d17c49f",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"14.000300000063248"
|
|
]
|
|
},
|
|
"execution_count": 7,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"(f(x + h) - f(x)) / h"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "d0ec3cf4",
|
|
"metadata": {},
|
|
"source": [
|
|
"### **micrograd implementation**"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 8,
|
|
"id": "02c2f4eb",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"class Value:\n",
|
|
" def __init__(self, data, _parents=(), _op=''):\n",
|
|
" self.data = data\n",
|
|
" self._parents = _parents\n",
|
|
" self._op = _op\n",
|
|
"\n",
|
|
" # gradient\n",
|
|
" self.grad = 0.0 # at init, the value does not affect the output\n",
|
|
" self._backward = lambda: None\n",
|
|
" \n",
|
|
" def __repr__(self):\n",
|
|
" return f\"Value(data={self.data})\"\n",
|
|
" \n",
|
|
" def __add__(self, other: 'Value') -> 'Value':\n",
|
|
" other = other if isinstance(other, Value) else Value(other)\n",
|
|
" out = Value(self.data + other.data, (self, other), '+')\n",
|
|
"\n",
|
|
" def _backward():\n",
|
|
" self.grad += 1.0 * out.grad\n",
|
|
" other.grad += 1.0 * out.grad\n",
|
|
" out._backward = _backward\n",
|
|
"\n",
|
|
" return out\n",
|
|
" \n",
|
|
" def __radd__(self, other: 'Value') -> 'Value':\n",
|
|
" return self + other\n",
|
|
" \n",
|
|
" def __mul__(self, other: 'Value') -> 'Value':\n",
|
|
" other = other if isinstance(other, Value) else Value(other)\n",
|
|
" out = Value(self.data * other.data, (self, other), '*')\n",
|
|
"\n",
|
|
" def _backward():\n",
|
|
" self.grad += other.data * out.grad\n",
|
|
" other.grad += self.data * out.grad\n",
|
|
" out._backward = _backward\n",
|
|
"\n",
|
|
" return out\n",
|
|
" \n",
|
|
" def __neg__(self) -> 'Value':\n",
|
|
" return -1 * self\n",
|
|
" \n",
|
|
" def __sub__(self, other: 'Value') -> 'Value':\n",
|
|
" return self + (-other)\n",
|
|
" \n",
|
|
" def __rsub__(self, other: 'Value') -> 'Value':\n",
|
|
" return Value(other) - self\n",
|
|
" \n",
|
|
" def __rmul__(self, other: 'Value') -> 'Value':\n",
|
|
" return self * other\n",
|
|
" \n",
|
|
" def __pow__(self, other: 'Value') -> 'Value':\n",
|
|
" assert isinstance(other, (int, float)), \"only support int/float powers for now\"\n",
|
|
" out = Value(self.data**other, (self, ), f'**{other}')\n",
|
|
"\n",
|
|
" def _backward():\n",
|
|
" self.grad += (other * self.data**(other - 1)) * out.grad\n",
|
|
" out._backward = _backward\n",
|
|
"\n",
|
|
" return out\n",
|
|
" \n",
|
|
" def __truediv__(self, other: 'Value') -> 'Value':\n",
|
|
" return self * other**-1\n",
|
|
" \n",
|
|
" def tanh(self) -> 'Value':\n",
|
|
" x = self.data\n",
|
|
" _tanh = (math.exp(2*x) - 1) / (math.exp(2*x) + 1)\n",
|
|
" out = Value(_tanh, (self, ), 'tanh')\n",
|
|
"\n",
|
|
" def _backward():\n",
|
|
" self.grad += (1 - _tanh ** 2) * out.grad\n",
|
|
" out._backward = _backward\n",
|
|
"\n",
|
|
" return out\n",
|
|
" \n",
|
|
" def exp(self) -> 'Value':\n",
|
|
" x = self.data\n",
|
|
" out = Value(math.exp(x), (self, ), 'exp')\n",
|
|
"\n",
|
|
" def _backward():\n",
|
|
" self.grad += out.data * out.grad\n",
|
|
" out._backward = _backward\n",
|
|
"\n",
|
|
" return out\n",
|
|
" \n",
|
|
" def backward(self):\n",
|
|
" topo = []\n",
|
|
" visited = set()\n",
|
|
"\n",
|
|
" def build_topo(v: 'Value'):\n",
|
|
" if v not in visited:\n",
|
|
" visited.add(v)\n",
|
|
" \n",
|
|
" for child in v._parents:\n",
|
|
" build_topo(child)\n",
|
|
" \n",
|
|
" topo.append(v)\n",
|
|
" \n",
|
|
" build_topo(self)\n",
|
|
"\n",
|
|
" self.grad = 1.0\n",
|
|
" for node in reversed(topo):\n",
|
|
" node._backward()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 9,
|
|
"id": "015176d1",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"Gradient: -3.000000000010772\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"# manual gradient estimate via finite differences (no backward() call yet)\n",
|
|
"a = Value(2.0)\n",
|
|
"b = Value(-3.0)\n",
|
|
"c = Value(10.0)\n",
|
|
"d = a*b + c\n",
|
|
"\n",
|
|
"# If we change 'a' by a small amount 'h'\n",
|
|
"# how much does the output 'd' change? (the ratio printed below estimates dd/da)\n",
|
|
"a = Value(a.data + h)\n",
|
|
"d_ = a*b + c\n",
|
|
"print(f\"Gradient: {(d_.data - d.data)/h}\")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "42a72d0d",
|
|
"metadata": {},
|
|
"source": [
|
|
"**autograd example**"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 10,
|
|
"id": "f7f36924",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"x1 = Value(2.0)\n",
|
|
"x2 = Value(0.0)\n",
|
|
"\n",
|
|
"w1 = Value(-3.0)\n",
|
|
"w2 = Value(1.0)\n",
|
|
"\n",
|
|
"b = Value(6.8813735870195432)\n",
|
|
"\n",
|
|
"x1w1 = x1*w1\n",
|
|
"x2w2 = x2*w2\n",
|
|
"x1w1x2w2 = x1w1 + x2w2\n",
|
|
"n = x1w1x2w2 + b\n",
|
|
"o = n.tanh()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "32fe0b65",
|
|
"metadata": {},
|
|
"source": [
|
|
"### **Neural Network, using micrograd**"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 11,
|
|
"id": "5f6e988b",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"import random\n",
|
|
"\n",
|
|
"class Neuron:\n",
|
|
" def __init__(self, n_inputs: int):\n",
|
|
" self.w = [Value(random.uniform(-1, 1)) for _ in range(n_inputs)]\n",
|
|
" self.b = Value(random.uniform(-1, 1))\n",
|
|
" \n",
|
|
" def __call__(self, x: list) -> Value:\n",
|
|
" activations = sum((w_i * x_i for w_i, x_i in zip(self.w, x)), self.b)\n",
|
|
" out = activations.tanh()\n",
|
|
" return out\n",
|
|
" \n",
|
|
" def parameters(self):\n",
|
|
" return self.w + [self.b]"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 12,
|
|
"id": "a36f7621",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"class Layer:\n",
|
|
" def __init__(self, n_inputs: int, n_outputs: int):\n",
|
|
" self.neurons = [Neuron(n_inputs) for _ in range(n_outputs)]\n",
|
|
" \n",
|
|
" def __call__(self, x: list) -> list[Value]:\n",
|
|
" outs = [n(x) for n in self.neurons]\n",
|
|
" return outs\n",
|
|
" \n",
|
|
" def parameters(self):\n",
|
|
" return [p for n in self.neurons for p in n.parameters()]"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 13,
|
|
"id": "66b1d988",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"class MLP:\n",
|
|
" def __init__(self, n_inputs: int, n_outputs: int):\n",
|
|
" sz = [n_inputs] + n_outputs\n",
|
|
" self.layers = [Layer(sz[i], sz[i + 1]) for i in range(len(n_outputs))]\n",
|
|
" \n",
|
|
" def __call__(self, x):\n",
|
|
" for layer in self.layers:\n",
|
|
" x = layer(x)\n",
|
|
" return x\n",
|
|
" \n",
|
|
" def parameters(self):\n",
|
|
" return [p for layer in self.layers for p in layer.parameters()]"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 14,
|
|
"id": "0971f41c",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"Value(data=-0.9996003578010221)"
|
|
]
|
|
},
|
|
"execution_count": 14,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"# single neuron example\n",
|
|
"x = [2.5, 3.5]\n",
|
|
"n = Neuron(len(x))\n",
|
|
"n(x)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 15,
|
|
"id": "7fc58556",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"[Value(data=-0.9925180315426618),\n",
|
|
" Value(data=-0.9334684394208494),\n",
|
|
" Value(data=0.9998730786341865)]"
|
|
]
|
|
},
|
|
"execution_count": 15,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"# layer of neurons example\n",
|
|
"x = [1.5, 4.5]\n",
|
|
"nn = Layer(2, 3)\n",
|
|
"nn(x)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 16,
|
|
"id": "210eb775",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"[Value(data=0.24818297035972064)]"
|
|
]
|
|
},
|
|
"execution_count": 16,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"# MLP example: 3 inputs, a first layer with 4 neurons, a second layer with 4 neurons, and an output layer with 1 neuron\n",
|
|
"x = [2.0, 3.0, -1.0]\n",
|
|
"nn = MLP(3, [4, 4, 1])\n",
|
|
"nn(x)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "72283b5e",
|
|
"metadata": {},
|
|
"source": [
|
|
"### **Tune weights of our neural net**"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 72,
|
|
"id": "777ec5b8",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"nn = MLP(3, [4, 4, 1])"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 73,
|
|
"id": "0e7c0d95",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"xs = [\n",
|
|
" [2.0, 3.0, -1.0],\n",
|
|
" [3.0, -1.0, 0.5],\n",
|
|
" [0.5, 1.0, 1.0],\n",
|
|
" [1.0, 1.0, -1.0]\n",
|
|
"]\n",
|
|
"ys = [1.0, -1.0, -1.0, 1.0]"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 74,
|
|
"id": "82948286",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"0 6.396213210433728\n",
|
|
"1 4.6713236053103415\n",
|
|
"2 1.7838028388745268\n",
|
|
"3 0.9951214940339372\n",
|
|
"4 0.6244643866397004\n",
|
|
"5 0.4316410423669388\n",
|
|
"6 0.31926395466439167\n",
|
|
"7 0.24840306807651757\n",
|
|
"8 0.2007423843155306\n",
|
|
"9 0.1669919570672581\n",
|
|
"10 0.14208896965193704\n",
|
|
"11 0.12309433355505289\n",
|
|
"12 0.10820765800755643\n",
|
|
"13 0.09627486514636871\n",
|
|
"14 0.08652715381843712\n",
|
|
"15 0.07843528216744697\n",
|
|
"16 0.07162448249002293\n",
|
|
"17 0.06582279271389418\n",
|
|
"18 0.06082858473998165\n",
|
|
"19 0.05648953163226797\n",
|
|
"20 0.05268861520812526\n",
|
|
"21 0.04933459347816223\n",
|
|
"22 0.046355366905371904\n",
|
|
"23 0.04369327289796304\n",
|
|
"24 0.04130168994597353\n",
|
|
"25 0.03914254822288492\n",
|
|
"26 0.037184478469955776\n",
|
|
"27 0.03540141743451709\n",
|
|
"28 0.033771544606773694\n",
|
|
"29 0.03227646256553751\n",
|
|
"30 0.030900558653375945\n",
|
|
"31 0.029630503156717405\n",
|
|
"32 0.02845485132950758\n",
|
|
"33 0.027363725187645082\n",
|
|
"34 0.026348557141398016\n",
|
|
"35 0.025401881973255196\n",
|
|
"36 0.024517166914483014\n",
|
|
"37 0.023688671970382196\n",
|
|
"38 0.022911334430816083\n",
|
|
"39 0.022180672846249565\n",
|
|
"40 0.0214927067685973\n",
|
|
"41 0.02084388933519709\n",
|
|
"42 0.020231050374206097\n",
|
|
"43 0.019651348175094643\n",
|
|
"44 0.019102228431309494\n",
|
|
"45 0.01858138914775967\n",
|
|
"46 0.018086750531565127\n",
|
|
"47 0.017616429064053273\n",
|
|
"48 0.017168715095545756\n",
|
|
"49 0.016742053419866516\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"# Training loop\n",
|
|
"lr = 0.05\n",
|
|
"epochs = 50\n",
|
|
"for epoch in range(epochs):\n",
|
|
" # forward pass\n",
|
|
" y_preds = [nn(x) for x in xs]\n",
|
|
" loss = sum((y_pred[0] - y_true)**2 for y_true, y_pred in zip(ys, y_preds))\n",
|
|
"\n",
|
|
" # backward pass\n",
|
|
" for p in nn.parameters(): # zero grad\n",
|
|
" p.grad = 0.0\n",
|
|
" loss.backward()\n",
|
|
"\n",
|
|
" # update\n",
|
|
" for p in nn.parameters():\n",
|
|
" p.data += -lr * p.grad\n",
|
|
" \n",
|
|
" print(epoch, loss.data)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 75,
|
|
"id": "cc4aea5b",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"[[Value(data=0.9511549278309712)],\n",
|
|
" [Value(data=-0.9459667617553708)],\n",
|
|
" [Value(data=-0.9166788866728437)],\n",
|
|
" [Value(data=0.9329611039515775)]]"
|
|
]
|
|
},
|
|
"execution_count": 75,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"y_preds"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "7043ca91",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": []
|
|
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": "karpathy-micrograd",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.12.11"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 5
|
|
}
|