Project 2: Image Classifier — Neurogebra vs PyTorch¶
Build a digit classifier on scikit-learn's built-in digits dataset — an MNIST-style collection of 8×8 grayscale images. We'll train a model to recognize handwritten digits (0–9), comparing both frameworks side by side.
🎯 Goal¶
Step 1: Load the Dataset¶
import numpy as np
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split

# Load sklearn's built-in digits dataset (8x8 images)
digits = load_digits()
y = digits.target        # (1797,) — labels 0-9
# Max pixel value in this dataset is 16, so dividing by 16 scales pixels to 0-1
X = digits.data / 16.0   # (1797, 64) — flattened 8x8 images

# Train/test split: 20% held out, fixed seed so the split is reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

n_train, n_features = X_train.shape
print(f"Training: {n_train} images")
print(f"Testing: {X_test.shape[0]} images")
print(f"Image shape: 8×8 = {n_features} features")
print(f"Classes: {np.unique(y_train)}")
import numpy as np
import torch
import torch.nn as nn
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split

# Load sklearn's built-in digits dataset (8x8 images) and scale pixels to 0-1
digits = load_digits()
X, y = digits.data / 16.0, digits.target

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Convert to PyTorch tensors: float32 features, int64 (long) class labels
X_train_t, X_test_t = (
    torch.tensor(arr, dtype=torch.float32) for arr in (X_train, X_test)
)
y_train_t, y_test_t = (
    torch.tensor(arr, dtype=torch.long) for arr in (y_train, y_test)
)

n_train, n_features = X_train_t.shape
print(f"Training: {n_train} images")
print(f"Testing: {X_test_t.shape[0]} images")
print(f"Image shape: {n_features} features")
Step 2: Visualize Sample Images¶
import matplotlib.pyplot as plt
# Preview the first ten training samples in a 2x5 grid
fig, axes = plt.subplots(2, 5, figsize=(12, 5))
for idx, panel in enumerate(axes.flat):
    image = X_train[idx].reshape(8, 8)  # undo the flattening for display
    panel.imshow(image, cmap='gray')
    panel.set_title(f"Label: {y_train[idx]}", fontsize=12)
    panel.axis('off')

plt.suptitle("Sample Training Images", fontsize=14)
plt.tight_layout()
plt.show()
Step 3: Build the Neural Network¶
from neurogebra.builders.model_builder import ModelBuilder
# Build a simple classifier.
# Each dict declares one dense layer; only the first one carries `input_shape`.
layer_specs = [
    {"type": "dense", "units": 64, "activation": "relu", "input_shape": (64,)},
    {"type": "dense", "units": 32, "activation": "relu"},
    {"type": "dense", "units": 10, "activation": "softmax"},
]

builder = ModelBuilder()
model = builder.sequential(layer_specs)
print(model.summary())
What this means (in plain English):
- Layer 1: Takes 64 inputs (pixels), outputs 64 values, applies ReLU
- Layer 2: Takes 64 values, compresses to 32, applies ReLU
- Layer 3: Takes 32 values, outputs 10 (one per digit), applies Softmax (probabilities)
import torch.nn as nn
class DigitClassifier(nn.Module):
    """Fully connected digit classifier: 64 inputs → 64 → 32 → 10 outputs.

    The final linear layer has no activation, so ``forward`` returns raw
    (unnormalised) class scores.
    """

    def __init__(self):
        super().__init__()
        stack = [
            nn.Linear(64, 64),
            nn.ReLU(),
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Linear(32, 10),
        ]
        self.network = nn.Sequential(*stack)

    def forward(self, x):
        """Pass ``x`` through the layer stack and return 10 scores per row."""
        scores = self.network(x)
        return scores
# Instantiate the classifier. (A bare `nn.Module()` has no layers or
# parameters, so the earlier throwaway assignment was removed.)
model = DigitClassifier()

# Count parameters: total number of weight and bias elements
total_params = sum(p.numel() for p in model.parameters())
print(f"Total parameters: {total_params:,}")
print(model)
Key Difference
Neurogebra: Declarative style — describe what you want with a dictionary.
PyTorch: Class-based — define a class with __init__ and forward methods.
For beginners, Neurogebra's approach is much more intuitive.
Step 4: Implement Training from Scratch with Autograd¶
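Since image classification benefits from a more hands-on approach, let's build a complete neural network from scratch using Neurogebra's autograd engine — and compare it with PyTorch. Note that the two sides are not identical: the from-scratch network is 64 → 32 → 10 and (in Step 5) trains on a 320-sample subset per epoch with plain SGD, while the PyTorch model is 64 → 64 → 32 → 10 and trains on the full training set with Adam, so the resulting accuracies are not directly comparable.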
from neurogebra.core.autograd import Value
import random
class Neuron:
    """Single neuron: weighted sum of its inputs plus a bias, optionally ReLU'd.

    Args:
        n_inputs: Number of inputs; one weight is created per input.
        nonlin: When True (default) the weighted sum goes through ReLU.
            Pass False for an output neuron that must emit a raw score.
    """

    def __init__(self, n_inputs, nonlin=True):
        # Uniform(-1, 1) draws scaled by sqrt(2 / n_inputs)
        scale = (2 / n_inputs) ** 0.5
        self.w = [Value(random.uniform(-1, 1) * scale) for _ in range(n_inputs)]
        self.b = Value(0.0)
        self.nonlin = nonlin

    def __call__(self, x):
        """Return w · x + b, passed through ReLU unless ``nonlin`` is False."""
        # The bias is used as the start value of the sum.
        pre_activation = sum((wi * xi for wi, xi in zip(self.w, x)), self.b)
        return pre_activation.relu() if self.nonlin else pre_activation

    def parameters(self):
        """Return the trainable values: all weights followed by the bias."""
        return self.w + [self.b]


class Layer:
    """Layer of neurons that all read the same input vector.

    Args:
        n_inputs: Size of the input vector.
        n_outputs: Number of neurons (= size of the output vector).
        nonlin: Forwarded to every ``Neuron``; False makes the layer linear.
    """

    def __init__(self, n_inputs, n_outputs, nonlin=True):
        self.neurons = [Neuron(n_inputs, nonlin=nonlin) for _ in range(n_outputs)]

    def __call__(self, x):
        """Return one output per neuron, in neuron order."""
        return [n(x) for n in self.neurons]

    def parameters(self):
        """Return the parameters of all neurons as one flat list."""
        return [p for n in self.neurons for p in n.parameters()]


class MLP:
    """Multi-Layer Perceptron.

    ``sizes`` lists the width of every stage, e.g. ``[64, 32, 10]`` builds
    64 → 32 → 10. Hidden layers use ReLU. The last layer is linear so that it
    outputs unconstrained scores (logits) for the softmax / cross-entropy
    step; clamping those scores with ReLU would discard negative logits and
    block their gradients.
    """

    def __init__(self, sizes):
        last = len(sizes) - 2  # index of the output layer
        self.layers = [
            Layer(sizes[i], sizes[i + 1], nonlin=(i != last))
            for i in range(len(sizes) - 1)
        ]

    def __call__(self, x):
        """Feed ``x`` through every layer in order and return the final output."""
        for layer in self.layers:
            x = layer(x)
        return x

    def parameters(self):
        """Return the parameters of all layers as one flat list."""
        return [p for layer in self.layers for p in layer.parameters()]
# Create network: 64 → 32 → 10
layer_sizes = [64, 32, 10]
mlp = MLP(layer_sizes)

print(f"Total parameters: {len(mlp.parameters()):,}")
print("Architecture: 64 → 32 → 10")
import torch
import torch.nn as nn
import torch.optim as optim
# Fresh instance of the classifier defined in Step 3
model = DigitClassifier()
total_params = sum(p.numel() for p in model.parameters())

# Loss on the model's raw scores, and an Adam optimizer over all its parameters
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

print(f"Total parameters: {total_params:,}")
print("Architecture: 64 → 64 → 32 → 10")
Step 5: Train the Network¶
# Training loop with Neurogebra autograd
learning_rate = 0.01
batch_size = 32
epochs = 20
# Use a subset for speed: at most this many (freshly shuffled) samples per epoch
samples_per_epoch = min(len(X_train), 320)
history = {"loss": [], "accuracy": []}

for epoch in range(epochs):
    # Shuffle data
    indices = list(range(len(X_train)))
    random.shuffle(indices)

    epoch_loss = 0.0
    correct = 0
    total = 0

    # Mini-batch training
    for start in range(0, samples_per_epoch, batch_size):
        batch_idx = indices[start:start + batch_size]
        batch_loss = Value(0.0)

        for idx in batch_idx:
            x = [Value(float(v)) for v in X_train[idx]]
            target = int(y_train[idx])

            # Forward pass
            scores = mlp(x)

            # Softmax + negative log likelihood, in log-sum-exp form:
            #   -log(exp(s_t) / Σ exp(s_j)) = log(Σ exp(s_j - m)) - (s_t - m)
            # with m = max score. The largest term of the sum is exp(0) = 1,
            # so the log argument is never 0 — unlike log(exp_t / sum_exp),
            # which fails once exp_t underflows to 0.
            max_score = max(s.data for s in scores)
            shifted = [s - Value(max_score) for s in scores]
            sum_exp = sum(z.exp() for z in shifted)
            sample_loss = sum_exp.log() - shifted[target]
            batch_loss = batch_loss + sample_loss

            # Track accuracy: predicted class = index of the highest score
            predicted = max(range(len(scores)), key=lambda i: scores[i].data)
            correct += int(predicted == target)
            total += 1

        # Average loss
        batch_loss = batch_loss / len(batch_idx)

        # Backward pass: clear the old gradients first, then backpropagate
        for p in mlp.parameters():
            p.grad = 0.0
        batch_loss.backward()

        # Update weights (SGD)
        for p in mlp.parameters():
            p.data -= learning_rate * p.grad

        # Note: this accumulates the per-batch mean losses (a sum, not a mean)
        epoch_loss += batch_loss.data

    accuracy = correct / total if total > 0 else 0
    history["loss"].append(epoch_loss)
    history["accuracy"].append(accuracy)

    if epoch % 5 == 0 or epoch == epochs - 1:
        print(f"Epoch {epoch:>3d}/{epochs}: Loss = {epoch_loss:.4f}, Accuracy = {accuracy:.1%}")
# Training loop with PyTorch
epochs = 20
batch_size = 32
history = {"loss": [], "accuracy": []}

for epoch in range(epochs):
    # New random sample order for this epoch
    perm = torch.randperm(len(X_train_t))
    X_shuffled, y_shuffled = X_train_t[perm], y_train_t[perm]

    epoch_loss, correct, total = 0.0, 0, 0

    # Mini-batch training
    for start in range(0, len(X_train_t), batch_size):
        stop = start + batch_size
        X_batch, y_batch = X_shuffled[start:stop], y_shuffled[start:stop]

        # Forward pass
        outputs = model(X_batch)
        loss = criterion(outputs, y_batch)

        # Backward pass: reset gradients, backpropagate, apply the update
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Running totals; accuracy uses the scores computed before the update
        epoch_loss += loss.item()
        predicted = outputs.argmax(dim=1)
        correct += (predicted == y_batch).sum().item()
        total += y_batch.size(0)

    accuracy = correct / total
    history["loss"].append(epoch_loss)
    history["accuracy"].append(accuracy)

    is_last_epoch = epoch == epochs - 1
    if epoch % 5 == 0 or is_last_epoch:
        print(f"Epoch {epoch:>3d}/{epochs}: Loss = {epoch_loss:.4f}, Accuracy = {accuracy:.1%}")
Key Difference
Neurogebra's autograd: You see every computation step — the forward pass, softmax, cross-entropy, and gradient updates are all explicit. This is extremely educational because you understand exactly what backpropagation does.
PyTorch: loss.backward() and optimizer.step() handle everything — faster but opaque.
Step 6: Evaluate on Test Data¶
# Test evaluation
predictions = []
for pixels in X_test:
    scores = mlp([Value(float(v)) for v in pixels])
    # Predicted digit = index of the largest of the 10 output scores
    predictions.append(max(range(10), key=lambda j: scores[j].data))

total = len(X_test)
correct = sum(1 for guess, label in zip(predictions, y_test) if guess == label)
test_accuracy = correct / total

print("\n=== Test Results (Neurogebra) ===")
print(f"Test Accuracy: {test_accuracy:.1%}")
print(f"Correct: {correct}/{total}")
# Test evaluation
with torch.no_grad():  # inference only, so no gradient tracking is needed
    outputs = model(X_test_t)
    predicted = outputs.argmax(dim=1)  # index of the highest score per row

total = y_test_t.size(0)
correct = (predicted == y_test_t).sum().item()
test_accuracy = correct / total

print("\n=== Test Results (PyTorch) ===")
print(f"Test Accuracy: {test_accuracy:.1%}")
print(f"Correct: {correct}/{total}")
Step 7: Confusion Matrix & Analysis¶
import matplotlib.pyplot as plt
import numpy as np
# Using Neurogebra predictions (or PyTorch — same visualization code)
predictions = np.array(predictions)  # from Neurogebra evaluation above

# Confusion matrix: row = actual digit, column = predicted digit
confusion = np.zeros((10, 10), dtype=int)
for true, pred in zip(y_test, predictions):
    confusion[true, pred] += 1

fig, axes = plt.subplots(1, 2, figsize=(16, 6))

# Plot confusion matrix
im = axes[0].imshow(confusion, cmap='Blues')
axes[0].set_xlabel("Predicted")
axes[0].set_ylabel("Actual")
axes[0].set_title("Confusion Matrix")
axes[0].set_xticks(range(10))
axes[0].set_yticks(range(10))
# Write each count into its cell (text takes x = column, y = row)
for i in range(10):
    for j in range(10):
        axes[0].text(j, i, str(confusion[i, j]),
                     ha='center', va='center', fontsize=8)
plt.colorbar(im, ax=axes[0])

# Plot per-class accuracy = correct predictions / test samples of that digit.
# A digit with no test samples is reported as 0 instead of producing a
# 0/0 NaN (and a RuntimeWarning).
class_totals = confusion.sum(axis=1)
class_acc = np.divide(
    confusion.diagonal(), class_totals,
    out=np.zeros(10, dtype=float), where=class_totals > 0,
)
axes[1].bar(range(10), class_acc, color='steelblue')
axes[1].set_xlabel("Digit")
axes[1].set_ylabel("Accuracy")
axes[1].set_title("Per-Digit Accuracy")
axes[1].set_xticks(range(10))
axes[1].set_ylim(0, 1.1)
# Percentage label just above each bar
for i, acc in enumerate(class_acc):
    axes[1].text(i, acc + 0.02, f"{acc:.0%}", ha='center', fontsize=9)

plt.tight_layout()
plt.show()
Step 8: Visualize Predictions¶
# Show some predictions: the first ten test images in a 2x5 grid
fig, axes = plt.subplots(2, 5, figsize=(15, 6))
for idx, panel in enumerate(axes.flat):
    panel.imshow(X_test[idx].reshape(8, 8), cmap='gray')
    true_label, pred_label = y_test[idx], predictions[idx]
    # Title colour signals whether the prediction matched the label
    if true_label == pred_label:
        color = 'green'
    else:
        color = 'red'
    panel.set_title(f"True: {true_label}, Pred: {pred_label}",
                    color=color, fontsize=11, fontweight='bold')
    panel.axis('off')

plt.suptitle("Test Predictions (Green=Correct, Red=Wrong)", fontsize=14)
plt.tight_layout()
plt.show()
Understanding What the Network Learned (Neurogebra Bonus)¶
from neurogebra import MathForge
forge = MathForge()

# Understand the math behind each component
print("=== What You Built ===\n")

# 1. ReLU activation explanation: symbolic form and its derivative w.r.t. x
relu = forge.get("relu")
print(f"Activation: ReLU = {relu.symbolic_expr}")
relu_grad = relu.gradient('x')
print(f" Gradient: {relu_grad.symbolic_expr}")
print(" Purpose: Adds non-linearity, kills negative values\n")

# 2. Understanding the loss
print("\n".join([
    "Cross-Entropy Loss:",
    " L = -log(P(correct class))",
    " If model is confident and correct → small loss",
    " If model is wrong → large loss\n",
]))

# 3. Understanding softmax
print("\n".join([
    "Softmax converts scores to probabilities:",
    " P(class_i) = exp(score_i) / sum(exp(all_scores))",
    " All probabilities sum to 1.0\n",
]))

# 4. Network architecture
n_params = len(mlp.parameters())
print("Your network architecture:")
print(" Input (64 pixels) → Dense(32) + ReLU → Dense(10) + Softmax → Prediction")
print(f" Total parameters: {n_params:,}")
Side-by-Side Summary¶
| Aspect | Neurogebra | PyTorch |
|---|---|---|
| Data prep | NumPy only | NumPy → Tensor conversion |
| Model definition | Dict-based or class-based | Class-based (nn.Module) |
| Training loop | Explicit computation visible | Abstracted behind .backward() |
| Evaluation | Manual iteration | Vectorized with torch.no_grad() |
| Speed | Slower (educational) | Fast (optimized C++) |
| Understanding | ✅ See every gradient | ⚠️ Gradients are computed behind the scenes (still inspectable via `.grad`) |
| Best for | Learning ML concepts | Production ML |
What You Learned¶
- How image classification works (pixels → features → class)
- How neural networks process information through layers
- How softmax converts scores to probabilities
- How cross-entropy loss measures classification error
- How backpropagation flows through a multi-layer network
- The trade-off between educational clarity and production speed
Next Project: Neural Network from Scratch →