Image Classification with Deep Learning

Learn how to build and train neural networks to classify images using modern deep learning techniques.

What is Image Classification?

Image Classification is the task of assigning a label or category to an entire image. It answers the question: "What is in this image?"

Examples:

Medical: Classifying X-rays as normal or showing fractures
Agriculture: Identifying crop diseases from leaf images
Security: Recognizing authorized vs unauthorized personnel
Social Media: Auto-tagging photos with relevant labels

Understanding Images in Computer Vision

Digital Image Representation

import numpy as np
import matplotlib.pyplot as plt
from PIL import Image

# Load and examine an image.
# Force 3-channel RGB so the channel indexing below also works when the file
# is grayscale or palette-based (those decode to a 2-D array and would make
# img_array[:, :, 0] raise an IndexError).
img = Image.open('sample_image.jpg').convert('RGB')
img_array = np.array(img)

print(f"Image shape: {img_array.shape}")  # (height, width, channels)
print(f"Data type: {img_array.dtype}")    # Usually uint8 (0-255)
print(f"Min pixel value: {img_array.min()}")
print(f"Max pixel value: {img_array.max()}")

# Visualize the image: full colour on the left, one channel on the right
plt.figure(figsize=(10, 4))

plt.subplot(1, 2, 1)
plt.imshow(img_array)
plt.title('Original Image')
plt.axis('off')

plt.subplot(1, 2, 2)
# Index 0 on the last axis is the red channel after the RGB conversion above;
# a gray colormap shows its intensity.
plt.imshow(img_array[:, :, 0], cmap='gray')  # Red channel only
plt.title('Red Channel')
plt.axis('off')

plt.show()

Color Spaces and Channels

# Two views of the same picture: full colour and single-channel luminance
rgb_image = np.array(img)            # Shape: (H, W, 3)
gray_pil = img.convert('L')
gray_image = np.array(gray_pil)      # Shape: (H, W)

for label, pixels in (("RGB", rgb_image), ("Grayscale", gray_image)):
    print(f"{label} shape: {pixels.shape}")

# OpenCV provides the conversions from RGB to other color spaces
import cv2

# HSV first, then LAB, both derived from the same RGB array
hsv_image, lab_image = (
    cv2.cvtColor(rgb_image, conversion)
    for conversion in (cv2.COLOR_RGB2HSV, cv2.COLOR_RGB2LAB)
)

Building Your First Image Classifier

Dataset Preparation

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import numpy as np

# CIFAR-10 is bundled with Keras; load_data() returns the train and test splits
train_split, test_split = keras.datasets.cifar10.load_data()
(x_train, y_train), (x_test, y_test) = train_split, test_split

# Report the array shape of every split
for description, array in (
    ("Training images", x_train),
    ("Training labels", y_train),
    ("Test images", x_test),
    ("Test labels", y_test),
):
    print(f"{description}: {array.shape}")

# Human-readable class names, looked up by the integer CIFAR-10 label
class_names = ['airplane', 'automobile', 'bird', 'cat', 'deer',
               'dog', 'frog', 'horse', 'ship', 'truck']

# Show the first ten training images in a 2x5 grid, titled with their class
plt.figure(figsize=(12, 6))
first_ten = zip(x_train[:10], y_train[:10])
for position, (picture, label) in enumerate(first_ten, start=1):
    plt.subplot(2, 5, position)
    plt.imshow(picture)
    # Each label is a length-1 array, hence the [0]
    plt.title(f'{class_names[label[0]]}')
    plt.axis('off')
plt.tight_layout()
plt.show()

Data Preprocessing

num_classes = 10

# Rescale pixel values into [0, 1] as float32, for both splits
x_train, x_test = (
    pixels.astype('float32') / 255.0 for pixels in (x_train, x_test)
)

# Turn the integer labels into one-hot vectors of length num_classes
y_train, y_test = (
    keras.utils.to_categorical(targets, num_classes)
    for targets in (y_train, y_test)
)

# Quick sanity check of the transformed arrays
summary = (
    ("Normalized training data shape", x_train.shape),
    ("One-hot labels shape", y_train.shape),
    ("Sample label", y_train[0]),
)
for caption, value in summary:
    print(f"{caption}: {value}")

Simple CNN Architecture

# Build a Convolutional Neural Network.
# The input shape is declared with an explicit keras.Input instead of the
# `input_shape=` keyword on the first layer, which newer Keras releases
# deprecate for Sequential models. The layer stack itself is unchanged.
model = keras.Sequential([
    keras.Input(shape=(32, 32, 3)),

    # First convolutional block
    layers.Conv2D(32, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),

    # Second convolutional block
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),

    # Third convolutional block
    layers.Conv2D(64, (3, 3), activation='relu'),

    # Flatten and dense layers; softmax yields one probability per class
    layers.Flatten(),
    layers.Dense(64, activation='relu'),
    layers.Dropout(0.5),
    layers.Dense(num_classes, activation='softmax')
])

# Display model architecture
model.summary()

# Visualize model architecture (plot_model needs pydot and graphviz installed)
keras.utils.plot_model(model, show_shapes=True, show_layer_names=True)

Model Compilation and Training

# Compile the model. categorical_crossentropy matches the one-hot labels.
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

# Set up callbacks. All three watch the validation loss, so the validation
# data must NOT be the test set: otherwise early stopping and checkpoint
# selection are tuned on the data later used to report accuracy.
callbacks = [
    keras.callbacks.EarlyStopping(
        monitor='val_loss', patience=5, restore_best_weights=True),
    keras.callbacks.ReduceLROnPlateau(
        monitor='val_loss', factor=0.2, patience=3),
    keras.callbacks.ModelCheckpoint(
        'best_model.h5', monitor='val_loss', save_best_only=True)
]

# Train the model, holding out the last 10% of the training samples for
# validation. The test set stays untouched until the final evaluation.
history = model.fit(
    x_train, y_train,
    batch_size=32,
    epochs=50,
    validation_split=0.1,
    callbacks=callbacks,
    verbose=1
)

Evaluating Model Performance

# Score the trained model on the test split
test_loss, test_accuracy = model.evaluate(x_test, y_test, verbose=0)
print(f"Test accuracy: {test_accuracy:.4f}")

# Training vs. validation curves, one panel per metric.
# Each entry is (key in history.history, display name).
plt.figure(figsize=(12, 4))

panels = (('loss', 'Loss'), ('accuracy', 'Accuracy'))
for position, (metric, pretty) in enumerate(panels, start=1):
    plt.subplot(1, 2, position)
    plt.plot(history.history[metric], label=f'Training {pretty}')
    plt.plot(history.history[f'val_{metric}'], label=f'Validation {pretty}')
    plt.title(f'Model {pretty}')
    plt.xlabel('Epoch')
    plt.ylabel(pretty)
    plt.legend()

plt.tight_layout()
plt.show()

Making Predictions

# Class probabilities for every test image, reduced to class indices
predictions = model.predict(x_test)
predicted_classes = predictions.argmax(axis=1)
true_classes = y_test.argmax(axis=1)  # y_test is one-hot, so argmax recovers the label

# Show the first 20 test images in a 4x5 grid with predicted label,
# true label and the probability assigned to the predicted class
plt.figure(figsize=(15, 10))
first_twenty = zip(x_test[:20], predictions[:20],
                   predicted_classes[:20], true_classes[:20])
for slot, (image, probabilities, guess, truth) in enumerate(first_twenty, start=1):
    plt.subplot(4, 5, slot)
    plt.imshow(image)

    predicted_label = class_names[guess]
    true_label = class_names[truth]
    confidence = probabilities[guess]

    # Green title for a correct prediction, red for a miss
    if guess == truth:
        color = 'green'
    else:
        color = 'red'
    plt.title(f'Pred: {predicted_label}\nTrue: {true_label}\nConf: {confidence:.2f}',
              color=color, fontsize=8)
    plt.axis('off')

plt.tight_layout()
plt.show()

Advanced Techniques

Data Augmentation

# Create data augmentation pipeline
data_augmentation = keras.Sequential([
    layers.RandomFlip("horizontal"),
    layers.RandomRotation(0.1),
    layers.RandomZoom(0.1),
    layers.RandomContrast(0.1),
])

# Visualize augmentations: nine random variants of the first training image
plt.figure(figsize=(12, 8))
for i in range(9):
    # The Random* layers are only active in training mode. Request it
    # explicitly so the preview is augmented rather than an identity copy.
    augmented_image = data_augmentation(x_train[0:1], training=True)
    plt.subplot(3, 3, i + 1)
    # Contrast changes can push values slightly outside [0, 1]; clip so
    # imshow receives a valid float image.
    plt.imshow(np.clip(np.asarray(augmented_image[0]), 0.0, 1.0))
    plt.axis('off')
plt.suptitle('Data Augmentation Examples')
plt.show()

# Include augmentation in model. The augmentation layers run as part of the
# forward pass during fit() and are skipped at inference time. The remaining
# layers use the same Conv/Pool/Dense stack as the baseline CNN.
model_with_aug = keras.Sequential([
    keras.Input(shape=(32, 32, 3)),
    data_augmentation,
    layers.Conv2D(32, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.Flatten(),
    layers.Dense(64, activation='relu'),
    layers.Dropout(0.5),
    layers.Dense(num_classes, activation='softmax')
])

Transfer Learning

# Use pre-trained model
base_model = keras.applications.VGG16(
    weights='imagenet',  # Pre-trained on ImageNet
    include_top=False,   # Exclude final classification layer
    input_shape=(32, 32, 3)
)

# Freeze base model weights
base_model.trainable = False

# Add custom classification head.
# The ImageNet weights expect inputs prepared by vgg16.preprocess_input
# (0-255 range, RGB->BGR, per-channel mean subtraction). The data was scaled
# to [0, 1] earlier, so undo that scaling and apply the VGG16 preprocessing
# inside the model before the frozen base.
model_transfer = keras.Sequential([
    keras.Input(shape=(32, 32, 3)),
    layers.Rescaling(255.0),
    layers.Lambda(keras.applications.vgg16.preprocess_input),
    base_model,
    layers.GlobalAveragePooling2D(),
    layers.Dense(128, activation='relu'),
    layers.Dropout(0.5),
    layers.Dense(num_classes, activation='softmax')
])

model_transfer.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

# Train only the new layers. Validation uses a held-out 10% of the training
# data so the test set is not consumed while the model is being developed.
history_transfer = model_transfer.fit(
    x_train, y_train,
    validation_split=0.1,
    epochs=10,
    batch_size=32
)

Fine-tuning

# Unfreeze some layers of the base model for fine-tuning
base_model.trainable = True

# Fine-tune from the last convolutional block onwards.
# The VGG16 base without its top has only 19 layers, so a cut-off such as 100
# would re-freeze every layer and nothing would be fine-tuned. Locate the
# first 'block5' layer by name instead of relying on a magic index.
fine_tune_at = next(
    index for index, layer in enumerate(base_model.layers)
    if layer.name.startswith('block5')
)

# Freeze all the layers before fine_tune_at
for layer in base_model.layers[:fine_tune_at]:
    layer.trainable = False

# Recompile so the changed trainable flags take effect, with a much lower
# learning rate to avoid destroying the pre-trained features.
model_transfer.compile(
    optimizer=keras.optimizers.Adam(1e-5),  # Lower learning rate
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

# Continue training
history_fine = model_transfer.fit(
    x_train, y_train,
    validation_data=(x_test, y_test),
    epochs=10,
    batch_size=32
)

Working with Custom Datasets

Loading Custom Images

import os
from pathlib import Path

def load_custom_dataset(data_dir, img_size=(224, 224)):
    """Load a labelled image dataset from a one-folder-per-class directory.

    Every immediate sub-directory of ``data_dir`` is treated as a class; the
    sorted sub-directory names define the integer label of each class. Only
    files matching ``*.jpg`` directly inside a class folder are read.

    Args:
        data_dir: Root directory (str or Path) containing the class folders.
        img_size: ``(width, height)`` passed to ``PIL.Image.resize``.

    Returns:
        A tuple ``(images, labels, class_names)``: ``images`` is an array of
        the resized RGB images scaled to [0, 1], ``labels`` holds the matching
        class indices, and ``class_names`` is the sorted list of folder names.
        With no images found, both arrays are empty.
    """
    data_dir = Path(data_dir)
    class_names = sorted(d.name for d in data_dir.iterdir() if d.is_dir())

    images = []
    labels = []
    for class_idx, class_name in enumerate(class_names):
        class_dir = data_dir / class_name
        # Sort so the sample order does not depend on filesystem enumeration
        for img_path in sorted(class_dir.glob('*.jpg')):
            # The context manager closes the file handle promptly. The RGB
            # conversion gives every sample 3 channels, so grayscale or CMYK
            # JPEGs cannot produce a ragged array when stacked below.
            with Image.open(img_path) as img:
                resized = img.convert('RGB').resize(img_size)
                img_array = np.array(resized) / 255.0

            images.append(img_array)
            labels.append(class_idx)

    return np.array(images), np.array(labels), class_names

# Example usage
# images, labels, class_names = load_custom_dataset('path/to/dataset')

Using tf.data for Efficient Data Loading

def create_dataset(image_paths, labels, batch_size=32, img_size=(224, 224)):
    """Build a batched, prefetched tf.data pipeline from image file paths.

    Args:
        image_paths: Sequence of image file paths, one per sample.
        labels: Sequence of labels aligned with ``image_paths``.
        batch_size: Number of samples per batch.
        img_size: Target ``(height, width)`` passed to ``tf.image.resize``.

    Returns:
        A ``tf.data.Dataset`` yielding ``(images, labels)`` batches, where the
        images are float32 tensors scaled to [0, 1].
    """

    def load_and_preprocess_image(path, label):
        """Read one file, decode it to 3 channels, resize and scale it."""
        image = tf.io.read_file(path)
        # expand_animations=False makes decode_image return a 3-D tensor with
        # a known rank. With the default, the shape is unknown (GIFs decode
        # to 4-D) and tf.image.resize fails inside Dataset.map.
        image = tf.image.decode_image(image, channels=3,
                                      expand_animations=False)
        image = tf.image.resize(image, img_size)
        image = tf.cast(image, tf.float32) / 255.0
        return image, label

    dataset = tf.data.Dataset.from_tensor_slices((image_paths, labels))
    # Decode in parallel, then batch and prefetch so loading overlaps training
    dataset = dataset.map(load_and_preprocess_image,
                          num_parallel_calls=tf.data.AUTOTUNE)
    dataset = dataset.batch(batch_size)
    dataset = dataset.prefetch(tf.data.AUTOTUNE)

    return dataset

Model Interpretation and Visualization

Class Activation Maps (CAM)

def make_gradcam_heatmap(img_array, model, last_conv_layer_name, pred_index=None):
    """Generate a Grad-CAM heatmap for one image.

    Args:
        img_array: Batch containing the image to explain; only the first
            element's activations are used for the heatmap.
        model: Keras model whose prediction is being explained.
        last_conv_layer_name: Name of the convolutional layer whose feature
            maps are weighted to form the heatmap.
        pred_index: Class index to explain. Defaults to the top predicted
            class of the first image.

    Returns:
        A 2-D NumPy array with the spatial size of the chosen layer's output.
        Values lie in [0, 1], and are all zero when no activation contributes
        positively to the class.
    """
    # Create model that maps input to activations and predictions
    grad_model = tf.keras.models.Model(
        model.inputs,
        [model.get_layer(last_conv_layer_name).output, model.output]
    )

    with tf.GradientTape() as tape:
        last_conv_layer_output, preds = grad_model(img_array)
        if pred_index is None:
            pred_index = tf.argmax(preds[0])
        class_channel = preds[:, pred_index]

    # Gradient of the class score w.r.t. the feature maps, averaged over the
    # batch and spatial axes to get one importance weight per channel
    grads = tape.gradient(class_channel, last_conv_layer_output)
    pooled_grads = tf.reduce_mean(grads, axis=(0, 1, 2))

    # Weight feature maps by gradients and sum over channels
    last_conv_layer_output = last_conv_layer_output[0]
    heatmap = last_conv_layer_output @ pooled_grads[..., tf.newaxis]
    heatmap = tf.squeeze(heatmap)

    # Keep only positive evidence, then normalize to [0, 1]. divide_no_nan
    # returns 0 instead of NaN when the maximum is 0 (no positive activation).
    heatmap = tf.maximum(heatmap, 0)
    heatmap = tf.math.divide_no_nan(heatmap, tf.math.reduce_max(heatmap))
    return heatmap.numpy()

# Generate and display GradCAM
sample_img = x_test[0:1]

# Look up the last convolutional layer instead of hard-coding 'conv2d_2'.
# Auto-generated Keras layer names depend on how many layers were created
# earlier in the session, so a fixed name breaks when cells are re-run.
last_conv_layer_name = [
    layer.name for layer in model.layers if isinstance(layer, layers.Conv2D)
][-1]
heatmap = make_gradcam_heatmap(sample_img, model, last_conv_layer_name)

# The heatmap has the (much smaller) resolution of the conv feature map.
# Draw it with the image's extent so the overlay covers the whole picture;
# otherwise the second imshow would shrink the axes to the heatmap's grid.
img_height, img_width = x_test[0].shape[:2]
image_extent = (-0.5, img_width - 0.5, img_height - 0.5, -0.5)

plt.figure(figsize=(12, 4))
plt.subplot(1, 3, 1)
plt.imshow(x_test[0])
plt.title('Original Image')
plt.axis('off')

plt.subplot(1, 3, 2)
plt.imshow(heatmap, cmap='jet')
plt.title('GradCAM Heatmap')
plt.axis('off')

plt.subplot(1, 3, 3)
plt.imshow(x_test[0])
plt.imshow(heatmap, cmap='jet', alpha=0.4, extent=image_extent)
plt.title('Overlay')
plt.axis('off')

plt.tight_layout()
plt.show()

Best Practices

1. Data Quality

Clean dataset: Remove corrupted or mislabeled images
Balanced classes: Ensure adequate samples per class
Data validation: Check for data leakage

2. Model Architecture

Start simple: Begin with basic CNNs
Progressive complexity: Add layers/features gradually
Appropriate capacity: Match model complexity to dataset size

3. Training Strategies

Learning rate scheduling: Reduce the learning rate when the validation metric plateaus
Early stopping: Prevent overfitting
Cross-validation: For small datasets

4. Evaluation

Multiple metrics: Accuracy, precision, recall, F1-score
Confusion matrix: Understand class-wise performance
Error analysis: Examine misclassified samples

Common Challenges and Solutions

1. Overfitting

Symptoms: High training accuracy, low validation accuracy

Solutions:

Data augmentation
Dropout layers
Early stopping
Regularization

2. Poor Performance

Symptoms: Low accuracy on both training and validation

Solutions:

More complex model
Better data preprocessing
Feature engineering
Hyperparameter tuning

3. Class Imbalance

Symptoms: High accuracy but poor performance on minority classes

Solutions:

Weighted loss functions
Oversampling minority classes
Data augmentation for rare classes

Next Steps

Object Detection - Locating and classifying multiple objects
Image Segmentation - Pixel-level classification
OpenCV Tutorial - Traditional computer vision techniques

💡 Pro Tip: Start with pre-trained models and transfer learning for faster development and better performance, especially with limited data!

What is Image Classification?

Examples:

Understanding Images in Computer Vision

Digital Image Representation

Color Spaces and Channels

Building Your First Image Classifier

Dataset Preparation

Data Preprocessing

Simple CNN Architecture

Model Compilation and Training

Evaluating Model Performance

Making Predictions

Advanced Techniques

Data Augmentation

Transfer Learning

Fine-tuning

Working with Custom Datasets

Loading Custom Images

Using tf.data for Efficient Data Loading

Model Interpretation and Visualization

Class Activation Maps (CAM)

Best Practices

1. Data Quality

2. Model Architecture

3. Training Strategies

4. Evaluation

Common Challenges and Solutions

1. Overfitting

2. Poor Performance

3. Class Imbalance

Next Steps