Skip to main content

Image Classification with Deep Learning

Learn how to build and train neural networks to classify images using modern deep learning techniques.

What is Image Classification?

Image Classification is the task of assigning a label or category to an entire image. It answers the question: "What is in this image?"

Examples:

  • Medical: Classifying X-rays as normal or showing fractures
  • Agriculture: Identifying crop diseases from leaf images
  • Security: Recognizing authorized vs unauthorized personnel
  • Social Media: Auto-tagging photos with relevant labels

Understanding Images in Computer Vision

Digital Image Representation

import numpy as np
import matplotlib.pyplot as plt
from PIL import Image

# Load and examine an image.
# Converting to RGB guarantees a (height, width, 3) array even when the file
# is grayscale, palettised or carries an alpha channel, so the channel
# indexing further down cannot fail. The `with` block releases the file
# handle as soon as the pixel data has been copied.
with Image.open('sample_image.jpg') as source_image:
    img = source_image.convert('RGB')
img_array = np.array(img)

print(f"Image shape: {img_array.shape}") # (height, width, channels)
print(f"Data type: {img_array.dtype}") # uint8 (0-255) for an RGB image
print(f"Min pixel value: {img_array.min()}")
print(f"Max pixel value: {img_array.max()}")

# Visualize the image: full colour on the left, a single channel on the right
plt.figure(figsize=(10, 4))

plt.subplot(1, 2, 1)
plt.imshow(img_array)
plt.title('Original Image')
plt.axis('off')

plt.subplot(1, 2, 2)
# Index 0 of the last axis is the red channel; it is rendered as intensities
plt.imshow(img_array[:, :, 0], cmap='gray') # Red channel only
plt.title('Red Channel')
plt.axis('off')

plt.show()

Color Spaces and Channels

# RGB vs Grayscale
# Force three channels first so the documented (H, W, 3) shape always holds;
# a grayscale source would otherwise produce a 2-D array, which the
# three-channel colour conversions below cannot accept.
rgb_image = np.array(img.convert('RGB')) # Shape: (H, W, 3)
gray_image = np.array(img.convert('L')) # Shape: (H, W)

print(f"RGB shape: {rgb_image.shape}")
print(f"Grayscale shape: {gray_image.shape}")

# Convert RGB to other color spaces
import cv2

# Convert to HSV (the RGB2* codes match the channel order PIL produces)
hsv_image = cv2.cvtColor(rgb_image, cv2.COLOR_RGB2HSV)

# Convert to LAB
lab_image = cv2.cvtColor(rgb_image, cv2.COLOR_RGB2LAB)

Building Your First Image Classifier

Dataset Preparation

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import numpy as np

# Fetch the CIFAR-10 train/test splits.
train_split, test_split = keras.datasets.cifar10.load_data()
x_train, y_train = train_split
x_test, y_test = test_split

# Report the array shapes of each split.
print(f"Training images: {x_train.shape}")
print(f"Training labels: {y_train.shape}")
print(f"Test images: {x_test.shape}")
print(f"Test labels: {y_test.shape}")

# Human-readable class names, indexed by the integer label.
class_names = [
    'airplane', 'automobile', 'bird', 'cat', 'deer',
    'dog', 'frog', 'horse', 'ship', 'truck',
]

# Show the first ten training images in a 2 x 5 grid, titled by class.
plt.figure(figsize=(12, 6))
first_ten = zip(x_train[:10], y_train[:10])
for position, (image, label) in enumerate(first_ten, start=1):
    plt.subplot(2, 5, position)
    plt.imshow(image)
    # Each label is itself a one-element array, hence the [0].
    plt.title(f'{class_names[label[0]]}')
    plt.axis('off')
plt.tight_layout()
plt.show()

Data Preprocessing

# Rescale pixel intensities to [0, 1] as float32, for both splits at once.
x_train, x_test = (
    split.astype('float32') / 255.0 for split in (x_train, x_test)
)

# One-hot encode the integer labels: one column per class.
num_classes = 10
y_train, y_test = (
    keras.utils.to_categorical(split, num_classes)
    for split in (y_train, y_test)
)

# Sanity-check the transformed arrays.
print(f"Normalized training data shape: {x_train.shape}")
print(f"One-hot labels shape: {y_train.shape}")
print(f"Sample label: {y_train[0]}")

Simple CNN Architecture

# Assemble the convolutional network one layer at a time.
model = keras.Sequential()

# Convolutional block 1 (also declares the 32 x 32 RGB input).
model.add(layers.Conv2D(32, (3, 3), activation='relu', input_shape=(32, 32, 3)))
model.add(layers.MaxPooling2D((2, 2)))

# Convolutional block 2.
model.add(layers.Conv2D(64, (3, 3), activation='relu'))
model.add(layers.MaxPooling2D((2, 2)))

# Convolutional block 3 (no pooling afterwards).
model.add(layers.Conv2D(64, (3, 3), activation='relu'))

# Classifier head: flatten, one hidden dense layer with dropout, then a
# softmax over the classes.
model.add(layers.Flatten())
model.add(layers.Dense(64, activation='relu'))
model.add(layers.Dropout(0.5))
model.add(layers.Dense(num_classes, activation='softmax'))

# Print a textual summary of the architecture.
model.summary()

# Render the architecture as a diagram, including shapes and layer names.
keras.utils.plot_model(model, show_shapes=True, show_layer_names=True)

Model Compilation and Training

# Compile the model: Adam optimizer, cross-entropy on the one-hot labels,
# accuracy reported during training.
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Set up callbacks. All three act on the validation metrics: stop when they
# stop improving (restoring the best weights), shrink the learning rate on a
# plateau, and keep only the best checkpoint on disk.
callbacks = [
    keras.callbacks.EarlyStopping(patience=5, restore_best_weights=True),
    keras.callbacks.ReduceLROnPlateau(factor=0.2, patience=3),
    keras.callbacks.ModelCheckpoint('best_model.h5', save_best_only=True),
]

# Train the model.
# Validation data is carved out of the training set instead of reusing the
# test set: the callbacks above select weights based on validation results,
# so validating on the test set would leak it into model selection and make
# the final test accuracy optimistic.
history = model.fit(
    x_train, y_train,
    batch_size=32,
    epochs=50,
    validation_split=0.1,
    callbacks=callbacks,
    verbose=1,
)

Evaluating Model Performance

# Score the trained model on the test set.
test_loss, test_accuracy = model.evaluate(x_test, y_test, verbose=0)
print(f"Test accuracy: {test_accuracy:.4f}")

# Draw the loss and accuracy curves side by side.
# Each entry: (training key, validation key, training label,
#              validation label, panel title, y-axis label)
curve_specs = [
    ('loss', 'val_loss',
     'Training Loss', 'Validation Loss', 'Model Loss', 'Loss'),
    ('accuracy', 'val_accuracy',
     'Training Accuracy', 'Validation Accuracy', 'Model Accuracy', 'Accuracy'),
]

plt.figure(figsize=(12, 4))
for panel, spec in enumerate(curve_specs, start=1):
    train_key, val_key, train_label, val_label, panel_title, y_label = spec
    plt.subplot(1, 2, panel)
    plt.plot(history.history[train_key], label=train_label)
    plt.plot(history.history[val_key], label=val_label)
    plt.title(panel_title)
    plt.xlabel('Epoch')
    plt.ylabel(y_label)
    plt.legend()

plt.tight_layout()
plt.show()

Making Predictions

# Run the model over the test set, then reduce both the predicted
# probabilities and the one-hot labels to class indices.
predictions = model.predict(x_test)
predicted_classes = predictions.argmax(axis=1)
true_classes = y_test.argmax(axis=1)

# Show the first 20 test images in a 4 x 5 grid; the title is green for a
# correct prediction and red for a wrong one.
plt.figure(figsize=(15, 10))
for slot in range(20):
    predicted_idx = predicted_classes[slot]
    true_idx = true_classes[slot]

    predicted_label = class_names[predicted_idx]
    true_label = class_names[true_idx]
    # Probability the model assigned to the class it picked.
    confidence = predictions[slot][predicted_idx]

    if predicted_idx == true_idx:
        title_color = 'green'
    else:
        title_color = 'red'

    plt.subplot(4, 5, slot + 1)
    plt.imshow(x_test[slot])
    plt.title(
        f'Pred: {predicted_label}\nTrue: {true_label}\nConf: {confidence:.2f}',
        color=title_color,
        fontsize=8,
    )
    plt.axis('off')

plt.tight_layout()
plt.show()

Advanced Techniques

Data Augmentation

# Create data augmentation pipeline: random horizontal flips plus small
# random rotations, zooms and contrast changes.
data_augmentation = keras.Sequential([
    layers.RandomFlip("horizontal"),
    layers.RandomRotation(0.1),
    layers.RandomZoom(0.1),
    layers.RandomContrast(0.1),
])

# Visualize augmentations: nine random variants of the first training image.
sample_batch = x_train[0:1]  # slice (not index) to keep the batch axis
plt.figure(figsize=(12, 8))
for i in range(9):
    # Random augmentation layers only act in training mode, so request it
    # explicitly instead of relying on the call default.
    augmented_image = data_augmentation(sample_batch, training=True)
    plt.subplot(3, 3, i + 1)
    # The contrast change can push values slightly outside [0, 1]; clip so
    # the image is displayed without range warnings.
    plt.imshow(tf.clip_by_value(augmented_image[0], 0.0, 1.0))
    plt.axis('off')
plt.suptitle('Data Augmentation Examples')
plt.show()

# Put the augmentation pipeline at the front of a model so it runs as part
# of the network itself.
model_with_aug = keras.Sequential()
model_with_aug.add(data_augmentation)
model_with_aug.add(layers.Conv2D(32, (3, 3), activation='relu'))
# ... rest of the model

Transfer Learning

# Load the VGG16 convolutional base with ImageNet weights, leaving out its
# original classification layers.
base_model = keras.applications.VGG16(
    include_top=False,
    weights='imagenet',
    input_shape=(32, 32, 3),
)

# Keep the pre-trained weights fixed while the new head is trained.
base_model.trainable = False

# Stack a fresh classification head on top of the frozen base.
model_transfer = keras.Sequential()
model_transfer.add(base_model)
for head_layer in (
    layers.GlobalAveragePooling2D(),
    layers.Dense(128, activation='relu'),
    layers.Dropout(0.5),
    layers.Dense(num_classes, activation='softmax'),
):
    model_transfer.add(head_layer)

model_transfer.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Only the newly added layers are updated during this run.
history_transfer = model_transfer.fit(
    x_train,
    y_train,
    batch_size=32,
    epochs=10,
    validation_data=(x_test, y_test),
)

Fine-tuning

# Unfreeze the base model, then re-freeze everything below the last
# convolutional block so that only the top of the network is fine-tuned.
base_model.trainable = True

# Fine-tune from this layer onwards. The index is looked up by layer name
# rather than hard-coded: the VGG16 base has far fewer than 100 layers, so a
# cut-off of 100 would re-freeze every layer and nothing would be fine-tuned.
fine_tune_at = base_model.layers.index(base_model.get_layer('block5_conv1'))

# Freeze all the layers before fine_tune_at
for layer in base_model.layers[:fine_tune_at]:
    layer.trainable = False

# Recompile so the new trainable flags take effect, using a much lower
# learning rate so the pre-trained weights are only nudged.
model_transfer.compile(
    optimizer=keras.optimizers.Adam(1e-5), # Lower learning rate
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Continue training
history_fine = model_transfer.fit(
    x_train, y_train,
    validation_data=(x_test, y_test),
    epochs=10,
    batch_size=32,
)

Working with Custom Datasets

Loading Custom Images

import os
from pathlib import Path

def load_custom_dataset(data_dir, img_size=(224, 224), patterns=('*.jpg',)):
    """Load a labelled image dataset from a one-folder-per-class directory.

    Every immediate sub-directory of ``data_dir`` is treated as one class, and
    class indices follow the sorted order of the sub-directory names. Only
    files directly inside a class directory that match ``patterns`` are read.

    Args:
        data_dir: Root directory of the dataset (string or ``Path``).
        img_size: Target size handed to ``Image.resize``; PIL interprets it
            as ``(width, height)``.
        patterns: Glob patterns selecting the image files in each class
            directory. Defaults to JPEG files with a ``.jpg`` suffix.

    Returns:
        A tuple ``(images, labels, class_names)``: an array of RGB images
        scaled to ``[0, 1]``, an array with the class index of each image,
        and the sorted list of class names.
    """
    data_dir = Path(data_dir)
    class_names = sorted(d.name for d in data_dir.iterdir() if d.is_dir())

    images = []
    labels = []
    for class_idx, class_name in enumerate(class_names):
        class_dir = data_dir / class_name
        # Sort (and de-duplicate) the matches so the sample order does not
        # depend on the file system's listing order.
        img_paths = sorted(
            {path for pattern in patterns for path in class_dir.glob(pattern)}
        )
        for img_path in img_paths:
            # The context manager closes the file promptly. Converting to RGB
            # gives every image three channels, so grayscale or RGBA files
            # cannot make the final stacked array ragged.
            with Image.open(img_path) as img:
                resized = img.convert('RGB').resize(img_size)
            images.append(np.array(resized) / 255.0)
            labels.append(class_idx)

    return np.array(images), np.array(labels), class_names

# Example usage
# images, labels, class_names = load_custom_dataset('path/to/dataset')

Using tf.data for Efficient Data Loading

def create_dataset(image_paths, labels, batch_size=32, img_size=(224, 224)):
    """Create a batched, prefetching ``tf.data`` pipeline from image files.

    Args:
        image_paths: Paths of the image files, one per sample.
        labels: Labels aligned element-wise with ``image_paths``.
        batch_size: Number of samples per emitted batch.
        img_size: Target size handed to ``tf.image.resize``.

    Returns:
        A ``tf.data.Dataset`` yielding ``(images, labels)`` batches, with the
        images resized to ``img_size`` and scaled to ``[0, 1]`` as float32.
    """

    def load_and_preprocess_image(path, label):
        """Read one file, decode it to 3 channels, resize and rescale it."""
        image = tf.io.read_file(path)
        # expand_animations=False makes decode_image return a 3-D tensor.
        # Without it the decoded tensor has no static shape inside
        # Dataset.map and tf.image.resize fails with
        # "'images' contains no shape".
        image = tf.image.decode_image(
            image, channels=3, expand_animations=False
        )
        image = tf.image.resize(image, img_size)
        image = tf.cast(image, tf.float32) / 255.0
        return image, label

    dataset = tf.data.Dataset.from_tensor_slices((image_paths, labels))
    # Decode files in parallel and overlap loading with consumption.
    dataset = dataset.map(
        load_and_preprocess_image,
        num_parallel_calls=tf.data.AUTOTUNE,
    )
    dataset = dataset.batch(batch_size)
    dataset = dataset.prefetch(tf.data.AUTOTUNE)

    return dataset

Model Interpretation and Visualization

Gradient-weighted Class Activation Maps (Grad-CAM)

def make_gradcam_heatmap(img_array, model, last_conv_layer_name, pred_index=None):
    """Generate a Grad-CAM heatmap for one image.

    Args:
        img_array: Batch containing the image to explain; only the first
            element of the batch is used for the heatmap.
        model: Trained Keras model.
        last_conv_layer_name: Name of the layer whose activations are
            weighted to build the heatmap.
        pred_index: Index of the class to explain. Defaults to the class
            with the highest score for the first image.

    Returns:
        A 2-D NumPy array at the spatial resolution of the chosen layer's
        output, with values in ``[0, 1]``. It is all zeros when no
        activation contributes positively to the class score.
    """
    # Model that maps the input to both the chosen layer's activations and
    # the final predictions. `model.inputs` is already a list; wrapping it in
    # another list creates a nested input structure that newer Keras
    # versions complain about.
    grad_model = tf.keras.models.Model(
        model.inputs,
        [model.get_layer(last_conv_layer_name).output, model.output],
    )

    with tf.GradientTape() as tape:
        last_conv_layer_output, preds = grad_model(img_array)
        if pred_index is None:
            pred_index = tf.argmax(preds[0])
        class_channel = preds[:, pred_index]

    # Gradient of the class score with respect to the feature maps, averaged
    # over the batch and spatial axes to get one weight per channel.
    grads = tape.gradient(class_channel, last_conv_layer_output)
    pooled_grads = tf.reduce_mean(grads, axis=(0, 1, 2))

    # Weight feature maps by gradients and sum over channels.
    last_conv_layer_output = last_conv_layer_output[0]
    heatmap = last_conv_layer_output @ pooled_grads[..., tf.newaxis]
    heatmap = tf.squeeze(heatmap)

    # Keep positive contributions only and scale to [0, 1]. divide_no_nan
    # returns 0 instead of NaN when the maximum is 0.
    heatmap = tf.maximum(heatmap, 0)
    heatmap = tf.math.divide_no_nan(heatmap, tf.math.reduce_max(heatmap))
    return heatmap.numpy()

# Generate and display GradCAM
sample_img = x_test[0:1]

# Look up the last convolutional layer instead of hard-coding an
# auto-generated name such as 'conv2d_2', which changes whenever more layers
# have been created in the same session.
last_conv_layer_name = next(
    layer.name
    for layer in reversed(model.layers)
    if isinstance(layer, layers.Conv2D)
)
heatmap = make_gradcam_heatmap(sample_img, model, last_conv_layer_name)

# The heatmap has the resolution of the convolutional feature map, which is
# smaller than the input image. Stretching it over the image's pixel extent
# keeps the overlay aligned with the picture underneath.
img_height, img_width = x_test[0].shape[:2]
image_extent = (-0.5, img_width - 0.5, img_height - 0.5, -0.5)

plt.figure(figsize=(12, 4))
plt.subplot(1, 3, 1)
plt.imshow(x_test[0])
plt.title('Original Image')
plt.axis('off')

plt.subplot(1, 3, 2)
plt.imshow(heatmap, cmap='jet')
plt.title('GradCAM Heatmap')
plt.axis('off')

plt.subplot(1, 3, 3)
plt.imshow(x_test[0])
plt.imshow(
    heatmap,
    cmap='jet',
    alpha=0.4,
    extent=image_extent,
    interpolation='bilinear',
)
plt.title('Overlay')
plt.axis('off')

plt.tight_layout()
plt.show()

Best Practices

1. Data Quality

  • Clean dataset: Remove corrupted or mislabeled images
  • Balanced classes: Ensure adequate samples per class
  • Data validation: Check for data leakage

2. Model Architecture

  • Start simple: Begin with basic CNNs
  • Progressive complexity: Add layers/features gradually
  • Appropriate capacity: Match model complexity to dataset size

3. Training Strategies

  • Learning rate scheduling: Reduce the learning rate when the validation loss plateaus
  • Early stopping: Prevent overfitting
  • Cross-validation: For small datasets

4. Evaluation

  • Multiple metrics: Accuracy, precision, recall, F1-score
  • Confusion matrix: Understand class-wise performance
  • Error analysis: Examine misclassified samples

Common Challenges and Solutions

1. Overfitting

Symptoms: High training accuracy, low validation accuracy.

Solutions:

  • Data augmentation
  • Dropout layers
  • Early stopping
  • Regularization

2. Poor Performance

Symptoms: Low accuracy on both training and validation data (underfitting).

Solutions:

  • More complex model
  • Better data preprocessing
  • Feature engineering
  • Hyperparameter tuning

3. Class Imbalance

Symptoms: High overall accuracy but poor performance on minority classes.

Solutions:

  • Weighted loss functions
  • Oversampling minority classes
  • Data augmentation for rare classes

Next Steps


💡 Pro Tip: Start with pre-trained models and transfer learning for faster development and better performance, especially with limited data!