Image Classification with Dilated Convolutional Neural Network
Introduction
Advantages
- It can enlarge the receptive field without adding parameters or extra layers, which reduces the computational cost and memory usage compared to achieving the same receptive field with a deeper traditional CNN.
- It can preserve more details and spatial information in the feature maps, which can improve the accuracy and robustness of the classification.
- It can capture long-range dependencies and multi-scale patterns in the images, which can enhance the representation power of the model.
Example
Code
- Loads the MNIST dataset using TensorFlow Datasets.
- Defines a preprocessing function that converts image pixels to floating-point values and resizes the images to 28x28.
- Applies the preprocessing function, batches, shuffles, and prefetches the training data.
- Applies the preprocessing function, batches, and prefetches the test data.
- Defines a dilated CNN model using TensorFlow's Keras API, with convolutional layers, batch normalization, max pooling, and fully connected layers.
- Compiles the model by specifying the optimizer, loss function, and evaluation metric.
- Trains the model on the preprocessed training data.
- Evaluates the trained model's performance on the test data and prints the test accuracy.
|
"""Train a dilated CNN on MNIST, plot learning curves, visualize predictions,
and save/reload the model.

Fixes relative to the original script:
- All imports are hoisted to the top. The original called `plt.figure(...)`
  before `import matplotlib.pyplot as plt` appeared (NameError), and likewise
  used `np` before importing numpy.
- `.shuffle()` is applied BEFORE `.batch()`, so individual examples (not
  whole 64-image batches) are shuffled each epoch.
- The sample-prediction visualization block, which was duplicated verbatim,
  appears exactly once.
"""
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
import tensorflow_datasets as tfds
from tensorflow.keras import layers, losses, models, optimizers

# --- Data --------------------------------------------------------------------
(train_data, test_data), info = tfds.load(
    'mnist', split=['train', 'test'], as_supervised=True, with_info=True)


def preprocess(image, label):
    """Scale pixel values to [0, 1] floats and resize to 28x28."""
    image = tf.image.convert_image_dtype(image, dtype=tf.float32)
    image = tf.image.resize(image, (28, 28))
    return image, label


train_data = (train_data
              .map(preprocess)
              .shuffle(buffer_size=10000)  # shuffle examples, then batch
              .batch(64)
              .prefetch(buffer_size=tf.data.AUTOTUNE))
test_data = (test_data
             .map(preprocess)
             .batch(64)
             .prefetch(buffer_size=tf.data.AUTOTUNE))

# --- Model -------------------------------------------------------------------
# Increasing dilation rates (1, 2, 4, 8) widen the receptive field at each
# stage without adding parameters; 'same' padding preserves spatial size so
# each 2x2 max-pool halves the feature map cleanly.
model = models.Sequential([
    layers.Conv2D(32, (5, 5), dilation_rate=1, activation='relu',
                  padding='same', input_shape=(28, 28, 1)),
    layers.BatchNormalization(),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (5, 5), dilation_rate=2, activation='relu',
                  padding='same'),
    layers.BatchNormalization(),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(128, (5, 5), dilation_rate=4, activation='relu',
                  padding='same'),
    layers.BatchNormalization(),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(256, (5, 5), dilation_rate=8, activation='relu',
                  padding='same'),
    layers.BatchNormalization(),
    layers.MaxPooling2D((2, 2)),
    layers.Flatten(),
    layers.Dense(10, activation='softmax'),
])

model.compile(optimizer=optimizers.SGD(learning_rate=0.01, momentum=0.9),
              loss=losses.SparseCategoricalCrossentropy(),
              metrics=['accuracy'])

# --- Train -------------------------------------------------------------------
history = model.fit(train_data, epochs=20, validation_data=test_data)

# --- Plot training and validation curves -------------------------------------
plt.figure(figsize=(10, 5))
plt.subplot(1, 2, 1)
plt.plot(history.history['accuracy'], label='Train Accuracy')
plt.plot(history.history['val_accuracy'], label='Validation Accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend()
plt.subplot(1, 2, 2)
plt.plot(history.history['loss'], label='Train Loss')
plt.plot(history.history['val_loss'], label='Validation Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.tight_layout()
plt.show()

# --- Evaluate ----------------------------------------------------------------
test_loss, test_acc = model.evaluate(test_data)
print(f'Test accuracy: {test_acc:.4f}')

# --- Visualize predictions on one batch of test data -------------------------
for images, labels in test_data.take(1):
    sample_images = images
    sample_labels = labels

predictions = model.predict(sample_images)

# Grid layout: up to 5 columns, as many rows as needed for the batch.
num_images = sample_images.shape[0]
num_rows = int(np.ceil(num_images / 5))
num_cols = min(num_images, 5)

plt.figure(figsize=(10, 2 * num_rows))
for i in range(num_images):
    plt.subplot(num_rows, num_cols, i + 1)
    plt.imshow(sample_images[i, :, :, 0], cmap='gray')
    plt.title(f'Predicted: {np.argmax(predictions[i])}\nActual: {sample_labels[i]}')
    plt.axis('off')
plt.tight_layout()
plt.show()

# --- Save, reload, and reuse the model ---------------------------------------
model.save('dilated_cnn_model.h5')
loaded_model = tf.keras.models.load_model('dilated_cnn_model.h5')
predictions = loaded_model.predict(test_data)

0 Comments