import pandas as pd
import os
import cv2
import matplotlib.pyplot as plt
from PIL import Image, ImageTk
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Activation, Dropout, Flatten, Dense, BatchNormalization
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.callbacks import EarlyStopping
from english_words import english_words_set
from sklearn.base import BaseEstimator, TransformerMixin #Base class for transformers
from sklearn.pipeline import make_pipeline #Used to create a pipeline
import Levenshtein
import skimage.io as io
import gc
from tensorflow.keras.models import load_model
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import train_test_split
import keras
import seaborn as sns
import tkinter as tk
from tkinter import *
from tkinter import filedialog
from tkinter.filedialog import askopenfile
from sklearn.metrics import classification_report, confusion_matrix
#import tensorflow_hub as hub
'''
------------------------------------------------------------------------------------------------------------------------
Let's define important variables.
------------------------------------------------------------------------------------------------------------------------
'''
labelDictionary = {'A':1,'B':2,'C':3,'D':4,'E':5,'F':6,'G':7,'H':8,'I':9,
'J':10,'K':11,'L':12,'M':13,'N':14,'O':15,'P':16,'Q':17,
'R':18,'S':19,'T':20,'U':21,'V':22,'W':23,'X':24,'Y':25,
'Z':26,'del':27,'nothing':28,'space':29} #Dictionary for labels
IMAGE_DIMENTION = 64 #Image dimension (images are square: width = height = 64 pixels)
import warnings
def ignoreWarnings():
    warnings.filterwarnings("ignore")
'''
------------------------------------------------------------------------------------------------------------------------
We don't want to go through constant pre-processing of the image.
So instead we are going to create a pipeline to process the image.
We can then send new images and they will be processed appropriately.
------------------------------------------------------------------------------------------------------------------------
The first class will resize the image to a specific size.
------------------------------------------------------------------------------------------------------------------------
'''
class ResizeImage(BaseEstimator, TransformerMixin):
    def __init__(self, size):
        self.size = size
    def fit(self, X, y=None):
        return self
    def transform(self, X):
        # Note: cv2.imread returns BGR, so 'RGB' here mislabels the channels; strictly we should convert
        # with cv2.cvtColor(X, cv2.COLOR_BGR2RGB) first, though the effect on the greyscale result is small.
        image_array = Image.fromarray(X , 'RGB')
        #Convert to greyscale
        image_array = image_array.convert('L')
        resize_img = image_array.resize(self.size)
        return resize_img
'''
------------------------------------------------------------------------------------------------------------------------
We add an RGB variant of the image to the pipeline.
------------------------------------------------------------------------------------------------------------------------
'''
class ResizeImageRGB(BaseEstimator, TransformerMixin):
    def __init__(self, size):
        self.size = size
    def fit(self, X, y=None):
        return self
    def transform(self, X):
        image_array = Image.fromarray(X , 'RGB')
        resize_img = image_array.resize(self.size)
        return resize_img
'''
------------------------------------------------------------------------------------------------------------------------
We then convert the image to a numpy array.
------------------------------------------------------------------------------------------------------------------------
'''
class ConvertImageToArray(BaseEstimator, TransformerMixin):
    def __init__(self):
        pass
    def fit(self, X, y=None):
        return self
    def transform(self, X):
        return np.array(X)
'''
------------------------------------------------------------------------------------------------------------------------
The next step involves converting the image into a float.
------------------------------------------------------------------------------------------------------------------------
'''
class ConvertImageToFloat(BaseEstimator, TransformerMixin):
    def __init__(self):
        pass
    def fit(self, X, y=None):
        return self
    def transform(self, X):
        return X.astype('float32')
'''
------------------------------------------------------------------------------------------------------------------------
After that we want to normalize the image.
This involves scaling the image down.
------------------------------------------------------------------------------------------------------------------------
'''
class NormalizeImage(BaseEstimator, TransformerMixin):
    def __init__(self):
        pass
    def fit(self, X, y=None):
        return self
    def transform(self, X):
        return X / 255.0
'''
------------------------------------------------------------------------------------------------------------------------
Lastly we want to make sure that the image is in the correct shape for the model to process.
------------------------------------------------------------------------------------------------------------------------
'''
class ReshapeImage(BaseEstimator, TransformerMixin):
    def __init__(self, shape):
        self.shape = shape
    def fit(self, X, y=None):
        return self
    def transform(self, X):
        return X.reshape(self.shape)
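'''
------------------------------------------------------------------------------------------------------------------------
A quick sketch of how these transformers chain together. The same pipeline is assembled for real further below,
just before the final predictions; the sample path here is only illustrative.
------------------------------------------------------------------------------------------------------------------------
'''
preprocess = make_pipeline(ResizeImage((IMAGE_DIMENTION, IMAGE_DIMENTION)),
                           ConvertImageToArray(),
                           ConvertImageToFloat(),
                           NormalizeImage(),
                           ReshapeImage((1, IMAGE_DIMENTION, IMAGE_DIMENTION, 1)))
# sample = cv2.imread('Images/asl_alphabet_train/asl_alphabet_train/A/A1.jpg') # illustrative path
# ready = preprocess.transform(sample) # -> shape (1, 64, 64, 1), floats in [0, 1]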
'''
------------------------------------------------------------------------------------------------------------------------
The first step of the process is to become familiar with the folder structure of the dataset.
For now we want to work with the training data.
We will use the os module to list the contents of the folder.
------------------------------------------------------------------------------------------------------------------------
'''
print(os.listdir("Images/asl_alphabet_train/asl_alphabet_train/"))
['A', 'B', 'C', 'D', 'del', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'nothing', 'O', 'P', 'Q', 'R', 'S', 'space', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z']
'''
------------------------------------------------------------------------------------------------------------------------
Let's limit the amount of memory Tensorflow can use.
The code below sets TensorFlow to allocate GPU memory only as it needs it,
instead of the default behaviour of grabbing all available memory up front.
------------------------------------------------------------------------------------------------------------------------
'''
gpus = tf.config.list_physical_devices('GPU')
for gpu in gpus:
    print("Name: {}, type: {}".format(gpu.name, gpu.device_type))
    tf.config.experimental.set_memory_growth(gpu, True)
'''
------------------------------------------------------------------------------------------------------------------------
After that we can look at the image directory and see what the file structure looks like.
------------------------------------------------------------------------------------------------------------------------
'''
print('')
Name: /physical_device:GPU:0, type: GPU
'''
------------------------------------------------------------------------------------------------------------------------
We see that each category of image is stored in a separate folder.
Where the sign means A, the folder is named A.
------------------------------------------------------------------------------------------------------------------------
Let's store all these image names into a dictionary called AlphabetDictionary.
------------------------------------------------------------------------------------------------------------------------
The Key will be the name of the folder and the value will be a list of all the image names in that folder.
------------------------------------------------------------------------------------------------------------------------
'''
Folders = os.listdir("Images/asl_alphabet_train/asl_alphabet_train/") # List of all the folders in the directory
AlphabetDictionary = {}
Alphabet = []
'''
------------------------------------------------------------------------------------------------------------------------
We want all the letters to appear one after the other.
Currently the list looks like:
A, B, C, D, del, E...
We want to move the non-alphabet folders to the end of the list.
WE DON'T WANT TO DISCARD THE NON-ALPHABET FOLDERS, WE JUST WANT TO MOVE THEM TO THE END OF THE LIST.
------------------------------------------------------------------------------------------------------------------------
'''
'''
tempFolder = []
for folder in list(Folders): # Iterate over a copy; popping from the list we are looping over would skip items
    if len(folder) > 1: # Non-alphabet folders ('del', 'nothing', 'space') have names longer than one character
        tempFolder.append(Folders.pop(Folders.index(folder))) # Remove the folder from the list and stash it
Folders.extend(tempFolder) # Re-append the non-alphabet folders at the end of the list
'''
'''
------------------------------------------------------------------------------------------------------------------------
For each folder in the directory, we want to store the folder name as the key and the list of image names as the value.
------------------------------------------------------------------------------------------------------------------------
'''
for folder in Folders:
    Alphabet = os.listdir("Images/asl_alphabet_train/asl_alphabet_train/" + folder)
    AlphabetDictionary[folder] = Alphabet
'''
------------------------------------------------------------------------------------------------------------------------
We output the dictionary to see what it looks like.
------------------------------------------------------------------------------------------------------------------------
'''
AlphabetDictionary['A'][:10]
['A1.jpg', 'A10.jpg', 'A100.jpg', 'A1000.jpg', 'A1001.jpg', 'A1002.jpg', 'A1003.jpg', 'A1004.jpg', 'A1005.jpg', 'A1006.jpg']
'''
------------------------------------------------------------------------------------------------------------------------
The dictionary is set up quite well, but we want to see how many pictures we are working with in each folder.
We also want to see how many folders there are, since the number of folders determines the number of
classes the model will need to learn.
------------------------------------------------------------------------------------------------------------------------
'''
print(f'''
There are {len(AlphabetDictionary)} keys in the dictionary.
''')
for key in AlphabetDictionary:
    print(key, len(AlphabetDictionary[key]))
There are 29 keys in the dictionary.

A 3000
B 3000
C 3000
D 3000
del 3000
E 3000
F 3000
G 3000
H 3000
I 3000
J 3000
K 3000
L 3000
M 3000
N 3000
nothing 3000
O 3000
P 3000
Q 3000
R 3000
S 3000
space 3000
T 3000
U 3000
V 3000
W 3000
X 3000
Y 3000
Z 3000
'''
------------------------------------------------------------------------------------------------------------------------
There are 87000 images in total.
This is far too many images to start with in terms of a POE.
We still need to flip, rotate and blur the images, so we will trim each class to 690 images (about 20 000 in total).
This results in just over 100 000 images after the augmentation.
------------------------------------------------------------------------------------------------------------------------
'''
for key in AlphabetDictionary:
    AlphabetDictionary[key] = AlphabetDictionary[key][:690]
    print(key, len(AlphabetDictionary[key]))
A 690
B 690
C 690
D 690
del 690
E 690
F 690
G 690
H 690
I 690
J 690
K 690
L 690
M 690
N 690
nothing 690
O 690
P 690
Q 690
R 690
S 690
space 690
T 690
U 690
V 690
W 690
X 690
Y 690
Z 690
'''
------------------------------------------------------------------------------------------------------------------------
Let's load an image in the meantime and see what we are working with.
------------------------------------------------------------------------------------------------------------------------
'''
image = cv2.imread('Images/asl_alphabet_train/asl_alphabet_train/A/A1.jpg')
print(f'''
The image is a {type(image)}.
The image has a shape of {image.shape}.
''')
plt.figure(1 , figsize = (5 , 5))
plt.imshow(image) # Note: cv2 loads BGR, so the colours look swapped when displayed as RGB
The image is a <class 'numpy.ndarray'>.
The image has a shape of (200, 200, 3).
<matplotlib.image.AxesImage at 0x17d8df8b370>
'''
------------------------------------------------------------------------------------------------------------------------
Let's plot out the Red, Green and Blue channels of the image.
------------------------------------------------------------------------------------------------------------------------
'''
# Note: cv2 loads images in BGR order, so red is channel 2 and blue is channel 0.
red = image[:, :, 2] #Red channel
green = image[:, :, 1] #Green channel
blue = image[:, :, 0] #Blue channel
fig, axs = plt.subplots(2,2) #Create a 2x2 grid of subplots
cax_00 = axs[0,0].imshow(image) #Plot the image on the first subplot
axs[0,0].xaxis.set_major_formatter(plt.NullFormatter()) #Remove the x axis
axs[0,0].yaxis.set_major_formatter(plt.NullFormatter()) #Remove the y axis
cax_01 = axs[0,1].imshow(red, cmap='Reds') #Plot the red channel on the second subplot
fig.colorbar(cax_01, ax=axs[0,1]) #Add a colorbar to the second subplot
axs[0,1].xaxis.set_major_formatter(plt.NullFormatter()) #Remove the x axis
axs[0,1].yaxis.set_major_formatter(plt.NullFormatter()) #Remove the y axis
cax_10 = axs[1,0].imshow(green, cmap='Greens') #Plot the green channel on the third subplot
fig.colorbar(cax_10, ax=axs[1,0]) #Add a colorbar to the third subplot
axs[1,0].xaxis.set_major_formatter(plt.NullFormatter()) #Remove the x axis
axs[1,0].yaxis.set_major_formatter(plt.NullFormatter()) #Remove the y axis
cax_11 = axs[1,1].imshow(blue, cmap='Blues') #Plot the blue channel on the fourth subplot
fig.colorbar(cax_11, ax=axs[1,1]) # Add a colorbar to the fourth subplot
axs[1,1].xaxis.set_major_formatter(plt.NullFormatter()) #Remove the x axis
axs[1,1].yaxis.set_major_formatter(plt.NullFormatter()) #Remove the y axis
plt.show()
'''
------------------------------------------------------------------------------------------------------------------------
We see that the shape of the image is (200,200,3)
200 = Height
200 = Width
3 = RGB
There are 3 layers of 200x200 pixels.
Each layer represents either Red, Green or Blue.
------------------------------------------------------------------------------------------------------------------------
Next we want to resize the image to 64x64 pixels.
This shrinks the resolution to roughly a tenth of the original pixel count (40 000 pixels down to 4 096).
------------------------------------------------------------------------------------------------------------------------
We will use the Pillow library for this.
------------------------------------------------------------------------------------------------------------------------
'''
from PIL import Image
#Load the image
image_array = Image.fromarray(image , 'RGB')
#Convert to greyscale
image_array = image_array.convert('L')
#Resize the image
resize_img = image_array.resize((IMAGE_DIMENTION , IMAGE_DIMENTION))
plt.figure(1 , figsize = (5 , 5))
plt.imshow(resize_img)
<matplotlib.image.AxesImage at 0x17d8dc9ef20>
'''
------------------------------------------------------------------------------------------------------------------------
We see that the resolution of the image has been reduced significantly.
But it is still possible to make out the sign for A.
------------------------------------------------------------------------------------------------------------------------
This will make it easier for the computer to process the image.
------------------------------------------------------------------------------------------------------------------------
Next we want to process the image so that the model isn't expecting the same image every time.
To do this we will rotate the image slightly.
------------------------------------------------------------------------------------------------------------------------
'''
ignoreWarnings()
rotated45 = resize_img.rotate(45) # Rotate the image 45 degrees
rotated75 = resize_img.rotate(75) # Rotate the image 75 degrees
flipped = resize_img.transpose(Image.FLIP_LEFT_RIGHT) # Flip the image
blur = cv2.blur(np.array(resize_img) ,(2,2)) # Blur the image
pictures = {'Original':resize_img, 'Rotated45':rotated45, 'Rotated75':rotated75, 'Flipped':flipped, 'Blurred':blur}
#pictures = {'Original':resize_img, 'Rotated45':rotated45, 'Rotated75':rotated75, 'Flipped':flipped}
'''
------------------------------------------------------------------------------------------------------------------------
Let's output the results.
------------------------------------------------------------------------------------------------------------------------
'''
count = 1
for key in pictures:
    plt.figure(1 , figsize = (15 , 15))
    plt.subplot(1 , len(pictures) , count)
    count += 1
    plt.title(key)
    plt.subplots_adjust(hspace = 0.5 , wspace = 0.5)
    plt.imshow(pictures[key])
plt.show()
plot = 0
subplots = 5
for folder in Folders:
    plot += 1
    image = cv2.imread('Images/asl_alphabet_train/asl_alphabet_train/' + folder + '/' + AlphabetDictionary[folder][0])
    plt.figure(1 , figsize = (20 , 20))
    plt.subplot(6 , 6 , plot)
    plt.title(folder)
    plt.subplots_adjust(hspace = 0.5 , wspace = 0.5)
    plt.imshow(image)
plt.show()
'''
------------------------------------------------------------------------------------------------------------------------
Now that we know how to process the images, let's store them all in a numpy array.
------------------------------------------------------------------------------------------------------------------------
'''
ignoreWarnings()
data = []
labels = []
#labelDictionary = {'A':1,'B':2,'C':3,'D':4,'E':5,'F':6,'G':7,'H':8,'I':9,'J':10,'K':11,'L':12,'M':13,'N':14,'O':15,'P':16,'Q':17,'R':18,'S':19,'T':20,'U':21,'V':22,'W':23,'X':24,'Y':25,'Z':26,'del':27,'nothing':28,'space':29}
for key in AlphabetDictionary:
    for imageName in AlphabetDictionary[key]:
        try:
            image = cv2.imread('Images/asl_alphabet_train/asl_alphabet_train/' + key + '/' + imageName)
            image_array = Image.fromarray(image , 'RGB') # Wrap the array as a PIL image
            image_array = image_array.convert('L') # Convert the image to greyscale
            resize_img = image_array.resize((IMAGE_DIMENTION , IMAGE_DIMENTION)) # Resize the image to 64x64 pixels
            rotated45 = resize_img.rotate(45) # Rotate the image 45 degrees
            rotated75 = resize_img.rotate(75) # Rotate the image 75 degrees
            flipped = resize_img.transpose(Image.FLIP_LEFT_RIGHT) # Flip the image
            blur = cv2.blur(np.array(resize_img) ,(1,1)) # Blur the image (note: a 1x1 kernel is effectively a no-op; (2,2) as in the demo above would actually blur)
            for variant in (resize_img, rotated45, rotated75, flipped, blur):
                data.append(np.array(variant)) # Add each variant to the data array
                labels.append(labelDictionary[key]) # One label per variant
        except AttributeError:
            pass # cv2.imread returned None for an unreadable file; skip it
'''
------------------------------------------------------------------------------------------------------------------------
Now that we have the labels and the data we can convert them to numpy arrays.
And then save them so we don't have to do this again.
------------------------------------------------------------------------------------------------------------------------
'''
picturesArray = np.array(data)
labelsArray = np.array(labels)
np.save('Numpy Arrays/_X_picArray2' , picturesArray)
np.save('Numpy Arrays/_y_picArray2' , labelsArray)
'''
------------------------------------------------------------------------------------------------------------------------
Let's clear the memory.
------------------------------------------------------------------------------------------------------------------------
'''
del picturesArray, labelsArray
gc.collect()
86256
'''
------------------------------------------------------------------------------------------------------------------------
The previous step is great for visualising what is going to happen.
But the obvious flaw is that it isn't within a pipeline and it requires a lot of memory at once.
To combat this we can use a tensorflow input pipeline.
------------------------------------------------------------------------------------------------------------------------
The first step will be to load all the images into a tensorflow dataset.
This dataset will only call on the images when it needs them.
------------------------------------------------------------------------------------------------------------------------
We set shuffle to true so that the images are in no specific order.
------------------------------------------------------------------------------------------------------------------------
'''
images_ds = tf.data.Dataset.list_files('Images/asl_alphabet_train/asl_alphabet_train/*/*.jpg', shuffle=True)
for image in images_ds.take(5):
    print(image.numpy())
b'Images\\asl_alphabet_train\\asl_alphabet_train\\N\\N1667.jpg'
b'Images\\asl_alphabet_train\\asl_alphabet_train\\N\\N2453.jpg'
b'Images\\asl_alphabet_train\\asl_alphabet_train\\N\\N664.jpg'
b'Images\\asl_alphabet_train\\asl_alphabet_train\\N\\N1892.jpg'
b'Images\\asl_alphabet_train\\asl_alphabet_train\\Y\\Y1956.jpg'
'''
------------------------------------------------------------------------------------------------------------------------
Let's count how many images there are in the dataset.
------------------------------------------------------------------------------------------------------------------------
This information will be useful as we will use it to allocate our training and testing data.
------------------------------------------------------------------------------------------------------------------------
'''
image_count = len(list(images_ds))
'''
------------------------------------------------------------------------------------------------------------------------
The size of the training data should be 80% of the total data.
So we will take that 80% and then skip over it again to take the rest for the testing data.
------------------------------------------------------------------------------------------------------------------------
'''
train_size = int(image_count * 0.8)
train_ds = images_ds.take(train_size)
test_ds = images_ds.skip(train_size)
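'''
------------------------------------------------------------------------------------------------------------------------
One caveat, flagged as an assumption about tf.data internals: list_files with shuffle=True may reshuffle the file
order on each iteration, in which case take/skip would not give a fixed split across epochs. A sketch of a more
deterministic alternative:
------------------------------------------------------------------------------------------------------------------------
'''
# files = tf.data.Dataset.list_files('Images/asl_alphabet_train/asl_alphabet_train/*/*.jpg', shuffle=False)
# files = files.shuffle(buffer_size=87000, seed=42, reshuffle_each_iteration=False)
# train_ds, test_ds = files.take(train_size), files.skip(train_size)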
'''
------------------------------------------------------------------------------------------------------------------------
Let's see what we are left with.
------------------------------------------------------------------------------------------------------------------------
'''
print(f'''
There are originally {image_count} images.
the training data has {len(list(train_ds))} images.
the testing data has {len(list(test_ds))} images.
''')
There are originally 87000 images.
the training data has 69600 images.
the testing data has 17400 images.
'''
------------------------------------------------------------------------------------------------------------------------
The next step will be to get the labels for the images.
The dataset currently only has the path to the images, and not the images themselves.
So we can use the image path to get the labels.
------------------------------------------------------------------------------------------------------------------------
'''
s = 'Images\\asl_alphabet_train\\asl_alphabet_train\\W\\W108.jpg'
print(s.split('\\')[-2])
W
def get_label(file_path):
    # Note: splitting on '\\' is Windows-specific; os.path.sep would make this portable.
    return tf.strings.split(file_path, '\\')[-2]
# Get the label for the first image
for image in images_ds.take(3):
    print(f'''
The image path is
{image.numpy()}
The label for this image is
{get_label(image)}''')
The image path is
b'Images\\asl_alphabet_train\\asl_alphabet_train\\P\\P851.jpg'
The label for this image is
b'P'

The image path is
b'Images\\asl_alphabet_train\\asl_alphabet_train\\T\\T2517.jpg'
The label for this image is
b'T'

The image path is
b'Images\\asl_alphabet_train\\asl_alphabet_train\\Y\\Y1658.jpg'
The label for this image is
b'Y'
'''
------------------------------------------------------------------------------------------------------------------------
Now that we know how to get the labels for the images, we can start processing the images and storing them in a dataset.
------------------------------------------------------------------------------------------------------------------------
We can leave the images in RGB for now, so we set the channel to 3
------------------------------------------------------------------------------------------------------------------------
'''
def process_image(file_path):
    label = get_label(file_path) # Get the label for the image
    image = tf.io.read_file(file_path) # Read the raw bytes
    image = tf.image.decode_jpeg(image, channels=3) # Leave the images in RGB for now
    image = tf.image.convert_image_dtype(image, tf.float32) # Convert to float32 (this already rescales the pixels to [0, 1])
    image = tf.image.resize(image, [IMAGE_DIMENTION, IMAGE_DIMENTION]) # Resize the image to 64x64 pixels
    return image, label
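'''
------------------------------------------------------------------------------------------------------------------------
A quick eager sanity check (sketch): run process_image on one known path and confirm the shape, dtype and label
match what we expect.
------------------------------------------------------------------------------------------------------------------------
'''
sample_path = tf.constant('Images/asl_alphabet_train/asl_alphabet_train/A/A1.jpg')
sample_img, sample_lbl = process_image(sample_path)
print(sample_img.shape, sample_img.dtype, sample_lbl.numpy()) # (64, 64, 3) <dtype: 'float32'> b'A'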
'''
------------------------------------------------------------------------------------------------------------------------
We can use the map function to iterate over the dataset and apply the process_image function to each image.
------------------------------------------------------------------------------------------------------------------------
'''
for img, label in train_ds.map(process_image).take(5):
    print(img.shape, label.numpy())
(64, 64, 3) b'V'
(64, 64, 3) b'O'
(64, 64, 3) b'K'
(64, 64, 3) b'J'
(64, 64, 3) b'R'
'''
------------------------------------------------------------------------------------------------------------------------
Now that the concept is proven we can apply it to the whole dataset.
We can also add extra steps to the pipeline.
------------------------------------------------------------------------------------------------------------------------
The first step will be to scale the images.
Note: convert_image_dtype in process_image already rescaled the pixels to [0, 1], so this extra division
actually squashes them into [0, 1/255].
------------------------------------------------------------------------------------------------------------------------
'''
def scale(image, label):
    image = tf.cast(image, tf.float32) # Convert the image to float32
    image /= 255 # Scale the image (a second rescale; see the note above)
    return image, label
'''
------------------------------------------------------------------------------------------------------------------------
Next we want to expand the dimensions of the images.
------------------------------------------------------------------------------------------------------------------------
'''
def expandDims(image, label):
    image = tf.expand_dims(image, axis=0) # Add a leading batch dimension to the image
    return image, label
'''
------------------------------------------------------------------------------------------------------------------------
After that we need to make sure that the labels are one-hot encoded.
We will encode them as dummy variables, one column per class.
------------------------------------------------------------------------------------------------------------------------
'''
def convertLabelToCategorical(image, label):
    label = tf.strings.to_number(label, out_type=tf.int32) # NOTE: this only parses numeric strings, but our labels are letters like b'A'; see the lookup-table sketch below
    label = tf.one_hot(label, 29) # Convert the label to a one-hot encoded array (there are 29 classes)
    return image, label
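'''
------------------------------------------------------------------------------------------------------------------------
tf.strings.to_number only parses numeric strings, so the function above would fail the moment the dataset is actually
evaluated. A sketch of one way around this, reusing labelDictionary: build a static lookup table from folder name to
integer label, then one-hot encode the looked-up index (depth 30, so that labels 1..29 all fit, matching what
to_categorical produces later on).
------------------------------------------------------------------------------------------------------------------------
'''
label_table = tf.lookup.StaticHashTable(
    tf.lookup.KeyValueTensorInitializer(tf.constant(list(labelDictionary.keys())),
                                        tf.constant(list(labelDictionary.values()), dtype=tf.int32)),
    default_value=0) # 0 = unknown folder name
def convertLabelWithLookup(image, label):
    index = label_table.lookup(label) # e.g. b'A' -> 1, b'space' -> 29
    return image, tf.one_hot(index, 30)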
'''
------------------------------------------------------------------------------------------------------------------------
We then need to make sure that the labels are in the correct shape.
------------------------------------------------------------------------------------------------------------------------
'''
def reshapeLabel(image, label):
    label = tf.reshape(label, [1, 29])
    return image, label
'''
------------------------------------------------------------------------------------------------------------------------
We can then join it all together into a pipeline.
------------------------------------------------------------------------------------------------------------------------
'''
train_ds = train_ds.map(process_image).map(scale).map(expandDims).map(convertLabelToCategorical)
test_ds = test_ds.map(process_image).map(scale).map(expandDims).map(convertLabelToCategorical)
print(train_ds)
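'''
------------------------------------------------------------------------------------------------------------------------
If we were to take this pipeline further, the usual final steps would be batching and prefetching (sketch; with
batch() the expandDims step would be dropped, since batching adds the leading dimension itself):
------------------------------------------------------------------------------------------------------------------------
'''
# train_ds = train_ds.batch(32).prefetch(tf.data.AUTOTUNE)
# test_ds = test_ds.batch(32).prefetch(tf.data.AUTOTUNE)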
'''
------------------------------------------------------------------------------------------------------------------------
We clear some memory.
------------------------------------------------------------------------------------------------------------------------
'''
del train_ds, test_ds
print(gc.collect())
<MapDataset element_spec=(TensorSpec(shape=(1, 64, 64, 3), dtype=tf.float32, name=None), TensorSpec(shape=(29,), dtype=tf.float32, name=None))>
2621
'''
------------------------------------------------------------------------------------------------------------------------
Everything looks good so far.
------------------------------------------------------------------------------------------------------------------------
At this point we could train a model with this data; however, this method is not as efficient as it could be.
------------------------------------------------------------------------------------------------------------------------
Before we could really fit the model we would still need to generate new data, flip images, blur images, etc.
However, if we use a data generator we can do all of this on the fly.
So let's try that route instead.
------------------------------------------------------------------------------------------------------------------------
'''
dataForGen = []
labelsForGen = []
for key in AlphabetDictionary:
    for imageName in AlphabetDictionary[key]:
        try:
            image = cv2.imread('Images/asl_alphabet_train/asl_alphabet_train/' + key + '/' + imageName)
            image_array = Image.fromarray(image , 'RGB') # Wrap the array as a PIL image
            resize_img = image_array.resize((IMAGE_DIMENTION , IMAGE_DIMENTION)) # Resize the image to 64x64 pixels
            dataForGen.append(np.array(resize_img)) # Add the image to the data array
            labelsForGen.append(labelDictionary[key]) # Add the label to the labels array
        except AttributeError:
            pass # cv2.imread returned None for an unreadable file; skip it
'''
------------------------------------------------------------------------------------------------------------------------
Let's take a look at the shape of the data.
------------------------------------------------------------------------------------------------------------------------
'''
print(f'''
The shape of the data array is {np.array(dataForGen).shape}
The shape of the labels array is {np.array(labelsForGen).shape}
''')
The shape of the data array is (20010, 64, 64, 3)
The shape of the labels array is (20010,)
'''
------------------------------------------------------------------------------------------------------------------------
There are 20 010 observations of 64 by 64 pixels.
The 4th dimension is 3: one channel each for red, green, and blue.
------------------------------------------------------------------------------------------------------------------------
We can now break the data into training and testing sets.
------------------------------------------------------------------------------------------------------------------------
'''
X_train, X_test, y_train, y_test = train_test_split(dataForGen, labelsForGen, test_size=0.2, random_state=42)
print(f'''
The shape of the training data is {np.array(X_train).shape}
The shape of the training labels is {np.array(y_train).shape}
The shape of the testing data is {np.array(X_test).shape}
The shape of the testing labels is {np.array(y_test).shape}
''')
X_train = np.array(X_train)
X_test = np.array(X_test)
y_train = np.array(y_train)
y_test = np.array(y_test)
The shape of the training data is (16008, 64, 64, 3)
The shape of the training labels is (16008,)
The shape of the testing data is (4002, 64, 64, 3)
The shape of the testing labels is (4002,)
'''
------------------------------------------------------------------------------------------------------------------------
Now we can use a data generator to generate new data on the fly,
as well as augment the existing data.
------------------------------------------------------------------------------------------------------------------------
'''
dataGenerator = ImageDataGenerator(featurewise_center=False,            # Set input mean to 0 over the dataset
                                   samplewise_center=False,             # Set each sample mean to 0
                                   featurewise_std_normalization=False, # Divide inputs by std of the dataset
                                   samplewise_std_normalization=False,  # Divide each input by its std
                                   zca_whitening=False,                 # Apply ZCA whitening
                                   rotation_range=10,                   # Randomly rotate images in the range (degrees, 0 to 180)
                                   zoom_range=0.1,                      # Randomly zoom image
                                   height_shift_range=0.1,              # Randomly shift images vertically (fraction of total height)
                                   width_shift_range=0.1,               # Randomly shift images horizontally (fraction of total width)
                                   horizontal_flip=True,                # Randomly flip images horizontally
                                   vertical_flip=False)                 # Don't flip images vertically
dataGenerator.fit(X_train)
y_cat_train = to_categorical(y_train)
y_cat_test = to_categorical(y_test)
print(f'''
The shape of y_cat_train is {y_cat_train.shape}
The shape of y_cat_test is {y_cat_test.shape}
''')
The shape of y_cat_train is (16008, 30)
The shape of y_cat_test is (4002, 30)
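'''
------------------------------------------------------------------------------------------------------------------------
A quick sanity check (sketch): pull one augmented batch from the generator and display the first nine images to see
what the augmentation actually does to the hands.
------------------------------------------------------------------------------------------------------------------------
'''
batchX, batchY = next(dataGenerator.flow(X_train, y_cat_train, batch_size=9))
plt.figure(figsize=(6, 6))
for i in range(9):
    plt.subplot(3, 3, i + 1)
    plt.imshow(batchX[i].astype('uint8')) # the generator returns floats; cast back for display
    plt.axis('off')
plt.show()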
'''
------------------------------------------------------------------------------------------------------------------------
Let's create a small model to work with the data.
------------------------------------------------------------------------------------------------------------------------
'''
ignoreWarnings()
model = Sequential() # Create the model
# Add the first layer with 32 filters and an input shape that matches the images
model.add(Conv2D(32, (5, 5), input_shape=(IMAGE_DIMENTION, IMAGE_DIMENTION, 3)))
model.add(Dropout(0.2))
model.add(Activation('relu')) # Add the activation function
model.add(MaxPooling2D((2, 2))) # Add the max pooling layer
model.add(Conv2D(64, (3, 3))) # Add the second layer with 64 filters
model.add(Dropout(0.2))
model.add(Activation('relu')) # Add the activation function
model.add(MaxPooling2D((2, 2))) # Add the max pooling layer
model.add(Conv2D(64, (3, 3))) # Add the third layer with 64 filters
model.add(Dropout(0.2))
model.add(Activation('relu')) # Add the activation function
model.add(MaxPooling2D((2, 2))) # Add the max pooling layer
model.add(Flatten()) # Flatten the data
model.add(Dense(128, activation='relu')) # Add the first dense layer with 128 neurons
model.add(Dense(30, activation='softmax')) # Output layer: 30 neurons (labels run 1..29, so to_categorical gives 30 columns, column 0 unused)
model.summary() # Print the summary of the model
early_stop = EarlyStopping(monitor='val_loss',patience=2)
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])
Model: "sequential" _________________________________________________________________ Layer (type) Output Shape Param # ================================================================= conv2d (Conv2D) (None, 60, 60, 32) 2432 dropout (Dropout) (None, 60, 60, 32) 0 activation (Activation) (None, 60, 60, 32) 0 max_pooling2d (MaxPooling2D (None, 30, 30, 32) 0 ) conv2d_1 (Conv2D) (None, 28, 28, 64) 18496 dropout_1 (Dropout) (None, 28, 28, 64) 0 activation_1 (Activation) (None, 28, 28, 64) 0 max_pooling2d_1 (MaxPooling (None, 14, 14, 64) 0 2D) conv2d_2 (Conv2D) (None, 12, 12, 64) 36928 dropout_2 (Dropout) (None, 12, 12, 64) 0 activation_2 (Activation) (None, 12, 12, 64) 0 max_pooling2d_2 (MaxPooling (None, 6, 6, 64) 0 2D) flatten (Flatten) (None, 2304) 0 _________________________________________________________________ Layer (type) Output Shape Param # ================================================================= conv2d (Conv2D) (None, 60, 60, 32) 2432 dropout (Dropout) (None, 60, 60, 32) 0 activation (Activation) (None, 60, 60, 32) 0 max_pooling2d (MaxPooling2D (None, 30, 30, 32) 0 ) conv2d_1 (Conv2D) (None, 28, 28, 64) 18496 dropout_1 (Dropout) (None, 28, 28, 64) 0 activation_1 (Activation) (None, 28, 28, 64) 0 max_pooling2d_1 (MaxPooling (None, 14, 14, 64) 0 2D) conv2d_2 (Conv2D) (None, 12, 12, 64) 36928 dropout_2 (Dropout) (None, 12, 12, 64) 0 activation_2 (Activation) (None, 12, 12, 64) 0 max_pooling2d_2 (MaxPooling (None, 6, 6, 64) 0 2D) flatten (Flatten) (None, 2304) 0 dense (Dense) (None, 128) 295040 dense_1 (Dense) (None, 30) 3870 ================================================================= Total params: 356,766 Trainable params: 356,766 Non-trainable params: 0 _________________________________________________________________
ignoreWarnings()
model.fit(dataGenerator.flow(X_train, y_cat_train, batch_size=32), epochs=2, validation_data=(X_test, y_cat_test), callbacks=[early_stop], verbose=1)
Epoch 1/2
501/501 [==============================] - 20s 30ms/step - loss: 3.6767 - accuracy: 0.1572 - val_loss: 2.3048 - val_accuracy: 0.3543
Epoch 2/2
501/501 [==============================] - 14s 29ms/step - loss: 2.0742 - accuracy: 0.3719 - val_loss: 1.5336 - val_accuracy: 0.5565
<keras.callbacks.History at 0x17d8fafc310>
'''
------------------------------------------------------------------------------------------------------------------------
The model already performs reasonably well, and with a few more epochs the accuracy should climb much higher.
------------------------------------------------------------------------------------------------------------------------
Let's move onto building the final model.
------------------------------------------------------------------------------------------------------------------------
Let's clear up some memory by deleting the data we no longer need.
------------------------------------------------------------------------------------------------------------------------
'''
del model, dataGenerator, X_train, X_test, y_train, y_test, y_cat_train, y_cat_test, dataForGen, labelsForGen
gc.collect()
print('End of Section')
End of Section
'''
------------------------------------------------------------------------------------------------------------------------
Let's load the labels and the data.
------------------------------------------------------------------------------------------------------------------------
We can also check the shape of them.
------------------------------------------------------------------------------------------------------------------------
'''
picturesArray = np.load('Numpy Arrays/X_picArray.npy')
labelsArray = np.load('Numpy Arrays/y_picArray.npy')
print(f'''
The picturesArray is a {type(picturesArray)}.
The picturesArray has a shape of {picturesArray.shape}.
The labelsArray is a {type(labelsArray)}.
The labelsArray has a shape of {labelsArray.shape}.
''')
The picturesArray is a <class 'numpy.ndarray'>.
The picturesArray has a shape of (100050, 64, 64).
The labelsArray is a <class 'numpy.ndarray'>.
The labelsArray has a shape of (100050,).
'''
------------------------------------------------------------------------------------------------------------------------
Let's check that the classes are balanced.
------------------------------------------------------------------------------------------------------------------------
'''
np.unique(labelsArray, return_counts=True)
(array([ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29]), array([3450, 3450, 3450, 3450, 3450, 3450, 3450, 3450, 3450, 3450, 3450, 3450, 3450, 3450, 3450, 3450, 3450, 3450, 3450, 3450, 3450, 3450, 3450, 3450, 3450, 3450, 3450, 3450, 3450], dtype=int64))
'''
------------------------------------------------------------------------------------------------------------------------
We want to scale down the X values so that it is easier for the model to learn.
------------------------------------------------------------------------------------------------------------------------
'''
picturesArray = picturesArray.astype('float32') / 255.0
labelsArray = labelsArray.astype('float32')
'''
------------------------------------------------------------------------------------------------------------------------
After that we want to split the data into a training and a testing set.
We do have another folder for testing but we will use this one for now.
------------------------------------------------------------------------------------------------------------------------
'''
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(picturesArray, labelsArray, test_size=0.2, random_state=42)
'''
------------------------------------------------------------------------------------------------------------------------
We don't want to use too much memory so we will call the garbage collector and delete the variables we don't need anymore.
------------------------------------------------------------------------------------------------------------------------
'''
del picturesArray
del labelsArray
gc.collect()
'''
------------------------------------------------------------------------------------------------------------------------
We want to break the labels into dummy variables.
------------------------------------------------------------------------------------------------------------------------
'''
y_cat_train = to_categorical(y_train)
y_cat_test = to_categorical(y_test)
'''
------------------------------------------------------------------------------------------------------------------------
Now we can output the shape of the variables.
------------------------------------------------------------------------------------------------------------------------
'''
print(f'''
The shape of the X_train is {X_train.shape}.
The shape of the X_test is {X_test.shape}.
The shape of the y_train is {y_train.shape}.
The shape of the y_test is {y_test.shape}.
The shape of the y_cat_train is {y_cat_train.shape}.
The shape of the y_cat_test is {y_cat_test.shape}.
''')
The shape of the X_train is (80040, 64, 64).
The shape of the X_test is (20010, 64, 64).
The shape of the y_train is (80040,).
The shape of the y_test is (20010,).
The shape of the y_cat_train is (80040, 30).
The shape of the y_cat_test is (20010, 30).
'''
------------------------------------------------------------------------------------------------------------------------
Let's take a look at how the dummy variables are set up.
------------------------------------------------------------------------------------------------------------------------
We will look at the first record in y_cat_train.
Notice that it is a vector of one-hot encoded variables, one for each class.
------------------------------------------------------------------------------------------------------------------------
We see that the element at index 9 is a 1.
So this sample belongs to class 9.
Making it an I.
------------------------------------------------------------------------------------------------------------------------
'''
y_cat_train[0]
array([0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], dtype=float32)
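'''
------------------------------------------------------------------------------------------------------------------------
Sketch: recovering the letter from a one-hot row. argmax gives the integer label, which we look up in a reversed
labelDictionary.
------------------------------------------------------------------------------------------------------------------------
'''
index = int(np.argmax(y_cat_train[0]))                     # -> 9
letter = {v: k for k, v in labelDictionary.items()}[index] # -> 'I'
print(letter)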
'''
------------------------------------------------------------------------------------------------------------------------
The model expects to get a 4 dimensional array.
So we need to manually add the 4th dimension.
This is the channel dimension.
We only have 1 channel because it is a grayscale image.
------------------------------------------------------------------------------------------------------------------------
'''
X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], X_train.shape[2], 1)
X_test = X_test.reshape(X_test.shape[0], X_test.shape[1], X_test.shape[2], 1)
'''
------------------------------------------------------------------------------------------------------------------------
Let's check the shape of the arrays again.
------------------------------------------------------------------------------------------------------------------------
'''
print(f'''
The shape of the X_train is {X_train.shape}.
The shape of the X_test is {X_test.shape}.
''')
The shape of the X_train is (80040, 64, 64, 1).
The shape of the X_test is (20010, 64, 64, 1).
'''
------------------------------------------------------------------------------------------------------------------------
Now we look at building the model.
------------------------------------------------------------------------------------------------------------------------
'''
ignoreWarnings()
model = Sequential() # Create the model
# Add the first layer with 32 filters and an input shape that matches the images
model.add(Conv2D(32, (5, 5), input_shape=(IMAGE_DIMENTION, IMAGE_DIMENTION, 1)))
model.add(Dropout(0.2))
model.add(Activation('relu')) # Add the activation function
model.add(MaxPooling2D((2, 2))) # Add the max pooling layer
model.add(Conv2D(64, (3, 3))) # Add the second layer with 64 filters
model.add(Dropout(0.2))
model.add(Activation('relu')) # Add the activation function
model.add(MaxPooling2D((2, 2))) # Add the max pooling layer
model.add(Conv2D(64, (3, 3))) # Add the third layer with 64 filters
model.add(Dropout(0.2))
model.add(Activation('relu')) # Add the activation function
model.add(MaxPooling2D((2, 2))) # Add the max pooling layer
model.add(Flatten()) # Flatten the data
model.add(Dense(128, activation='relu')) # Add the first dense layer with 128 neurons
model.add(Dense(30, activation='softmax')) # Output layer: 30 neurons (labels run 1..29, so to_categorical gives 30 columns, column 0 unused)
model.summary() # Print the summary of the model
Model: "sequential_1" _________________________________________________________________ Layer (type) Output Shape Param # ================================================================= conv2d_3 (Conv2D) (None, 60, 60, 32) 832 dropout_3 (Dropout) (None, 60, 60, 32) 0 activation_3 (Activation) (None, 60, 60, 32) 0 max_pooling2d_3 (MaxPooling (None, 30, 30, 32) 0 2D) conv2d_4 (Conv2D) (None, 28, 28, 64) 18496 dropout_4 (Dropout) (None, 28, 28, 64) 0 activation_4 (Activation) (None, 28, 28, 64) 0 max_pooling2d_4 (MaxPooling (None, 14, 14, 64) 0 2D) conv2d_5 (Conv2D) (None, 12, 12, 64) 36928 dropout_5 (Dropout) (None, 12, 12, 64) 0 activation_5 (Activation) (None, 12, 12, 64) 0 max_pooling2d_5 (MaxPooling (None, 6, 6, 64) 0 _________________________________________________________________ Layer (type) Output Shape Param # ================================================================= conv2d_3 (Conv2D) (None, 60, 60, 32) 832 dropout_3 (Dropout) (None, 60, 60, 32) 0 activation_3 (Activation) (None, 60, 60, 32) 0 max_pooling2d_3 (MaxPooling (None, 30, 30, 32) 0 2D) conv2d_4 (Conv2D) (None, 28, 28, 64) 18496 dropout_4 (Dropout) (None, 28, 28, 64) 0 activation_4 (Activation) (None, 28, 28, 64) 0 max_pooling2d_4 (MaxPooling (None, 14, 14, 64) 0 2D) conv2d_5 (Conv2D) (None, 12, 12, 64) 36928 dropout_5 (Dropout) (None, 12, 12, 64) 0 activation_5 (Activation) (None, 12, 12, 64) 0 max_pooling2d_5 (MaxPooling (None, 6, 6, 64) 0 2D) flatten_1 (Flatten) (None, 2304) 0 dense_2 (Dense) (None, 128) 295040 dense_3 (Dense) (None, 30) 3870 ================================================================= Total params: 355,166 Trainable params: 355,166 Non-trainable params: 0 _________________________________________________________________
'''
------------------------------------------------------------------------------------------------------------------------
We want a callback that stops training if the model doesn't improve for a few epochs.
We set the patience to 3 so that training stops after 3 epochs with no improvement in the validation loss.
------------------------------------------------------------------------------------------------------------------------
This helps prevent overfitting the data.
------------------------------------------------------------------------------------------------------------------------
'''
early_stop = EarlyStopping(monitor='val_loss',patience=3)
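'''
------------------------------------------------------------------------------------------------------------------------
An optional refinement (sketch): restore_best_weights=True would roll the model back to the weights from the best
val_loss epoch, instead of keeping the weights from the final (worse) epoch.
------------------------------------------------------------------------------------------------------------------------
'''
# early_stop = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)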
'''
------------------------------------------------------------------------------------------------------------------------
The last step of building the model before fitting it is to compile everything together.
------------------------------------------------------------------------------------------------------------------------
'''
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])
print('Model Compiled')
Model Compiled
'''
------------------------------------------------------------------------------------------------------------------------
Finally it is time to fit the model to the data.
------------------------------------------------------------------------------------------------------------------------
'''
ignoreWarnings()
model.fit(X_train, y_cat_train,
          epochs=50,
          batch_size=IMAGE_DIMENTION, # 64; reusing the image-size constant as the batch size
          verbose=1,
          validation_data=(X_test, y_cat_test),
          callbacks=[early_stop])
Epoch 1/50
1251/1251 [==============================] - 10s 8ms/step - loss: 1.5042 - accuracy: 0.5343 - val_loss: 0.8803 - val_accuracy: 0.7446
Epoch 2/50
1251/1251 [==============================] - 9s 7ms/step - loss: 0.4200 - accuracy: 0.8644 - val_loss: 0.3405 - val_accuracy: 0.9139
Epoch 3/50
1251/1251 [==============================] - 9s 7ms/step - loss: 0.2037 - accuracy: 0.9330 - val_loss: 0.1704 - val_accuracy: 0.9621
Epoch 4/50
1251/1251 [==============================] - 9s 7ms/step - loss: 0.1349 - accuracy: 0.9559 - val_loss: 0.1736 - val_accuracy: 0.9552
Epoch 5/50
1251/1251 [==============================] - 9s 7ms/step - loss: 0.1070 - accuracy: 0.9648 - val_loss: 0.1990 - val_accuracy: 0.9359
Epoch 6/50
1251/1251 [==============================] - 9s 7ms/step - loss: 0.0814 - accuracy: 0.9738 - val_loss: 0.2171 - val_accuracy: 0.9221
<keras.callbacks.History at 0x17d8e13f460>
metrics = pd.DataFrame(model.history.history)
print("The model metrics are")
metrics.tail()
The model metrics are
       loss  accuracy  val_loss  val_accuracy
1  0.419959  0.864430  0.340502      0.913943
2  0.203736  0.932959  0.170404      0.962069
3  0.134882  0.955860  0.173566      0.955222
4  0.107029  0.964755  0.198954      0.935882
5  0.081381  0.973788  0.217050      0.922089
'''
------------------------------------------------------------------------------------------------------------------------
Let's graph out the loss.
------------------------------------------------------------------------------------------------------------------------
'''
metrics[['loss','val_loss']].plot().set_title('Loss')
plt.show()
'''
------------------------------------------------------------------------------------------------------------------------
The model seems to be fitting quite well, potentially too well: the validation loss starts rising after
epoch 3 while the training loss keeps falling.
Let's look at the accuracy.
------------------------------------------------------------------------------------------------------------------------
'''
metrics[['accuracy','val_accuracy']].plot().set_title('Accuracy')
plt.show()
'''
------------------------------------------------------------------------------------------------------------------------
It seems that underfitting is not an issue.
The model accuracy is quite high, but that doesn't rule out overfitting.
------------------------------------------------------------------------------------------------------------------------
Let's start testing it with the test data.
------------------------------------------------------------------------------------------------------------------------
'''
predictions = model.predict(X_test)
print("Predictions done...")
predictions = np.argmax(predictions, axis=1)
626/626 [==============================] - 1s 2ms/step
Predictions done...
'''
------------------------------------------------------------------------------------------------------------------------
Let's look at the classification report.
We want to see if any classes are being misclassified.
------------------------------------------------------------------------------------------------------------------------
'''
ignoreWarnings()
print(classification_report(y_test,predictions))
              precision    recall  f1-score   support

         1.0       0.86      0.97      0.91       708
         2.0       0.88      0.99      0.94       664
         3.0       1.00      0.98      0.99       704
         4.0       0.98      0.99      0.99       711
         5.0       1.00      0.69      0.82       697
         6.0       0.99      1.00      1.00       705
         7.0       0.99      0.98      0.98       654
         8.0       0.98      1.00      0.99       674
         9.0       0.89      1.00      0.94       723
        10.0       0.99      0.95      0.97       699
        11.0       0.99      0.99      0.99       738
        12.0       1.00      0.99      0.99       707
        13.0       1.00      0.75      0.86       688
        14.0       0.69      1.00      0.81       693
        15.0       1.00      0.86      0.93       702
        16.0       1.00      0.74      0.85       699
        17.0       0.80      0.99      0.89       666
        18.0       0.93      0.98      0.96       671
        19.0       0.96      0.77      0.85       709
        20.0       0.89      0.94      0.91       677
        21.0       0.99      0.79      0.88       697
        22.0       0.96      0.77      0.85       642
        23.0       0.93      0.94      0.93       689
        24.0       0.61      0.94      0.74       692
        25.0       0.97      0.85      0.90       665
        26.0       0.98      0.91      0.95       669
        27.0       0.94      0.99      0.96       688
        28.0       1.00      1.00      1.00       702
        29.0       0.97      0.98      0.97       677

    accuracy                           0.92     20010
   macro avg       0.94      0.92      0.92     20010
weighted avg       0.94      0.92      0.92     20010
'''
------------------------------------------------------------------------------------------------------------------------
Most classes predict quite well, though a few show noticeably lower recall or precision.
Let's look at the confusion matrix.
------------------------------------------------------------------------------------------------------------------------
'''
plt.figure(figsize=(12,12))
sns.heatmap(confusion_matrix(y_test,predictions))
plt.title('Confusion Matrix')
plt.show()
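'''
------------------------------------------------------------------------------------------------------------------------
Sketch: the same heatmap with letter names on the axes, assuming the rows of confusion_matrix follow the sorted label
values 1..29 and therefore the insertion order of labelDictionary.
------------------------------------------------------------------------------------------------------------------------
'''
classNames = list(labelDictionary.keys())
plt.figure(figsize=(12, 12))
sns.heatmap(confusion_matrix(y_test, predictions), xticklabels=classNames, yticklabels=classNames)
plt.title('Confusion Matrix')
plt.show()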
'''
------------------------------------------------------------------------------------------------------------------------
Everything looks good.
------------------------------------------------------------------------------------------------------------------------
Now we can save the model so we don't have to train it again.
------------------------------------------------------------------------------------------------------------------------
'''
model.save('Manually Trained Models/_American_Sign_Language2.h5')
print("Model saved successfully...")
Model saved successfully...
'''
------------------------------------------------------------------------------------------------------------------------
Let's load the model and see how well it performs.
------------------------------------------------------------------------------------------------------------------------
'''
model = load_model('Manually Trained Models/American_Sign_Language.h5')
print('Model loaded successfully...')
Model loaded successfully...
'''
------------------------------------------------------------------------------------------------------------------------
The first thing we are going to do is load a picture I took of someone else's hand.
This is not the same hand that the model was trained on.
------------------------------------------------------------------------------------------------------------------------
'''
image = cv2.imread('Images/External Images/K_External_test.JPG')
print(f'''
The image is a {type(image)}.
The image has a shape of {image.shape}.
''')
plt.figure(1 , figsize = (5 , 5))
plt.imshow(image)
The image is a <class 'numpy.ndarray'>.
The image has a shape of (285, 316, 3).
<matplotlib.image.AxesImage at 0x17d8e56b8e0>
'''
------------------------------------------------------------------------------------------------------------------------
The hand is showing the letter K at the moment.
Let's process it a bit.
------------------------------------------------------------------------------------------------------------------------
'''
from PIL import Image
image_array = Image.fromarray(image , 'RGB') # Wrap the array as a PIL image
#Convert to greyscale
image_array = image_array.convert('L')
resize_img = image_array.resize((IMAGE_DIMENTION , IMAGE_DIMENTION)) # Resize the image to 64x64 pixels
plt.figure(1 , figsize = (5 , 5))
plt.imshow(resize_img , cmap='gray')
<matplotlib.image.AxesImage at 0x17d8e404fa0>
'''
------------------------------------------------------------------------------------------------------------------------
Before we test the outside data we want to make sure that the model is working properly.
So we will test using the same hand that the model was trained on.
This is still testing data, the model hasn't seen this image before.
But it has seen this hand before.
------------------------------------------------------------------------------------------------------------------------
We are going to test for the letter L.
You can replace the LETTER_TO_TEST with any letter in the alphabet.
------------------------------------------------------------------------------------------------------------------------
'''
LETTER_TO_TEST = 'L'
LETTER_TO_TEST = LETTER_TO_TEST.upper()
imageConvertionPipeline = make_pipeline(ResizeImage((IMAGE_DIMENTION, IMAGE_DIMENTION)), ConvertImageToArray(), ConvertImageToFloat(), NormalizeImage(), ReshapeImage((1, IMAGE_DIMENTION, IMAGE_DIMENTION, 1)))
currentImage = cv2.imread('Images/asl_alphabet_test/asl_alphabet_test/'+LETTER_TO_TEST+'_test.jpg')
prediction = model.predict(imageConvertionPipeline.transform(currentImage))
print(list(labelDictionary.keys())[list(labelDictionary.values()).index(np.argmax(prediction))])
1/1 [==============================] - 0s 104ms/step
L
ignoreWarnings()
'''
------------------------------------------------------------------------------------------------------------------------
But there is something to note here: our model was trained on 64x64 images, simply because it would take too long to
train on anything larger.
So instead we can use a pre-trained model that solves the same task, where someone else took the time to train it on
more data and with a larger input size.
------------------------------------------------------------------------------------------------------------------------
Normally this kind of reuse is referred to as transfer learning; in our case we simply use the downloaded model as-is.
It was just trained on more data, and at a higher resolution.
------------------------------------------------------------------------------------------------------------------------
From TensorFlow Hub we downloaded a pre-built model which we can use.
The model was found here: https://tfhub.dev/sayannath/american-sign-language/1
------------------------------------------------------------------------------------------------------------------------
'''
tf_model = keras.models.load_model("Pre_trained_Asl_Model")
tf_model.summary()
Model: "model" __________________________________________________________________________________________________ Layer (type) Output Shape Param # Connected to ================================================================================================== input_1 (InputLayer) [(None, 224, 224, 3 0 [] )] Conv1 (Conv2D) (None, 112, 112, 32 864 ['input_1[0][0]'] ) bn_Conv1 (BatchNormalization) (None, 112, 112, 32 128 ['Conv1[0][0]'] ) Conv1_relu (ReLU) (None, 112, 112, 32 0 ['bn_Conv1[0][0]'] ) expanded_conv_depthwise (Depth (None, 112, 112, 32 288 ['Conv1_relu[0][0]'] wiseConv2D) ) expanded_conv_depthwise_BN (Ba (None, 112, 112, 32 128 ['expanded_conv_depthwise[0][0]'] tchNormalization) ) expanded_conv_depthwise_relu ( (None, 112, 112, 32 0 ['expanded_conv_depthwise_BN[0][0 ReLU) ) ]'] expanded_conv_project (Conv2D) (None, 112, 112, 16 512 ['expanded_conv_depthwise_relu[0] ) [0]'] expanded_conv_project_BN (Batc (None, 112, 112, 16 64 ['expanded_conv_project[0][0]'] hNormalization) ) block_1_expand (Conv2D) (None, 112, 112, 96 1536 ['expanded_conv_project_BN[0][0]' ) ] __________________________________________________________________________________________________ Layer (type) Output Shape Param # Connected to ================================================================================================== input_1 (InputLayer) [(None, 224, 224, 3 0 [] )] Conv1 (Conv2D) (None, 112, 112, 32 864 ['input_1[0][0]'] ) bn_Conv1 (BatchNormalization) (None, 112, 112, 32 128 ['Conv1[0][0]'] ) Conv1_relu (ReLU) (None, 112, 112, 32 0 ['bn_Conv1[0][0]'] ) expanded_conv_depthwise (Depth (None, 112, 112, 32 288 ['Conv1_relu[0][0]'] wiseConv2D) ) expanded_conv_depthwise_BN (Ba (None, 112, 112, 32 128 ['expanded_conv_depthwise[0][0]'] tchNormalization) ) expanded_conv_depthwise_relu ( (None, 112, 112, 32 0 ['expanded_conv_depthwise_BN[0][0 ReLU) ) ]'] expanded_conv_project (Conv2D) (None, 112, 112, 16 512 ['expanded_conv_depthwise_relu[0] ) [0]'] expanded_conv_project_BN (Batc (None, 112, 112, 16 64 ['expanded_conv_project[0][0]'] hNormalization) ) block_1_expand (Conv2D) (None, 112, 112, 96 1536 ['expanded_conv_project_BN[0][0]' ) ] block_1_expand_BN (BatchNormal (None, 112, 112, 96 384 ['block_1_expand[0][0]'] ization) ) block_1_expand_relu (ReLU) (None, 112, 112, 96 0 ['block_1_expand_BN[0][0]'] ) block_1_pad (ZeroPadding2D) (None, 113, 113, 96 0 ['block_1_expand_relu[0][0]'] ) block_1_depthwise (DepthwiseCo (None, 56, 56, 96) 864 ['block_1_pad[0][0]'] nv2D) block_1_depthwise_BN (BatchNor (None, 56, 56, 96) 384 ['block_1_depthwise[0][0]'] malization) block_1_depthwise_relu (ReLU) (None, 56, 56, 96) 0 ['block_1_depthwise_BN[0][0]'] block_1_project (Conv2D) (None, 56, 56, 24) 2304 ['block_1_depthwise_relu[0][0]'] block_1_project_BN (BatchNorma (None, 56, 56, 24) 96 ['block_1_project[0][0]'] lization) block_2_expand (Conv2D) (None, 56, 56, 144) 3456 ['block_1_project_BN[0][0]'] block_2_expand_BN (BatchNormal (None, 56, 56, 144) 576 ['block_2_expand[0][0]'] ization) block_2_expand_relu (ReLU) (None, 56, 56, 144) 0 ['block_2_expand_BN[0][0]'] block_2_depthwise (DepthwiseCo (None, 56, 56, 144) 1296 ['block_2_expand_relu[0][0]'] nv2D) block_2_depthwise_BN (BatchNor (None, 56, 56, 144) 576 ['block_2_depthwise[0][0]'] malization) block_2_depthwise_relu (ReLU) (None, 56, 56, 144) 0 ['block_2_depthwise_BN[0][0]'] block_2_project (Conv2D) (None, 56, 56, 24) 3456 ['block_2_depthwise_relu[0][0]'] block_2_project_BN (BatchNorma (None, 56, 56, 24) 96 ['block_2_project[0][0]'] lization) block_2_add (Add) (None, 56, 
56, 24) 0 ['block_1_project_BN[0][0]', 'block_2_project_BN[0][0]'] block_3_expand (Conv2D) (None, 56, 56, 144) 3456 ['block_2_add[0][0]'] block_3_expand_BN (BatchNormal (None, 56, 56, 144) 576 ['block_3_expand[0][0]'] ization) block_3_expand_relu (ReLU) (None, 56, 56, 144) 0 ['block_3_expand_BN[0][0]'] block_3_pad (ZeroPadding2D) (None, 57, 57, 144) 0 ['block_3_expand_relu[0][0]'] block_3_depthwise (DepthwiseCo (None, 28, 28, 144) 1296 ['block_3_pad[0][0]'] nv2D) block_3_depthwise_BN (BatchNor (None, 28, 28, 144) 576 ['block_3_depthwise[0][0]'] malization) block_3_depthwise_relu (ReLU) (None, 28, 28, 144) 0 ['block_3_depthwise_BN[0][0]'] block_3_project (Conv2D) (None, 28, 28, 32) 4608 ['block_3_depthwise_relu[0][0]'] block_3_project_BN (BatchNorma (None, 28, 28, 32) 128 ['block_3_project[0][0]'] lization) block_4_expand (Conv2D) (None, 28, 28, 192) 6144 ['block_3_project_BN[0][0]'] block_4_expand_BN (BatchNormal (None, 28, 28, 192) 768 ['block_4_expand[0][0]'] ization) block_4_expand_relu (ReLU) (None, 28, 28, 192) 0 ['block_4_expand_BN[0][0]'] block_4_depthwise (DepthwiseCo (None, 28, 28, 192) 1728 ['block_4_expand_relu[0][0]'] nv2D) block_4_depthwise_BN (BatchNor (None, 28, 28, 192) 768 ['block_4_depthwise[0][0]'] malization) block_4_depthwise_relu (ReLU) (None, 28, 28, 192) 0 ['block_4_depthwise_BN[0][0]'] block_4_project (Conv2D) (None, 28, 28, 32) 6144 ['block_4_depthwise_relu[0][0]'] block_4_project_BN (BatchNorma (None, 28, 28, 32) 128 ['block_4_project[0][0]'] lization) block_4_add (Add) (None, 28, 28, 32) 0 ['block_3_project_BN[0][0]', 'block_4_project_BN[0][0]'] block_5_expand (Conv2D) (None, 28, 28, 192) 6144 ['block_4_add[0][0]'] block_5_expand_BN (BatchNormal (None, 28, 28, 192) 768 ['block_5_expand[0][0]'] ization) block_5_expand_relu (ReLU) (None, 28, 28, 192) 0 ['block_5_expand_BN[0][0]'] block_5_depthwise (DepthwiseCo (None, 28, 28, 192) 1728 ['block_5_expand_relu[0][0]'] nv2D) block_5_depthwise_BN (BatchNor (None, 28, 28, 192) 768 ['block_5_depthwise[0][0]'] malization) block_5_depthwise_relu (ReLU) (None, 28, 28, 192) 0 ['block_5_depthwise_BN[0][0]'] block_5_project (Conv2D) (None, 28, 28, 32) 6144 ['block_5_depthwise_relu[0][0]'] block_5_project_BN (BatchNorma (None, 28, 28, 32) 128 ['block_5_project[0][0]'] lization) block_5_add (Add) (None, 28, 28, 32) 0 ['block_4_add[0][0]', 'block_5_project_BN[0][0]'] block_6_expand (Conv2D) (None, 28, 28, 192) 6144 ['block_5_add[0][0]'] block_6_expand_BN (BatchNormal (None, 28, 28, 192) 768 ['block_6_expand[0][0]'] ization) block_6_expand_relu (ReLU) (None, 28, 28, 192) 0 ['block_6_expand_BN[0][0]'] block_6_pad (ZeroPadding2D) (None, 29, 29, 192) 0 ['block_6_expand_relu[0][0]'] block_6_depthwise (DepthwiseCo (None, 14, 14, 192) 1728 ['block_6_pad[0][0]'] nv2D) block_6_depthwise_BN (BatchNor (None, 14, 14, 192) 768 ['block_6_depthwise[0][0]'] malization) block_6_depthwise_relu (ReLU) (None, 14, 14, 192) 0 ['block_6_depthwise_BN[0][0]'] block_6_project (Conv2D) (None, 14, 14, 64) 12288 ['block_6_depthwise_relu[0][0]'] block_6_project_BN (BatchNorma (None, 14, 14, 64) 256 ['block_6_project[0][0]'] lization) block_7_expand (Conv2D) (None, 14, 14, 384) 24576 ['block_6_project_BN[0][0]'] block_7_expand_BN (BatchNormal (None, 14, 14, 384) 1536 ['block_7_expand[0][0]'] ization) block_7_expand_relu (ReLU) (None, 14, 14, 384) 0 ['block_7_expand_BN[0][0]'] block_7_depthwise (DepthwiseCo (None, 14, 14, 384) 3456 ['block_7_expand_relu[0][0]'] nv2D) block_7_depthwise_BN (BatchNor (None, 14, 14, 384) 1536 ['block_7_depthwise[0][0]'] malization) 
block_7_depthwise_relu (ReLU) (None, 14, 14, 384) 0 ['block_7_depthwise_BN[0][0]'] block_7_project (Conv2D) (None, 14, 14, 64) 24576 ['block_7_depthwise_relu[0][0]'] block_7_project_BN (BatchNorma (None, 14, 14, 64) 256 ['block_7_project[0][0]'] lization) block_7_add (Add) (None, 14, 14, 64) 0 ['block_6_project_BN[0][0]', 'block_7_project_BN[0][0]'] block_8_expand (Conv2D) (None, 14, 14, 384) 24576 ['block_7_add[0][0]'] block_8_expand_BN (BatchNormal (None, 14, 14, 384) 1536 ['block_8_expand[0][0]'] ization) block_8_expand_relu (ReLU) (None, 14, 14, 384) 0 ['block_8_expand_BN[0][0]'] block_8_depthwise (DepthwiseCo (None, 14, 14, 384) 3456 ['block_8_expand_relu[0][0]'] nv2D) block_8_depthwise_BN (BatchNor (None, 14, 14, 384) 1536 ['block_8_depthwise[0][0]'] malization) block_8_depthwise_relu (ReLU) (None, 14, 14, 384) 0 ['block_8_depthwise_BN[0][0]'] block_8_project (Conv2D) (None, 14, 14, 64) 24576 ['block_8_depthwise_relu[0][0]'] block_8_project_BN (BatchNorma (None, 14, 14, 64) 256 ['block_8_project[0][0]'] lization) block_8_add (Add) (None, 14, 14, 64) 0 ['block_7_add[0][0]', 'block_8_project_BN[0][0]'] block_9_expand (Conv2D) (None, 14, 14, 384) 24576 ['block_8_add[0][0]'] block_9_expand_BN (BatchNormal (None, 14, 14, 384) 1536 ['block_9_expand[0][0]'] ization) block_9_expand_relu (ReLU) (None, 14, 14, 384) 0 ['block_9_expand_BN[0][0]'] block_9_depthwise (DepthwiseCo (None, 14, 14, 384) 3456 ['block_9_expand_relu[0][0]'] nv2D) block_9_depthwise_BN (BatchNor (None, 14, 14, 384) 1536 ['block_9_depthwise[0][0]'] malization) block_9_depthwise_relu (ReLU) (None, 14, 14, 384) 0 ['block_9_depthwise_BN[0][0]'] block_9_project (Conv2D) (None, 14, 14, 64) 24576 ['block_9_depthwise_relu[0][0]'] block_9_project_BN (BatchNorma (None, 14, 14, 64) 256 ['block_9_project[0][0]'] lization) block_9_add (Add) (None, 14, 14, 64) 0 ['block_8_add[0][0]', 'block_9_project_BN[0][0]'] block_10_expand (Conv2D) (None, 14, 14, 384) 24576 ['block_9_add[0][0]'] block_10_expand_BN (BatchNorma (None, 14, 14, 384) 1536 ['block_10_expand[0][0]'] lization) block_10_expand_relu (ReLU) (None, 14, 14, 384) 0 ['block_10_expand_BN[0][0]'] block_10_depthwise (DepthwiseC (None, 14, 14, 384) 3456 ['block_10_expand_relu[0][0]'] onv2D) block_10_depthwise_BN (BatchNo (None, 14, 14, 384) 1536 ['block_10_depthwise[0][0]'] rmalization) block_10_depthwise_relu (ReLU) (None, 14, 14, 384) 0 ['block_10_depthwise_BN[0][0]'] block_10_project (Conv2D) (None, 14, 14, 96) 36864 ['block_10_depthwise_relu[0][0]'] block_10_project_BN (BatchNorm (None, 14, 14, 96) 384 ['block_10_project[0][0]'] alization) block_11_expand (Conv2D) (None, 14, 14, 576) 55296 ['block_10_project_BN[0][0]'] block_11_expand_BN (BatchNorma (None, 14, 14, 576) 2304 ['block_11_expand[0][0]'] lization) block_11_expand_relu (ReLU) (None, 14, 14, 576) 0 ['block_11_expand_BN[0][0]'] block_11_depthwise (DepthwiseC (None, 14, 14, 576) 5184 ['block_11_expand_relu[0][0]'] onv2D) block_11_depthwise_BN (BatchNo (None, 14, 14, 576) 2304 ['block_11_depthwise[0][0]'] rmalization) block_11_depthwise_relu (ReLU) (None, 14, 14, 576) 0 ['block_11_depthwise_BN[0][0]'] block_11_project (Conv2D) (None, 14, 14, 96) 55296 ['block_11_depthwise_relu[0][0]'] block_11_project_BN (BatchNorm (None, 14, 14, 96) 384 ['block_11_project[0][0]'] alization) block_11_add (Add) (None, 14, 14, 96) 0 ['block_10_project_BN[0][0]', 'block_11_project_BN[0][0]'] block_12_expand (Conv2D) (None, 14, 14, 576) 55296 ['block_11_add[0][0]'] block_12_expand_BN (BatchNorma (None, 14, 14, 576) 2304 ['block_12_expand[0][0]'] 
lization) block_12_expand_relu (ReLU) (None, 14, 14, 576) 0 ['block_12_expand_BN[0][0]'] block_12_depthwise (DepthwiseC (None, 14, 14, 576) 5184 ['block_12_expand_relu[0][0]'] onv2D) block_12_depthwise_BN (BatchNo (None, 14, 14, 576) 2304 ['block_12_depthwise[0][0]'] rmalization) block_12_depthwise_relu (ReLU) (None, 14, 14, 576) 0 ['block_12_depthwise_BN[0][0]'] block_12_project (Conv2D) (None, 14, 14, 96) 55296 ['block_12_depthwise_relu[0][0]'] block_12_project_BN (BatchNorm (None, 14, 14, 96) 384 ['block_12_project[0][0]'] alization) block_12_add (Add) (None, 14, 14, 96) 0 ['block_11_add[0][0]', 'block_12_project_BN[0][0]'] block_13_expand (Conv2D) (None, 14, 14, 576) 55296 ['block_12_add[0][0]'] block_13_expand_BN (BatchNorma (None, 14, 14, 576) 2304 ['block_13_expand[0][0]'] lization) block_13_expand_relu (ReLU) (None, 14, 14, 576) 0 ['block_13_expand_BN[0][0]'] block_13_pad (ZeroPadding2D) (None, 15, 15, 576) 0 ['block_13_expand_relu[0][0]'] block_13_depthwise (DepthwiseC (None, 7, 7, 576) 5184 ['block_13_pad[0][0]'] onv2D) block_13_depthwise_BN (BatchNo (None, 7, 7, 576) 2304 ['block_13_depthwise[0][0]'] rmalization) block_13_depthwise_relu (ReLU) (None, 7, 7, 576) 0 ['block_13_depthwise_BN[0][0]'] block_13_project (Conv2D) (None, 7, 7, 160) 92160 ['block_13_depthwise_relu[0][0]'] block_13_project_BN (BatchNorm (None, 7, 7, 160) 640 ['block_13_project[0][0]'] alization) block_14_expand (Conv2D) (None, 7, 7, 960) 153600 ['block_13_project_BN[0][0]'] block_14_expand_BN (BatchNorma (None, 7, 7, 960) 3840 ['block_14_expand[0][0]'] lization) block_14_expand_relu (ReLU) (None, 7, 7, 960) 0 ['block_14_expand_BN[0][0]'] block_14_depthwise (DepthwiseC (None, 7, 7, 960) 8640 ['block_14_expand_relu[0][0]'] onv2D) block_14_depthwise_BN (BatchNo (None, 7, 7, 960) 3840 ['block_14_depthwise[0][0]'] rmalization) block_14_depthwise_relu (ReLU) (None, 7, 7, 960) 0 ['block_14_depthwise_BN[0][0]'] block_14_project (Conv2D) (None, 7, 7, 160) 153600 ['block_14_depthwise_relu[0][0]'] block_14_project_BN (BatchNorm (None, 7, 7, 160) 640 ['block_14_project[0][0]'] alization) block_14_add (Add) (None, 7, 7, 160) 0 ['block_13_project_BN[0][0]', 'block_14_project_BN[0][0]'] block_15_expand (Conv2D) (None, 7, 7, 960) 153600 ['block_14_add[0][0]'] block_15_expand_BN (BatchNorma (None, 7, 7, 960) 3840 ['block_15_expand[0][0]'] lization) block_15_expand_relu (ReLU) (None, 7, 7, 960) 0 ['block_15_expand_BN[0][0]'] block_15_depthwise (DepthwiseC (None, 7, 7, 960) 8640 ['block_15_expand_relu[0][0]'] onv2D) block_15_depthwise_BN (BatchNo (None, 7, 7, 960) 3840 ['block_15_depthwise[0][0]'] rmalization) block_15_depthwise_relu (ReLU) (None, 7, 7, 960) 0 ['block_15_depthwise_BN[0][0]'] block_15_project (Conv2D) (None, 7, 7, 160) 153600 ['block_15_depthwise_relu[0][0]'] block_15_project_BN (BatchNorm (None, 7, 7, 160) 640 ['block_15_project[0][0]'] alization) block_15_add (Add) (None, 7, 7, 160) 0 ['block_14_add[0][0]', 'block_15_project_BN[0][0]'] block_16_expand (Conv2D) (None, 7, 7, 960) 153600 ['block_15_add[0][0]'] block_16_expand_BN (BatchNorma (None, 7, 7, 960) 3840 ['block_16_expand[0][0]'] lization) block_16_expand_relu (ReLU) (None, 7, 7, 960) 0 ['block_16_expand_BN[0][0]'] block_16_depthwise (DepthwiseC (None, 7, 7, 960) 8640 ['block_16_expand_relu[0][0]'] onv2D) block_16_depthwise_BN (BatchNo (None, 7, 7, 960) 3840 ['block_16_depthwise[0][0]'] rmalization) block_16_depthwise_relu (ReLU) (None, 7, 7, 960) 0 ['block_16_depthwise_BN[0][0]'] block_16_project (Conv2D) (None, 7, 7, 320) 307200 
['block_16_depthwise_relu[0][0]'] block_16_project_BN (BatchNorm (None, 7, 7, 320) 1280 ['block_16_project[0][0]'] alization) Conv_1 (Conv2D) (None, 7, 7, 1280) 409600 ['block_16_project_BN[0][0]'] Conv_1_bn (BatchNormalization) (None, 7, 7, 1280) 5120 ['Conv_1[0][0]'] out_relu (ReLU) (None, 7, 7, 1280) 0 ['Conv_1_bn[0][0]'] global_average_pooling2d (Glob (None, 1280) 0 ['out_relu[0][0]'] alAveragePooling2D) dense (Dense) (None, 512) 655872 ['global_average_pooling2d[0][0]' ] dropout (Dropout) (None, 512) 0 ['dense[0][0]'] dense_1 (Dense) (None, 29) 14877 ['dropout[0][0]'] ================================================================================================== Total params: 2,928,733 Trainable params: 670,749 Non-trainable params: 2,257,984 __________________________________________________________________________________________________
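'''
------------------------------------------------------------------------------------------------------------------------
The summary shows a MobileNetV2 backbone with a small classification head (global average pooling, a 512-unit dense
layer, dropout, and a 29-way softmax). For reference, here is a minimal sketch of how such a transfer-learning model
could be assembled; this is my own illustration, not the exact recipe used to train the downloaded model:
------------------------------------------------------------------------------------------------------------------------
'''
base = tf.keras.applications.MobileNetV2(input_shape=(224, 224, 3), include_top=False, weights='imagenet')
base.trainable = False #Freeze the pre-trained backbone so only the new head trains
sketch_model = tf.keras.Sequential([
    base,
    tf.keras.layers.GlobalAveragePooling2D(),
    tf.keras.layers.Dense(512, activation='relu'),
    tf.keras.layers.Dropout(0.2), #The dropout rate is a guess, the summary doesn't record it
    tf.keras.layers.Dense(29, activation='softmax') #26 letters + del/nothing/space
])
sketch_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])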
'''
------------------------------------------------------------------------------------------------------------------------
The downloaded model uses different labels from the ones we were using: its indices start at 0 instead of 1.
------------------------------------------------------------------------------------------------------------------------
'''
labelDictionaryForPrefit = {'A':0, 'B':1, 'C':2, 'D':3, 'E':4, 'F':5, 'G':6, 'H':7, 'I':8, 'J':9, 'K':10, 'L':11, 'M':12, 'N':13, 'O':14, 'P':15, 'Q':16, 'R':17, 'S':18, 'T':19, 'U':20, 'V':21, 'W':22, 'X':23, 'Y':24, 'Z':25, 'del':26, 'nothing':27, 'space':28}
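'''
------------------------------------------------------------------------------------------------------------------------
Rather than typing the mapping out by hand, the same zero-indexed dictionary can be built programmatically (an
equivalent construction, shown only to make the ordering explicit):
------------------------------------------------------------------------------------------------------------------------
'''
import string
labelDictionaryForPrefit = {label: index for index, label in enumerate(list(string.ascii_uppercase) + ['del', 'nothing', 'space'])}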
'''
------------------------------------------------------------------------------------------------------------------------
We see that the model expects RGB input (3 channels) with image dimensions of 224x224.
So for now we will accommodate that.
------------------------------------------------------------------------------------------------------------------------
We test to see if the never-before-seen hand is recognised.
This time the image shows the letter A, so we are expecting to see an A.
------------------------------------------------------------------------------------------------------------------------
'''
imageConvertionPipeline = make_pipeline(ResizeImageRGB((224, 224)), ConvertImageToArray(), ConvertImageToFloat(), NormalizeImage(), ReshapeImage((1, 224, 224, 3)))
currentImage = cv2.imread('Images/External Images/A_External_test.JPG')
prediction = tf_model.predict(imageConvertionPipeline.transform(currentImage))
print(list(labelDictionaryForPrefit.keys())[list(labelDictionaryForPrefit.values()).index(np.argmax(prediction))])
1/1 [==============================] - 1s 802ms/step A
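'''
------------------------------------------------------------------------------------------------------------------------
Taking only the argmax hides how confident the model actually is. A small diagnostic (my own addition) prints the top
three guesses with their scores:
------------------------------------------------------------------------------------------------------------------------
'''
indexToLabelForPrefit = {index: label for label, index in labelDictionaryForPrefit.items()} #Invert the label map
for index in np.argsort(prediction[0])[::-1][:3]: #Indices of the three highest scores
    print(f'{indexToLabelForPrefit[index]}: {prediction[0][index]:.3f}')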
'''
------------------------------------------------------------------------------------------------------------------------
Let's introduce some external images to the model, using the hand of someone kind enough to sit there and be my
hand model.
We have all the characters for the word chalk in the External Images folder.
Let's try and predict each of them.
------------------------------------------------------------------------------------------------------------------------
'''
ExternalWord = 'Chalk'
ExternalWord = ExternalWord.upper()
preditedCharacters = []
for char in ExternalWord:
    currentImage = cv2.imread('Images/External Images/'+char+'_External_test.JPG')
    prediction = tf_model.predict(imageConvertionPipeline.transform(currentImage))
    preditedCharacters.append(list(labelDictionaryForPrefit.keys())[list(labelDictionaryForPrefit.values()).index(np.argmax(prediction))])
print(preditedCharacters)
1/1 [==============================] - 0s 21ms/step 1/1 [==============================] - 0s 21ms/step 1/1 [==============================] - 0s 20ms/step 1/1 [==============================] - 0s 19ms/step 1/1 [==============================] - 0s 19ms/step 1/1 [==============================] - 0s 21ms/step ['C', 'H', 'A', 'L', 'K']
'''
------------------------------------------------------------------------------------------------------------------------
Let's graph the predictions.
------------------------------------------------------------------------------------------------------------------------
'''
def graphTheImagesAndPredictions(ExternalWord, preditedCharacters):
    fig, ax = plt.subplots(1, len(ExternalWord), figsize=(20, 20))
    for i, char in enumerate(ExternalWord):
        currentImage = cv2.imread('Images/External Images/'+char+'_External_test.JPG')
        ax[i].imshow(currentImage)
        ax[i].set_title(preditedCharacters[i])
        ax[i].axis('off')
graphTheImagesAndPredictions(ExternalWord, preditedCharacters)
'''
------------------------------------------------------------------------------------------------------------------------
Let's try whole words this time.
We won't use the external images for this, and will instead use the testing images that were given to us.
------------------------------------------------------------------------------------------------------------------------
'''
Word = 'Hello World'
Word = Word.upper()
preditedCharacters = []
imageConvertionPipeline = make_pipeline(ResizeImage((IMAGE_DIMENTION, IMAGE_DIMENTION)), ConvertImageToArray(), ConvertImageToFloat(), NormalizeImage(), ReshapeImage((1, IMAGE_DIMENTION, IMAGE_DIMENTION, 1))) #The pipeline doesn't depend on the character, so build it once
for char in Word:
    if char == ' ':
        char = 'space'
    currentImage = cv2.imread('Images/asl_alphabet_test/asl_alphabet_test/'+char+'_test.jpg')
    prediction = model.predict(imageConvertionPipeline.transform(currentImage))
    #print(list(labelDictionary.keys())[list(labelDictionary.values()).index(np.argmax(prediction))])
    preditedCharacters.append(list(labelDictionary.keys())[list(labelDictionary.values()).index(np.argmax(prediction))])
1/1 [==============================] - 0s 19ms/step 1/1 [==============================] - 0s 19ms/step 1/1 [==============================] - 0s 28ms/step 1/1 [==============================] - 0s 19ms/step 1/1 [==============================] - 0s 13ms/step 1/1 [==============================] - 0s 13ms/step 1/1 [==============================] - 0s 14ms/step 1/1 [==============================] - 0s 17ms/step 1/1 [==============================] - 0s 15ms/step 1/1 [==============================] - 0s 16ms/step 1/1 [==============================] - 0s 16ms/step 1/1 [==============================] - 0s 15ms/step
'''
------------------------------------------------------------------------------------------------------------------------
Let's see how well the predictions went.
------------------------------------------------------------------------------------------------------------------------
'''
preditedCharacters
['H', 'E', 'L', 'L', 'O', 'space', 'W', 'O', 'R', 'L', 'D']
'''
------------------------------------------------------------------------------------------------------------------------
The predictions went very well - every character came back correct.
Let's now join the characters together to form the sentence.
------------------------------------------------------------------------------------------------------------------------
'''
sentence = ''.join(preditedCharacters).replace('space', ' ').lower().capitalize()
sentence
'Hello world'
'''
------------------------------------------------------------------------------------------------------------------------
But let's assume that one of the letters was not predicted correctly, so we will use an algorithm to correct it.
Just for the sake of testing, we will use an extremely basic algorithm that will
1. Check if the word is in the English dictionary.
2. If it is not, compare it against each word in the dictionary and find the one with the highest similarity score.
3. Return that closest word.
------------------------------------------------------------------------------------------------------------------------
'''
def findClosestWord(word):
    closestWord = ''
    highestMatchPercentage = 0
    for englishWord in english_words_set: #Loop through all the words in the dictionary
        if len(englishWord) == len(word): #Only compare words that are the same length
            matchPercentage = Levenshtein.ratio(word, englishWord) #Find the match percentage
            if matchPercentage > highestMatchPercentage: #If it beats the previous best match
                highestMatchPercentage = matchPercentage #Remember the new best percentage
                closestWord = englishWord #Remember the new best word
    return closestWord, highestMatchPercentage #Return the closest word and its match percentage
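'''
------------------------------------------------------------------------------------------------------------------------
To make the scoring concrete: Levenshtein.ratio returns a similarity between 0 and 1, so a single substituted letter
in a five letter word should score around 0.8. A quick illustration:
------------------------------------------------------------------------------------------------------------------------
'''
print(Levenshtein.ratio('treck', 'truck')) #One substitution in five letters -> 0.8
print(Levenshtein.ratio('treck', 'treck')) #Identical words -> 1.0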
tempArray = []
'''
------------------------------------------------------------------------------------------------------------------------
Let's try the sentence:
my truck broke - but with some of the words misspelled.
------------------------------------------------------------------------------------------------------------------------
'''
words = ['my', 'treck','brokr']
for word in words:
    if word not in english_words_set:
        closestWord, highestMatchPercentage = findClosestWord(word)
        word = closestWord
    tempArray.append(word)
sentence = ' '.join(tempArray)
print(f'''
The original sentence is | {' '.join(words)} |
The corrected sentence is | {sentence} |
''')
The original sentence is | my treck brokr | The corrected sentence is | my truck broke |
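'''
------------------------------------------------------------------------------------------------------------------------
As an aside, the standard library can do something similar without the Levenshtein dependency. Here is a sketch of the
same correction step using difflib (an alternative I am noting for completeness, not what was used above). Note that
difflib does not restrict candidates to the same length, so it may occasionally pick different words:
------------------------------------------------------------------------------------------------------------------------
'''
import difflib
def findClosestWordStdlib(word):
    matches = difflib.get_close_matches(word, english_words_set, n=1, cutoff=0.0) #Best match by SequenceMatcher ratio
    return matches[0] if matches else word
print([word if word in english_words_set else findClosestWordStdlib(word) for word in ['my', 'treck', 'brokr']])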