import os
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from google.colab import auth
from oauth2client.client import GoogleCredentials
import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras.utils import to_categorical
from keras.preprocessing import image
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from keras.utils import to_categorical
from tqdm import tqdm
第3步:回想一下前面讨论过的预处理步骤。我们将在加载数据后在这里使用它们。
# Load the training metadata table from the CSV file in the working directory.
train = pd.read_csv(filepath_or_buffer='train.csv')
接下来,我们将读取所有训练图像,把它们存入一个列表,最后将该列表转换为 NumPy 数组。
# The images are grayscale, so they are loaded with color_mode='grayscale';
# for RGB images use color_mode='rgb' (and a 3-channel model input) instead.
# color_mode replaces the deprecated grayscale=True flag.
train_image = []
for img_id in tqdm(train['id']):
    # File names are "<id>.png"; str() works whether the ids are ints or strings.
    img = image.load_img('train/' + str(img_id) + '.png',
                         target_size=(28, 28), color_mode='grayscale')
    # Convert the loaded image to an array and divide by 255 to rescale
    # the pixel values (presumably 0-255) into the 0-1 range.
    img = image.img_to_array(img)
    img = img / 255
    train_image.append(img)
# Stack the per-image arrays into one array of samples.
X = np.array(train_image)
由于这是一个多类别分类问题(共 10 个类别),我们将对目标变量进行 one-hot 编码。
# One-hot encode the class labels from the 'label' column to form the targets.
y = to_categorical(train['label'].values)
第4步:从训练数据创建验证集。
# Hold out 20% of the samples for validation; the fixed random_state keeps
# the split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)
import os
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from google.colab import auth
from oauth2client.client import GoogleCredentials
# Replace the id and filename in the below codes
download = drive.CreateFile({'id': '1ZCzHDAfwgLdQke_GNnHp_4OheRRtNPs-'})
download.GetContentFile('Train_UQcUa52.zip')
!unzip Train_UQcUa52.zip
# Importing libraries
import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras.utils import to_categorical
from keras.preprocessing import image
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from keras.utils import to_categorical
from tqdm import tqdm
# Load the training metadata table from the CSV file in the working directory.
train = pd.read_csv(filepath_or_buffer='train.csv')
# Reading the training images.
# They are loaded as single-channel grayscale via color_mode='grayscale',
# which replaces the deprecated grayscale=True flag.
train_image = []
for filename in tqdm(train['filename']):
    img = image.load_img('Images/train/' + filename,
                         target_size=(28, 28), color_mode='grayscale')
    # Convert the loaded image to an array and divide by 255 to rescale
    # the pixel values (presumably 0-255) into the 0-1 range.
    img = image.img_to_array(img)
    img = img / 255
    train_image.append(img)
# Stack the per-image arrays into one array of samples.
X = np.array(train_image)
# Build the target variable: one-hot encode the values of the 'label' column.
y = to_categorical(train['label'].values)
# Create the validation set: hold out 20% of the samples, with a fixed
# random_state so the split is reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)
# Define the model structure: two convolutional layers, max pooling and
# dropout, then a dense head ending in a 10-unit softmax output layer.
model = Sequential([
    Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=(28, 28, 1)),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.25),
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(10, activation='softmax'),
])
# Compile the model with categorical cross-entropy loss, the Adam optimizer,
# and accuracy as the reported metric.
model.compile(
    optimizer='Adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)
# Train the model for 10 epochs, passing the held-out split as validation data.
model.fit(
    X_train,
    y_train,
    epochs=10,
    validation_data=(X_test, y_test),
)