GoogLeNet Inception v1 结构 及 pytorch、tensorflow、keras、paddle实现ImageNet识别
环境
python3.6, torch 1.0.1, torchvision 0.4.0, torchsummary 1.5.1
代码
# -*- coding: utf-8 -*-
# @Time : 2020/2/3 9:45
# @Author : Zhao HL
# @File : InceptionV1-torch.py
import torch, torchvision
from torchvision import transforms
from torch import optim, argmax
from torch.nn import Conv2d, Linear, MaxPool2d, AvgPool2d, ReLU, Softmax, Dropout, Module, Sequential, CrossEntropyLoss
from torchsummary import summary
import sys, os
import numpy as np
from PIL import Image
import pandas as pd
from collections import OrderedDict
from my_utils import process_show, draw_loss_acc, dataInfo_show, dataset_divide
# region parameters
# region paths
# Dataset root and the csv/txt file describing the train/val/test split.
Data_path = "./data/"
Data_csv_path = "./data/split.txt"
# Checkpoint locations for each framework implementation (only torch is used here).
Model_path = 'model/'
Model_file_tf = "model/InceptionV1_tf.ckpt"
Model_file_keras = "model/InceptionV1_keras.h5"
Model_file_torch = "model/InceptionV1_torch.pth"
Model_file_paddle = "model/InceptionV1_paddle.model"
# endregion
# region image parameter
Img_size = 224   # images are resized to 224x224 before entering the net
Img_chs = 3      # RGB channels
Label_size = 1
# Class names; the integer label of an image is its index in this list
# (see MyDataset.__getitem__, which parses the class name from the file name).
Label_class = ['agricultural',
               'airplane',
               'baseballdiamond',
               'beach',
               'buildings',
               'chaparral',
               'denseresidential',
               'forest',
               'freeway',
               'golfcourse',
               'harbor',
               'intersection',
               'mediumresidential',
               'mobilehomepark',
               'overpass',
               'parkinglot',
               'river',
               'runway',
               'sparseresidential',
               'storagetanks',
               'tenniscourt']
Labels_nums = len(Label_class)
# endregion
# region net parameter
# Stem convolution sizes.
Conv1_kernel_size = 7
Conv1_chs = 64
Conv21_kernel_size = 1
Conv21_chs = 64
Conv2_kernel_size = 3
Conv2_chs = 192
# Inception block channel tuples, unpacked by InceptionV1_Model as
# (conv1, conv3-reduce, conv3, conv5-reduce, conv5, pool-proj).
Icp3a_size = (64, 96, 128, 16, 32, 32)
Icp3b_size = (128, 128, 192, 32, 96, 64)
Icp4a_size = (192, 96, 208, 16, 48, 64)
Icp4b_size = (160, 112, 224, 24, 64, 64)
Icp4c_size = (128, 128, 256, 24, 64, 64)
Icp4d_size = (112, 144, 288, 32, 64, 64)
Icp4e_size = (256, 160, 320, 32, 128, 128)
Icp5a_size = (256, 160, 320, 32, 128, 128)
Icp5b_size = (384, 192, 384, 48, 128, 128)
# Channel widths of the auxiliary classifier heads (InceptionV1_Out).
Out_chs1 = 128
Out_chs2 = 1024
# endregion
# region hyperparameter
Learning_rate = 1e-3
Batch_size = 16
Buffer_size = 256
Infer_size = 1
Epochs = 20
# 1470/210/420 = 70%/10%/20% of 2100 images, matching dataset_divide's split.
Train_num = 1470
Train_batch_num = Train_num // Batch_size
Val_num = 210
Val_batch_num = Val_num // Batch_size
Test_num = 420
Test_batch_num = Test_num // Batch_size
# endregion
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# endregion
class MyDataset(torch.utils.data.Dataset):
    """Image dataset whose integer label is parsed from the file name.

    File names are expected to look like '<classname>NN.ext' — the last 6
    characters (2-digit index + 4-char extension, e.g. '00.tif') are stripped
    and the remainder is looked up in Label_class.
    """
    def __init__(self, root_path, files_list=None, transform=None):
        """
        :param root_path: directory containing the images
        :param files_list: explicit list of file names; defaults to
            os.listdir(root_path) when omitted
        :param transform: optional torchvision transform applied per image
        """
        self.root_path = root_path
        self.transform = transform
        self.files_list = files_list if files_list else os.listdir(root_path)
        # BUG FIX: the original computed len(files_list), which raises
        # TypeError when files_list is None; measure the resolved list.
        self.size = len(self.files_list)
    def __len__(self):
        return self.size
    def __getitem__(self, index):
        img = Image.open(os.path.join(self.root_path, self.files_list[index]))
        if self.transform:
            img = self.transform(img)
        # Class name = basename minus the trailing 6 chars ('NN.tif').
        label_str = os.path.basename(self.files_list[index])[:-6]
        label = Label_class.index(label_str)
        return img, label
class InceptionV1_Model(Module):
    """One Inception v1 block: four parallel branches concatenated on channels.

    Branches: 1x1 conv | 1x1 -> 3x3 conv | 1x1 -> 5x5 conv | 3x3 maxpool -> 1x1.
    Output channel count = con1_chs + con3_chs + con5_chs + pool11_chs.
    Submodule names are kept stable so saved state_dicts remain loadable.
    """
    def __init__(self, input_chs, model_size):
        super(InceptionV1_Model, self).__init__()
        # model_size = (1x1, 3x3-reduce, 3x3, 5x5-reduce, 5x5, pool-proj)
        con1_chs, con31_chs, con3_chs, con51_chs, con5_chs, pool11_chs = model_size
        self.conv1 = Sequential(
            Conv2d(input_chs, con1_chs, kernel_size=1),
            ReLU(),
        )
        self.conv3 = Sequential(
            Conv2d(input_chs, con31_chs, kernel_size=1),
            ReLU(),
            Conv2d(con31_chs, con3_chs, kernel_size=3, padding=1),
            ReLU(),
        )
        self.conv5 = Sequential(
            Conv2d(input_chs, con51_chs, kernel_size=1),
            ReLU(),
            Conv2d(con51_chs, con5_chs, kernel_size=5, padding=2),
            ReLU(),
        )
        self.pool1 = Sequential(
            MaxPool2d(kernel_size=3, stride=1, padding=1),
            Conv2d(input_chs, pool11_chs, kernel_size=1),
            ReLU(),
        )
    def forward(self, input):
        # Run every branch on the same input, then stack along channels.
        branches = (self.conv1, self.conv3, self.conv5, self.pool1)
        return torch.cat([branch(input) for branch in branches], dim=1)
class InceptionV1_Out(Module):
    """Auxiliary classifier head attached to intermediate Inception stages.

    avgpool(5, stride 3) -> 1x1 conv (Out_chs1) -> flatten ->
    dropout/fc(Out_chs2) -> dropout/fc(Labels_nums). Returns raw logits
    (CrossEntropyLoss applies softmax itself). Submodule names are kept
    stable so saved state_dicts remain loadable.
    """
    def __init__(self, input_chs):
        super(InceptionV1_Out, self).__init__()
        self.pool1 = Sequential(
            AvgPool2d(kernel_size=5, stride=3),
        )
        self.conv = Sequential(
            Conv2d(input_chs, Out_chs1, kernel_size=1),
            ReLU(),
        )
        self.fc = Sequential(
            Dropout(p=0.3),
            # assumes a 4x4 spatial map after pooling (14x14 input) — TODO confirm
            Linear(Out_chs1 * 4 * 4, Out_chs2),
            Dropout(p=0.3),
            Linear(Out_chs2, Labels_nums),
        )
    def forward(self, input):
        pooled = self.pool1(input)
        features = self.conv(pooled)
        flat = features.view(features.size(0), -1)
        return self.fc(flat)
class InceptionV1(Module):
    """GoogLeNet / Inception v1 with two auxiliary classifier heads.

    forward() returns (aux1_logits, aux2_logits, main_logits) in training
    mode and only main_logits in eval mode — callers must unpack in that
    order.
    """
    def __init__(self):
        super(InceptionV1, self).__init__()
        # Stem: 7x7/2 conv -> pool -> 1x1 + 3x3 convs -> pool (224 -> 28).
        self.conv = Sequential(
            Conv2d(Img_chs, Conv1_chs, kernel_size=Conv1_kernel_size, stride=2, padding=3),
            ReLU(),
            MaxPool2d(kernel_size=3, stride=2, padding=1),
            Conv2d(Conv1_chs, Conv21_chs, kernel_size=Conv21_kernel_size),
            ReLU(),
            Conv2d(Conv21_chs, Conv2_chs, kernel_size=Conv2_kernel_size, padding=1),
            ReLU(),
            MaxPool2d(kernel_size=3, stride=2, padding=1),
        )
        self.inception3a = InceptionV1_Model(192, Icp3a_size)
        self.inception3b = InceptionV1_Model(256, Icp3b_size)
        self.pool1 = MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.inception4a = InceptionV1_Model(480, Icp4a_size)
        # FIX: the original wrapped the aux heads in `if self.training == True`,
        # but Module.__init__ always sets training=True, so the condition was
        # dead code. Create them unconditionally (behavior unchanged); forward()
        # decides whether they are used.
        self.out1 = InceptionV1_Out(512)
        self.inception4b = InceptionV1_Model(512, Icp4b_size)
        self.inception4c = InceptionV1_Model(512, Icp4c_size)
        self.inception4d = InceptionV1_Model(512, Icp4d_size)
        self.out2 = InceptionV1_Out(528)
        self.inception4e = InceptionV1_Model(528, Icp4e_size)
        self.pool2 = MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.inception5a = InceptionV1_Model(832, Icp5a_size)
        self.inception5b = InceptionV1_Model(832, Icp5b_size)
        self.pool3 = AvgPool2d(kernel_size=7, stride=1, )
        self.linear = Sequential(
            Dropout(p=0.4),
            Linear(1024, Labels_nums),  # raw logits; CrossEntropyLoss softmaxes
        )
    def forward(self, input):
        x = self.conv(input)
        x = self.inception3a(x)
        x = self.inception3b(x)
        x = self.pool1(x)
        x = self.inception4a(x)
        if self.training:
            output1 = self.out1(x)  # aux head 1 after inception4a
        x = self.inception4b(x)
        x = self.inception4c(x)
        x = self.inception4d(x)
        if self.training:
            output2 = self.out2(x)  # aux head 2 after inception4d
        x = self.inception4e(x)
        x = self.pool2(x)
        x = self.inception5a(x)
        x = self.inception5b(x)
        x = self.pool3(x)
        x = x.view(x.size(0), -1)
        output = self.linear(x)
        if self.training:
            return output1, output2, output
        else:
            return output
def train(structShow=False):
    """Train InceptionV1 on the dataset described by Data_csv_path.

    Trains for Epochs epochs with Adam + cross-entropy, combining the main
    head and the two auxiliary heads (0.6 / 0.2 / 0.2), checkpoints the
    model with the lowest validation loss, and plots loss/accuracy curves.

    :param structShow: if True, print a torchsummary overview of the model.
    """
    transform = transforms.Compose([
        transforms.Resize((Img_size, Img_size)),
        transforms.ToTensor(),
        # ImageNet channel statistics.
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ])
    df = pd.read_csv(Data_csv_path, header=0, index_col=0)
    train_list = df[df['split'] == 'train']['filename'].tolist()
    val_list = df[df['split'] == 'val']['filename'].tolist()
    train_dataset = MyDataset(Data_path, files_list=train_list, transform=transform)
    train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=Batch_size, shuffle=True)
    val_dataset = MyDataset(Data_path, files_list=val_list, transform=transform)
    val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=Batch_size, shuffle=True)
    model = InceptionV1().to(device)
    model.train()
    if structShow:
        print(summary(model, (3, 224, 224)))
    # if os.path.exists(Model_file_torch):
    #     model.load_state_dict(torch.load(Model_file_torch))
    #     print('get model from', Model_file_torch)
    criterion = CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=Learning_rate)
    train_loss = np.ones(Epochs)
    train_acc = np.ones(Epochs)
    val_loss = np.ones(Epochs)
    val_acc = np.ones(Epochs)
    best_loss = float("inf")
    best_loss_epoch = 0
    for epoch in range(Epochs):
        print('Epoch %d/%d:' % (epoch + 1, Epochs))
        train_sum_loss = 0
        train_sum_acc = 0
        val_sum_loss = 0
        val_sum_acc = 0
        model.train()
        with torch.set_grad_enabled(True):
            for batch_num, (images, labels) in enumerate(train_loader):
                images, labels = images.to(device), labels.to(device)
                optimizer.zero_grad()
                # BUG FIX: InceptionV1.forward returns (aux1, aux2, main);
                # the original unpacked `output, output1, output2`, so the
                # 0.6 weight was applied to an auxiliary head instead of
                # the main classifier.
                output1, output2, output = model(images)
                loss = criterion(output, labels)
                loss1 = criterion(output1, labels)
                loss2 = criterion(output2, labels)
                total_loss = 0.6 * loss + 0.2 * loss1 + 0.2 * loss2
                train_sum_loss += total_loss.item()
                total_loss.backward()
                optimizer.step()
                _, predicted = torch.max(output.data, 1)
                correct = (predicted == labels).sum().float()
                # .item() keeps the running sums as plain floats so they can
                # be stored into the numpy arrays even when training on CUDA.
                acc = correct.item() / labels.size(0)
                train_sum_acc += acc
                process_show(batch_num + 1, len(train_loader), acc, loss, prefix='train:')
        model.eval()
        with torch.set_grad_enabled(False):
            for batch_num, (images, labels) in enumerate(val_loader):
                images, labels = images.to(device), labels.to(device)
                output = model(images)
                loss = criterion(output, labels)
                val_sum_loss += loss.item()
                _, predicted = torch.max(output.data, 1)
                correct = (predicted == labels).sum().float()
                acc = correct.item() / labels.size(0)
                val_sum_acc += acc
                process_show(batch_num + 1, len(val_loader), acc, loss, prefix='val:')
        train_sum_loss /= len(train_loader)
        train_sum_acc /= len(train_loader)
        val_sum_loss /= len(val_loader)
        val_sum_acc /= len(val_loader)
        train_loss[epoch] = train_sum_loss
        train_acc[epoch] = train_sum_acc
        val_loss[epoch] = val_sum_loss
        val_acc[epoch] = val_sum_acc
        print('average summary:\ntrain acc %.4f, loss %.4f ; val acc %.4f, loss %.4f'
              % (train_sum_acc, train_sum_loss, val_sum_acc, val_sum_loss))
        # Checkpoint only when validation loss improves.
        if val_sum_loss < best_loss:
            print('val_loss improve from %.4f to %.4f, model save to %s ! \n' % (
                best_loss, val_sum_loss, Model_file_torch))
            best_loss = val_sum_loss
            best_loss_epoch = epoch + 1
            torch.save(model.state_dict(), Model_file_torch)
        else:
            print('val_loss do not improve from %.4f \n' % (best_loss))
    print('best loss %.4f at epoch %d \n' % (best_loss, best_loss_epoch))
    draw_loss_acc(train_loss, train_acc, 'train')
    draw_loss_acc(val_loss, val_acc, 'val')
if __name__ == '__main__':
    pass
    # One-off: regenerate the train/val/test split file before training.
    # dataset_divide(r'E:\_Python\01_deeplearning\04_GoogLeNet\Inception1\data\split.txt')
    train(structShow=True)
my_utils.py (helper module imported by the training script above):
# -*- coding: utf-8 -*-
# @Time : 2020/1/21 11:39
# @Author : Zhao HL
# @File : my_utils.py
import sys,os,random
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from PIL import Image
def process_show(num, nums, train_acc, train_loss, prefix='', suffix=''):
    """Render a one-line console progress bar for batch `num` of `nums`.

    Rewrites the current line via '\r'; prints a final newline once the
    last batch is reached.
    """
    pct = int(round(num / nums, 2) * 100)
    filled = '#' * (pct // 2)
    remaining = '_' * (50 - pct // 2)
    line = '\r%s batch %3d/%d:train accuracy %.4f, train loss %00.4f [%s%s]%.1f%% %s; ' % (
        prefix, num, nums, train_acc, train_loss, filled, remaining, pct, suffix)
    sys.stdout.write(line)
    sys.stdout.flush()
    if num >= nums:
        print()
def dataInfo_show(data_path, csv_pth, cls_dic_path, shapesShow=True, classesShow=True):
    """Print dataset summaries: per-class counts and image shape frequencies.

    :param data_path: directory of image files (scanned when shapesShow)
    :param csv_pth: csv with a 'label' column
    :param cls_dic_path: csv consumed by get_cls_dic for label -> name mapping
    """
    cls_dict = get_cls_dic(cls_dic_path)
    if classesShow:
        print('\n' + '*' * 50)
        df = pd.read_csv(csv_pth)
        labels = df['label'].unique()
        print({lbl: cls_dict[lbl] for lbl in labels})
        # Frequency of each class, keyed by readable name.
        counts = {cls_dict[lbl]: cnt for lbl, cnt in df['label'].value_counts().items()}
        for name, cnt in counts.items():
            print(name, cnt)
    if shapesShow:
        print('\n' + '*' * 50)
        shapes = [np.array(Image.open(os.path.join(data_path, fname))).shape
                  for fname in os.listdir(data_path)]
        print(pd.Series(shapes).value_counts())
def get_cls_dic(cls_dic_path):
    """Read the class-label dictionary csv and map the 9-char code to its name.

    Expects columns 'info' and 'other'; the first 9 characters of 'info' are
    the class code, everything after character 10 is the readable label
    (character 9, the separator, is skipped).
    """
    df = pd.read_csv(cls_dic_path)
    df['cls'] = df['info'].apply(lambda s: s[:9]).tolist()
    df['label'] = df['info'].apply(lambda s: s[10:]).tolist()
    df = df.drop(columns=['info', 'other'])
    raw = df.set_index('cls').T.to_dict('list')
    # Each value is a one-element list; unwrap it.
    return {code: values[0] for code, values in raw.items()}
def dataset_divide(csv_pth):
    """Randomly assign each row of csv_pth to train/val/test (70%/10%/20%).

    Inserts a 'split' column and writes the frame back to the same file.
    """
    df = pd.read_csv(csv_pth, header=0, index_col=0)
    df.insert(1, 'split', None)
    names = list(df['filename'])
    random.shuffle(names)
    n_train = int(len(names) * 0.7)
    n_train_val = int(len(names) * 0.8)
    partitions = (('train', names[:n_train]),
                  ('val', names[n_train:n_train_val]),
                  ('test', names[n_train_val:]))
    for tag, part in partitions:
        df.loc[df['filename'].isin(part), 'split'] = tag
    df.to_csv(csv_pth)
def draw_loss_acc(loss, acc, type='', save_path=None):
    """Plot accuracy (top subplot) and loss (bottom subplot) per epoch.

    :param loss: per-epoch loss values
    :param acc: per-epoch accuracy values, same length as loss
    :param type: label prefix, e.g. 'train' or 'val' (parameter name kept
        for caller compatibility even though it shadows the builtin)
    :param save_path: directory to save '<type>_acc_loss.png' into, or None
    """
    assert len(acc) == len(loss)
    x = list(range(len(acc)))
    plt.subplot(2, 1, 1)
    plt.plot(x, acc, 'o-')
    plt.title(type + ' accuracy vs. epoches')
    plt.ylabel('accuracy')
    plt.subplot(2, 1, 2)
    plt.plot(x, loss, '.-')
    plt.xlabel(type + ' loss vs. epoches')
    plt.ylabel('loss')
    # BUG FIX: save BEFORE plt.show() — show() flushes and clears the
    # current figure, so the original saved an empty image.
    if save_path:
        plt.savefig(os.path.join(save_path, type + "_acc_loss.png"))
    plt.show()
if __name__ == '__main__':
    # my_utils is a library module; nothing to run when executed directly.
    pass
作者:GISer_Lin