Stancy Zhang

Goal

Explore the CIFAR-10 image dataset with a convolutional neural network built in PyTorch. This project works through the details of multi-class image classification: the network is trained and evaluated on CIFAR-10, combining convolutional layers with gradient-based optimizers (Adam or SGD) to turn raw pixel values into object-class predictions.

Import Packages

import os
# Choose the base data directory `predir` depending on where the notebook runs.
# The string form of the IPython shell object is searched for 'google' to
# detect Google Colab; get_ipython() must exist, i.e. this has to run inside an
# IPython/Jupyter session.
if ('google' in str(get_ipython())):
	from google.colab import drive
	# Mount Google Drive at the relative mount point 'ME'.
	drive.mount('ME')
	#predir='/content/ME/My Drive/'
	predir='ME/My Drive/'
else:
	# NOTE(review): this is a relative path ('Users/amit/Google Drive') and,
	# unlike the Colab branch, it carries no trailing separator — confirm that
	# both are intended, since code that concatenates file names onto predir
	# relies on the trailing separator.
	predir = os.path.join('Users','amit','Google Drive')
	# Descend into 'My Drive' only when that sub-folder exists.
	if os.path.isdir(os.path.join(predir,'My Drive')):
			predir=os.path.join(predir,'My Drive')
	
import torch
import numpy as np

# Torch functions 
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

# Utility to track progress of a routine.
#from tqdm import tqdm
from tqdm.notebook import trange, tqdm

# Folder with course data
datadir=predir
# Run on the first CUDA device when one is available, otherwise on the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

Drive already mounted at ME; to attempt to forcibly remount, call drive.mount("ME", force_remount=True).

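Load the CIFAR-10 data and split it into training (the first 45,000 images of the training file), validation (the remaining images of the training file) and test sets.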

import h5py

def get_cifar(num_train=45000):
	"""Load CIFAR-10 from HDF5 files and split it into train/validation/test.

	Reads the 'data' and 'labels' datasets from 'cifar10_train.hdf5' and
	'cifar10_test.hdf5' inside the module-level ``datadir`` folder. Image
	arrays are transposed with (0, 3, 1, 2) (presumably from channels-last to
	channels-first storage), converted to float32 and divided by 255.

	Args:
		num_train: number of leading rows of the training file used for
			training; the remaining rows form the validation set.

	Returns:
		Three ``(data, labels)`` tuples: train, validation and test.
	"""
	# os.path.join works whether or not datadir ends with a path separator;
	# plain string concatenation silently produced a wrong path without one.
	train_path = os.path.join(datadir, 'cifar10_train.hdf5')
	test_path = os.path.join(datadir, 'cifar10_test.hdf5')

	with h5py.File(train_path, "r") as f:
		tr = f['data'][:].transpose(0, 3, 1, 2)
		tr_lb = f['labels'][:]
	train_data = np.float32(tr[:num_train]) / 255.
	train_labels = tr_lb[:num_train]
	val_data = np.float32(tr[num_train:]) / 255.
	val_labels = tr_lb[num_train:]

	with h5py.File(test_path, "r") as f:
		test_data = np.float32(f['data'][:].transpose(0, 3, 1, 2)) / 255.
		test_labels = f['labels'][:]
	return (train_data, train_labels), (val_data, val_labels), (test_data, test_labels)

Get the data

def get_data(data_set):
	"""Return the (train, val, test) splits for the named data set.

	Args:
		data_set: "mnist" or "cifar".

	Returns:
		Whatever the matching loader returns (for "cifar", three
		``(data, labels)`` tuples).

	Raises:
		ValueError: if ``data_set`` is not one of the supported names. The
			previous behavior of silently returning None only surfaced later
			as an unrelated unpacking error at the call site.
	"""
	if data_set == "mnist":
		# NOTE(review): get_mnist is not defined in the visible part of this
		# notebook — confirm it exists before selecting "mnist".
		return get_mnist()
	if data_set == "cifar":
		return get_cifar()
	raise ValueError(
		"unknown data_set %r; expected 'mnist' or 'cifar'" % (data_set,)
	)

# An object containing the relevant parameters for running the experiment.
class par(object):
	"""Mutable bag of experiment settings, filled with defaults on creation.

	Other cells of the notebook read these attributes (for example
	``step_size`` is used as the optimizer learning rate and ``num_epochs``
	bounds the training loop) and add further ones after construction.
	"""

	def __init__(self):
		# (attribute name, default value) pairs, assigned in this order.
		defaults = (
			("batch_size", 1000),
			("step_size", .001),
			("num_epochs", 20),
			("numtrain", 10000),
			("minimizer", "Adam"),
			("data_set", "cifar"),
			("model_name", "model"),
			("dropout", 0.),
			("dim", 32),
			("pool_size", 2),
			("kernel_size", 5),
			("mid_layer", 256),
			("use_gpu", False),
		)
		for attr_name, attr_value in defaults:
			setattr(self, attr_name, attr_value)

# Create the experiment settings with their default values.
pars=par()

# Load the three splits for the configured data set; each split is indexed
# below as (data, labels).
train,val,test=get_data(data_set=pars.data_set)

# pyplot is used by this and later cells but is not imported anywhere in the
# visible notebook, so bring it into scope here.
import matplotlib.pyplot as plt

# Show the first few training images side by side with their labels.
num_images = 10
fig, axes = plt.subplots(1, num_images, figsize=(15, 3))
for i in range(num_images):
	# Move the leading axis to the end so imshow receives the channel axis last.
	img = train[0][i].transpose(1, 2, 0)
	axes[i].imshow(img)
	axes[i].axis('off')
	axes[i].set_title(f'Label: {train[1][i]}')

plt.show()

png

class CIFAR_Net(nn.Module):
	"""Two-stage convolutional classifier with a 10-way output layer.

	Data flow: conv1 -> pool1 -> ReLU -> conv2 -> pool2 -> ReLU -> Dropout2d
	-> flatten -> fc1 -> ReLU -> Dropout -> fc_final. The size of the fully
	connected head is discovered by a dummy forward pass run inside
	``__init__``. The instance also owns its optimizer and its loss criterion.
	"""

	def __init__(self,pars):
		"""Build the layers, size the head and create the optimizer.

		Args:
			pars: settings object providing ``kernel_size`` (indexable, two
				entries), ``pool_size``, ``mid_layer`` (int, or None for no
				hidden fully connected layer), ``dropout``, ``inp_dim``
				(shape of one input, channel count first), ``minimizer``
				('Adam' selects Adam, anything else SGD) and ``step_size``.
		"""
		super(CIFAR_Net, self).__init__()

		ks = pars.kernel_size
		# MaxPool2d expects an int or a pair; pass a plain int instead of a
		# one-element list holding a NumPy scalar.
		ps = int(pars.pool_size)
		inp_dim = tuple(pars.inp_dim)
		self.mid_layer = pars.mid_layer
		# Two successive convolutional layers, each followed by a pooling
		# layer, plus two dropout layers. The input channel count is read
		# from inp_dim so that it always agrees with the dummy pass below.
		self.conv1 = nn.Conv2d(inp_dim[0], 32, kernel_size=ks[0], padding=ks[0] // 2)
		self.pool1 = nn.MaxPool2d(kernel_size=ps, stride=2)
		self.conv2 = nn.Conv2d(32, 64, kernel_size=ks[1], padding=ks[1] // 2)
		self.drop2 = nn.Dropout2d(pars.dropout)
		# NOTE(review): pool2 keeps its fixed kernel of 2 and does not follow
		# pars.pool_size — confirm whether that is intended.
		self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)
		self.drop_final = nn.Dropout(pars.dropout)

		# Run the network once on a dummy data point of the same dimension as
		# the input images to get the input dimension of the fully connected
		# layer that follows the second convolutional stage. No gradients are
		# needed for this sizing pass.
		self.first=True
		with torch.no_grad():
			self.forward(torch.zeros((1,)+inp_dim))

		# Setup the optimizer type and send it the parameters of the model
		# (the head layers already exist at this point).
		if pars.minimizer == 'Adam':
			self.optimizer = torch.optim.Adam(self.parameters(), lr = pars.step_size)
		else:
			self.optimizer = torch.optim.SGD(self.parameters(), lr = pars.step_size)

		self.criterion=nn.CrossEntropyLoss()

	def _build_head(self, x):
		"""Create fc1/fc_final from the shape of the conv output ``x`` and report sizes."""
		self.first=False
		self.inp=x.shape[1]*x.shape[2]*x.shape[3]
		print('input dimension to fc1',self.inp)
		if self.mid_layer is not None:
			self.fc1 = nn.Linear(self.inp, self.mid_layer)
			self.fc_final = nn.Linear(self.mid_layer, 10)
		else:
			# No hidden layer: fc1 passes the features through unchanged.
			self.fc1=nn.Identity()
			self.fc_final = nn.Linear(self.inp, 10)
		# Print out all network parameter shapes and their total count.
		tot_pars=0
		for k,p in self.named_parameters():
			tot_pars+=p.numel()
			print(k,p.shape)
		print('tot_pars',tot_pars)

	def forward(self, x):
		"""Return the 10 class scores for a batch ``x``.

		On the very first call (made from ``__init__``) the intermediate
		shapes are printed and the fully connected head is created.
		"""
		# Apply relu to a pooled conv1 layer.
		x = F.relu(self.pool1(self.conv1(x)))
		if self.first:
			print('conv1',x.shape)
		# Apply relu to a pooled conv2 layer, followed by a dropout layer.
		x = self.drop2(F.relu(self.pool2(self.conv2(x))))
		if self.first:
			print('conv2',x.shape)
			self._build_head(x)
		x = x.reshape(-1, self.inp)
		x = F.relu(self.fc1(x))
		x = self.drop_final(x)
		x = self.fc_final(x)
		return x

	def get_acc_and_loss(self, data, targ):
		"""Run the network on ``data``; return (loss, number of correct predictions)."""
		output = self.forward(data)
		loss = self.criterion(output, targ)
		# Predicted class = index of the largest score in each row.
		pred = torch.max(output,1)[1]
		correct = torch.eq(pred,targ).sum()

		return loss,correct

	def run_grad(self,data,targ):
		"""Compute loss and correct count, then take one optimizer step on the loss."""
		loss, correct=self.get_acc_and_loss(data,targ)
		self.optimizer.zero_grad()
		loss.backward()
		self.optimizer.step()

		return loss, correct

# use GPU when possible
pars.device = device
# One kernel size per convolutional layer; CIFAR_Net indexes entries 0 and 1.
pars.kernel_size=[5,5]
train,val,test=get_data(data_set=pars.data_set)
# Shape of a single training image; CIFAR_Net uses it to build its dummy input.
pars.inp_dim=train[0][0].shape
# Initialize the network
net = CIFAR_Net(pars).to(pars.device)
# Post it to the gpu if its there.
# (The network was already moved on the line above; as the last expression of
# the cell this call also makes the notebook display the network.)
net.to(pars.device)

conv1 torch.Size([1, 32, 16, 16])
conv2 torch.Size([1, 64, 8, 8])
input dimension to fc1 4096
conv1.weight torch.Size([32, 3, 5, 5])
conv1.bias torch.Size([32])
conv2.weight torch.Size([64, 32, 5, 5])
conv2.bias torch.Size([64])
fc1.weight torch.Size([256, 4096])
fc1.bias torch.Size([256])
fc_final.weight torch.Size([10, 256])
fc_final.bias torch.Size([10])
tot_pars 1105098





CIFAR_Net(
	(conv1): Conv2d(3, 32, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
	(pool1): MaxPool2d(kernel_size=[2], stride=2, padding=0, dilation=1, ceil_mode=False)
	(conv2): Conv2d(32, 64, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
	(drop2): Dropout2d(p=0.0, inplace=False)
	(pool2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
	(drop_final): Dropout(p=0.0, inplace=False)
	(fc1): Linear(in_features=4096, out_features=256, bias=True)
	(fc_final): Linear(in_features=256, out_features=10, bias=True)
	(criterion): CrossEntropyLoss()
)

# use GPU when possible
pars.device = device
pars.kernel_size=[5,5]
# `train` is truncated further down, so load the full splits again here.
train,val,test=get_data(data_set=pars.data_set)
pars.inp_dim=train[0][0].shape
# Initialize the network and move it to the selected device. Module.to()
# moves the parameters in place, so a single call is enough.
net = CIFAR_Net(pars).to(pars.device)

# Keep only the first pars.numtrain training examples.
train=(train[0][0:pars.numtrain],train[1][0:pars.numtrain])

# Initialize lists to store the training and validation error rates.
train_error_rates = []
test_error_rates = []

# NOTE(review): run_epoch and net_test are not defined in the visible part of
# this notebook; their return values are presumably per-epoch error rates —
# confirm against their definitions.
for i in range(pars.num_epochs):
	# Run one epoch of training
	train_error_rates.append(run_epoch(net, i, train, pars, num=pars.numtrain, ttype="train"))
	# Test on validation set.
	test_error_rates.append(net_test(net, val, pars))

# Keep independent snapshots of the baseline curves; plain assignment would
# only alias the lists, so later appends would change the "original" too.
original_train_error_rates = list(train_error_rates)
original_test_error_rates = list(test_error_rates)

# Save the model to a file, creating the target folder when it is missing.
model_dir = os.path.join(predir,'part2b')
os.makedirs(model_dir, exist_ok=True)
torch.save(net.state_dict(), os.path.join(model_dir,pars.model_name))

conv1 torch.Size([1, 32, 16, 16])
conv2 torch.Size([1, 64, 8, 8])
input dimension to fc1 4096
conv1.weight torch.Size([32, 3, 5, 5])
conv1.bias torch.Size([32])
conv2.weight torch.Size([64, 32, 5, 5])
conv2.bias torch.Size([64])
fc1.weight torch.Size([256, 4096])
fc1.bias torch.Size([256])
fc_final.weight torch.Size([10, 256])
fc_final.bias torch.Size([10])
tot_pars 1105098



	0%|          | 0/10 [00:00<?, ?it/s]



Training set epoch 0: Avg. loss: 0.0022, Accuracy: 1963/10000 (19.63%)


Validation set: Avg. loss: 0.0021, Accuracy: 1261/5000 (25.22%)




	0%|          | 0/10 [00:00<?, ?it/s]



Training set epoch 1: Avg. loss: 0.0020, Accuracy: 2825/10000 (28.25%)


Validation set: Avg. loss: 0.0019, Accuracy: 1569/5000 (31.38%)




	0%|          | 0/10 [00:00<?, ?it/s]



Training set epoch 2: Avg. loss: 0.0018, Accuracy: 3537/10000 (35.37%)


Validation set: Avg. loss: 0.0018, Accuracy: 1808/5000 (36.16%)




	0%|          | 0/10 [00:00<?, ?it/s]



Training set epoch 3: Avg. loss: 0.0017, Accuracy: 4019/10000 (40.19%)


Validation set: Avg. loss: 0.0017, Accuracy: 2048/5000 (40.96%)




	0%|          | 0/10 [00:00<?, ?it/s]



Training set epoch 4: Avg. loss: 0.0016, Accuracy: 4382/10000 (43.82%)


Validation set: Avg. loss: 0.0016, Accuracy: 2055/5000 (41.10%)




	0%|          | 0/10 [00:00<?, ?it/s]



Training set epoch 5: Avg. loss: 0.0015, Accuracy: 4499/10000 (44.99%)


Validation set: Avg. loss: 0.0016, Accuracy: 2183/5000 (43.66%)




	0%|          | 0/10 [00:00<?, ?it/s]



Training set epoch 6: Avg. loss: 0.0015, Accuracy: 4675/10000 (46.75%)


Validation set: Avg. loss: 0.0016, Accuracy: 2210/5000 (44.20%)




	0%|          | 0/10 [00:00<?, ?it/s]



Training set epoch 7: Avg. loss: 0.0015, Accuracy: 4778/10000 (47.78%)


Validation set: Avg. loss: 0.0015, Accuracy: 2277/5000 (45.54%)




	0%|          | 0/10 [00:00<?, ?it/s]



Training set epoch 8: Avg. loss: 0.0014, Accuracy: 4963/10000 (49.63%)


Validation set: Avg. loss: 0.0014, Accuracy: 2364/5000 (47.28%)




	0%|          | 0/10 [00:00<?, ?it/s]



Training set epoch 9: Avg. loss: 0.0014, Accuracy: 5038/10000 (50.38%)


Validation set: Avg. loss: 0.0014, Accuracy: 2485/5000 (49.70%)




	0%|          | 0/10 [00:00<?, ?it/s]



Training set epoch 10: Avg. loss: 0.0013, Accuracy: 5271/10000 (52.71%)


Validation set: Avg. loss: 0.0014, Accuracy: 2526/5000 (50.52%)




	0%|          | 0/10 [00:00<?, ?it/s]



Training set epoch 11: Avg. loss: 0.0013, Accuracy: 5374/10000 (53.74%)


Validation set: Avg. loss: 0.0014, Accuracy: 2618/5000 (52.36%)




	0%|          | 0/10 [00:00<?, ?it/s]



Training set epoch 12: Avg. loss: 0.0013, Accuracy: 5472/10000 (54.72%)


Validation set: Avg. loss: 0.0013, Accuracy: 2603/5000 (52.06%)




	0%|          | 0/10 [00:00<?, ?it/s]



Training set epoch 13: Avg. loss: 0.0012, Accuracy: 5609/10000 (56.09%)


Validation set: Avg. loss: 0.0013, Accuracy: 2642/5000 (52.84%)




	0%|          | 0/10 [00:00<?, ?it/s]



Training set epoch 14: Avg. loss: 0.0012, Accuracy: 5698/10000 (56.98%)


Validation set: Avg. loss: 0.0013, Accuracy: 2734/5000 (54.68%)




	0%|          | 0/10 [00:00<?, ?it/s]



Training set epoch 15: Avg. loss: 0.0012, Accuracy: 5793/10000 (57.93%)


Validation set: Avg. loss: 0.0013, Accuracy: 2750/5000 (55.00%)




	0%|          | 0/10 [00:00<?, ?it/s]



Training set epoch 16: Avg. loss: 0.0011, Accuracy: 5879/10000 (58.79%)


Validation set: Avg. loss: 0.0013, Accuracy: 2774/5000 (55.48%)




	0%|          | 0/10 [00:00<?, ?it/s]



Training set epoch 17: Avg. loss: 0.0012, Accuracy: 5931/10000 (59.31%)


Validation set: Avg. loss: 0.0013, Accuracy: 2702/5000 (54.04%)




	0%|          | 0/10 [00:00<?, ?it/s]



Training set epoch 18: Avg. loss: 0.0011, Accuracy: 6059/10000 (60.59%)


Validation set: Avg. loss: 0.0012, Accuracy: 2799/5000 (55.98%)




	0%|          | 0/10 [00:00<?, ?it/s]



Training set epoch 19: Avg. loss: 0.0011, Accuracy: 6213/10000 (62.13%)


Validation set: Avg. loss: 0.0012, Accuracy: 2818/5000 (56.36%)

Plot training and validation error against epoch number.

# Draw both per-epoch curves on the same axes. `test_error_rates` holds the
# values collected on the validation split.
for _curve, _label in ((train_error_rates, "Training"), (test_error_rates, "Validation")):
	plt.plot(range(pars.num_epochs), _curve, label=_label)
plt.xlabel("Epoch")
plt.ylabel("Error Rate (%)")
plt.legend()
plt.show()

png

Plot the first layer filters.

# Copy the conv1 weights to a NumPy array on the host.
filters = net.conv1.weight.detach().cpu().numpy()

# One panel per filter on a 4 x 8 grid; only input channel 0 of each filter is
# displayed.
fig, axes = plt.subplots(4, 8, figsize=(10, 5))
for _idx, _panel in enumerate(axes.flatten()):
	_panel.imshow(filters[_idx, 0, :, :])
	_panel.axis("off")
plt.show()

png

from skimage.color import rgb2hsv, hsv2rgb

def modify_saturation(data, min_factor=0.75, max_factor=1.25):
	"""Rescale the colour saturation of every image by its own random factor.

	Args:
		data: ``(images, labels)`` pair; each image is a 3-axis array that is
			transposed with (1, 2, 0) before the colour conversion
			(presumably channel-first RGB — confirm against the caller).
		min_factor: lower bound of the uniform saturation multiplier.
		max_factor: upper bound of the uniform saturation multiplier.

	Returns:
		``(modified_images, labels)`` where ``modified_images`` is a single
		array stacking the processed images in their original axis order and
		``labels`` is passed through untouched.
	"""
	images, labels = data

	def _jitter(picture):
		# Divide by 255 and move the leading axis last for the HSV conversion.
		hsv = rgb2hsv(np.transpose(picture / 255, (1, 2, 0)))
		# A fresh multiplier is drawn for every image; the scaled saturation
		# is clipped to [0, 1].
		scale = np.random.uniform(min_factor, max_factor)
		hsv[:, :, 1] = np.clip(hsv[:, :, 1] * scale, 0, 1)
		# Back to RGB and back to the original axis order.
		return np.transpose(hsv2rgb(hsv), (2, 0, 1))

	# Multiply by 255 to undo the division applied inside _jitter.
	stacked = np.array([_jitter(picture) for picture in images]) * 255
	return (stacked, labels)

Show some of the resulting images.

num_examples = 10
fig, axes = plt.subplots(2, num_examples, figsize=(10, 5))

val_sat_dat, val_sat_labels = modify_saturation(val)
# Row 0 shows the untouched validation images, row 1 the saturation-modified
# ones; only the first panel of each row carries a title.
for _row, (_imgs, _title) in enumerate(((val[0], 'Original'), (val_sat_dat, 'After Saturation'))):
	for _col in range(num_examples):
		_panel = axes[_row, _col]
		_panel.imshow(np.transpose(_imgs[_col], (1, 2, 0)))
		if _col == 0:
			_panel.set_title(_title)
		_panel.axis('off')

plt.show()

png

Colorful Classifications