import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms

BATCH_SIZE = 512  # batch size
EPOCHS = 10       # number of training epochs
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")  # use the GPU when available; it is much faster

train_loader = torch.utils.data.DataLoader(
    datasets.MNIST('data', train=True, download=True,
                   transform=transforms.Compose([
                       transforms.ToTensor(),
                       transforms.Normalize((0.1307,), (0.3081,))
                   ])),
    batch_size=BATCH_SIZE, shuffle=True)
test_loader = torch.utils.data.DataLoader(
    datasets.MNIST('data', train=False, transform=transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,))
    ])),
    batch_size=BATCH_SIZE, shuffle=True)
class ConvNet(nn.Module):
    def __init__(self):
        super().__init__()
        # input: 1 x 28 x 28
        self.conv1 = nn.Conv2d(1, 16, 5)   # -> 16 x 24 x 24
        self.pool = nn.MaxPool2d(2, 2)     # halves the spatial size
        self.conv2 = nn.Conv2d(16, 32, 3)  # -> 32 x 10 x 10
        self.fc = nn.Linear(32 * 5 * 5, 10)

    def forward(self, x):
        in_size = x.size(0)
        out = self.conv1(x)   # 24 x 24
        out = F.relu(out)
        out = self.pool(out)  # 12 x 12
        out = self.conv2(out) # 10 x 10
        out = F.relu(out)
        out = self.pool(out)  # 5 x 5
        out = out.view(in_size, -1)
        out = self.fc(out)
        out = F.log_softmax(out, dim=1)
        return out
model = ConvNet().to(DEVICE)
optimizer = optim.Adam(model.parameters())

def train(model, device, train_loader, optimizer, epoch):
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = F.nll_loss(output, target)
        loss.backward()
        optimizer.step()
        if (batch_idx + 1) % 30 == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.item()))
def test(model, device, test_loader):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += F.nll_loss(output, target, reduction='sum').item()  # sum up the loss over the batch
            pred = output.max(1, keepdim=True)[1]  # index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()
    test_loss /= len(test_loader.dataset)
    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))

for epoch in range(1, EPOCHS + 1):
    train(model, DEVICE, train_loader, optimizer, epoch)
    test(model, DEVICE, test_loader)
import numpy as np
import matplotlib.pyplot as plt

def plot_fft2d(input):
    m, n, h, w = input.shape
    x = np.arange(0, h, 1)
    y = np.arange(0, w, 1)
    X, Y = np.meshgrid(x, y, indexing='ij')  # 'ij' indexing so X, Y match the (h, w) shape of var
    output = np.fft.fft2(input, axes=(-2, -1))
    print(output.shape)
    var = np.var(output, axis=(0, 1), keepdims=False)  # per-frequency variance over the first two axes
    print(var.shape)
    ax = plt.axes(projection='3d')
    ax.scatter3D(X, Y, var, cmap='Blues')  # scatter plot of the variance
    ax.plot_surface(X, Y, var, cmap='rainbow')
    plt.show()
param = model.state_dict()
W1 = param['conv1.weight']  # (1,28,28) * (16,1,5,5)  -> (16,24,24)
W2 = param['conv2.weight']  # (16,12,12) * (32,16,3,3) -> (32,10,10)
# zero-pad each kernel up to the spatial size of its input (5x5 -> 28x28, 3x3 -> 12x12)
W1_pad = np.pad(W1.cpu().numpy(), ((0, 0), (0, 0), (0, 23), (0, 23)))
W2_pad = np.pad(W2.cpu().numpy(), ((0, 0), (0, 0), (0, 9), (0, 9)))
print(W1_pad.shape)
print(W2_pad.shape)
plot_fft2d(W1_pad)
plot_fft2d(W2_pad)
The variance of the distributions is shown in the figures above.
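An aside on why the kernels are zero-padded to the input's spatial size before the FFT: this puts each kernel's frequency response on the same grid as the input spectrum, which is the frequency-domain view of (circular) convolution. A minimal sketch of that identity, using random arrays rather than the trained weights:

import numpy as np

img = np.random.randn(28, 28)
ker = np.random.randn(5, 5)
ker_pad = np.pad(ker, ((0, 23), (0, 23)))  # same padding scheme as W1_pad above

# convolution theorem: circular convolution == pointwise product of FFTs
conv_fft = np.fft.ifft2(np.fft.fft2(img) * np.fft.fft2(ker_pad)).real

# direct circular convolution for comparison
conv_direct = np.zeros((28, 28))
for u in range(28):
    for v in range(28):
        for i in range(5):
            for j in range(5):
                conv_direct[u, v] += ker[i, j] * img[(u - i) % 28, (v - j) % 28]

print(np.allclose(conv_fft, conv_direct))  # True

A real convolution layer performs linear rather than circular convolution, so this is only an approximation of what conv1 computes, but the padding scheme is exactly the one used for W1_pad above.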
So far only the distribution of the weights after the FFT has been examined, not the distribution of the activations after the FFT, so the following is a supplement. Code:
(The imports, data loaders, ConvNet definition, training and testing code, and plot_fft2d are identical to the first listing above and are omitted here; only the new activation analysis follows.)
param = model.state_dict()
with torch.no_grad():
    for data, target in test_loader:
        # take a single image so each stage is analyzed one feature map at a time
        data, target = data[0].reshape(1, 1, 28, 28).to(DEVICE), target[0].to(DEVICE)

        # variance distribution of the input
        z = data.cpu().numpy()
        print(z.shape)
        plot_fft2d(z)

        x = F.conv2d(data, weight=param['conv1.weight'], bias=param['conv1.bias'], stride=1, padding=0)
        x = F.relu(x)
        x = F.max_pool2d(x, kernel_size=2, stride=2)

        # variance distribution after the first conv block
        z = x.cpu().numpy()
        print(z.shape)
        plot_fft2d(z)

        x = F.conv2d(x, weight=param['conv2.weight'], bias=param['conv2.bias'], stride=1, padding=0)
        x = F.relu(x)
        x = F.max_pool2d(x, kernel_size=2, stride=2)
        x = x.view(1, -1)
        out = F.linear(x, weight=param['fc.weight'], bias=param['fc.bias'])
        print(torch.argmax(out, dim=1, keepdim=False).item(), target.item())
        break  # one sample is enough for this check
The results are shown below.

Looking at the per-frequency variance of the weights and of the activations after the FFT, the variance is large near the four corners and small near the center. This suggests the following idea: give frequency points with large variance a wider bit width and frequency points with small variance a narrower bit width, so as to compress the model.
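As a rough illustration of that idea (not part of the original experiment), the sketch below builds a per-frequency bit-width map from a variance map by thresholding at variance quantiles; the function name bitwidth_map, the quantile cut-points, and the 2/4/8-bit levels are all arbitrary choices for illustration.

import numpy as np

def bitwidth_map(var, bits=(2, 4, 8), quantiles=(0.5, 0.9)):
    # var: (h, w) per-frequency variance map, e.g. the `var` computed in plot_fft2d
    # bits/quantiles are illustrative defaults, not values from the experiment
    cuts = np.quantile(var, quantiles)   # variance thresholds
    alloc = np.full(var.shape, bits[0])  # low-variance points get the fewest bits
    alloc[var >= cuts[0]] = bits[1]
    alloc[var >= cuts[1]] = bits[2]      # high-variance points (the four corners) get the most
    return alloc

# example: average bit width per frequency point for a random variance map
var = np.random.rand(28, 28)
print(bitwidth_map(var).mean(), "bits per frequency point on average")

Under this allocation, the average bit width (and hence the storage cost) is controlled by the quantile cut-points, while the high-variance frequencies keep most of their precision.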
In the earlier measurement of the activation variance after the FFT, only one feature map was used at a time, so the variance pattern was less distinct than for the weights. The following improvement is made: each statistic is now computed over 512 feature maps (i.e. batch_size = 512) after the FFT. Code:
(Again, everything up to and including plot_fft2d is identical to the first listing and is omitted; only the batched analysis follows.)
param = model.state_dict()
with torch.no_grad():
    for data, target in test_loader:
        data, target = data.to(DEVICE), target.to(DEVICE)

        # variance distribution of the input batch
        z = data.cpu().numpy()
        print(z.shape)
        plot_fft2d(z)

        x = F.conv2d(data, weight=param['conv1.weight'], bias=param['conv1.bias'], stride=1, padding=0)
        x = F.relu(x)
        x = F.max_pool2d(x, kernel_size=2, stride=2)

        # variance distribution after the first conv block
        z = x.cpu().numpy()
        print(z.shape)
        plot_fft2d(z)

        x = F.conv2d(x, weight=param['conv2.weight'], bias=param['conv2.bias'], stride=1, padding=0)
        x = F.relu(x)
        x = F.max_pool2d(x, kernel_size=2, stride=2)
        x = x.view(x.size(0), -1)  # avoid hard-coding 512: the last test batch is smaller
        out = F.linear(x, weight=param['fc.weight'], bias=param['fc.bias'])
        print(torch.sum(torch.argmax(out, dim=1, keepdim=False).eq(target)))
        break  # one batch of BATCH_SIZE images is enough for the statistics
Experimental results: the pattern of large variance at the four corners and small variance in the middle is clearer than before.
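One caveat when reading these plots: np.fft.fft2 puts the zero-frequency (DC) bin at index (0, 0), so the four high-variance corners are really the low frequencies, split across the corners by the unshifted layout. A minimal sketch, using random data as a stand-in for a batch of feature maps, showing how np.fft.fftshift recenters the map so the corners merge into a single central peak:

import numpy as np

x = np.random.randn(512, 1, 28, 28)  # stand-in for a batch of feature maps
spec = np.fft.fft2(x, axes=(-2, -1))
var = np.var(spec, axis=(0, 1))      # per-frequency variance, DC bin at (0, 0)
var_centered = np.fft.fftshift(var)  # DC bin moved to the center
print(np.allclose(var_centered[14, 14], var[0, 0]))  # True: the old corner bin is now the center bin

Plotting var_centered instead of var would show one central hill rather than four corner peaks, which can make the bit-allocation picture easier to read.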