Pruning is an important technique for compressing neural networks. Some networks (AlexNet, VGG16, and so on) take up hundreds of megabytes for the full model. That may not matter much on a desktop CPU or GPU, but many deep-learning applications have to be deployed on mobile devices, and unlike PCs, mobile devices are very sensitive to power consumption, chip area, and similar constraints. For example, on an iPhone an app of several hundred MB cannot be downloaded over the cellular network and has to be fetched over Wi-Fi. Experiments also show that the energy cost of memory accesses is generally higher than that of the compute units, and DDR (off-chip DRAM) accesses in particular consume far more energy than computation does. Compressing a neural network is therefore an important way to speed it up. This post gives a first look at pruning.

Pruning means "cutting away" redundant parameters in a network. Concretely, the parameters to be pruned are set to 0 so that they no longer take part in subsequent inference or training. Pruning techniques currently fall into two main categories: structured pruning and unstructured pruning.
Structured pruning

Structured pruning, as the name suggests, prunes parameters in structured groups. For a fully connected layer's weight matrix it typically removes an entire row or column, which amounts to removing a whole neuron. For a convolutional layer, structured pruning can remove an entire filter (pruning along the output channels) or an entire input channel, and so on.
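As a minimal illustration (my own sketch, not part of the original text; the layer sizes and the channel index are arbitrary), structured pruning of a convolutional layer amounts to zeroing an entire output-channel slice of the weight tensor:

import torch
from torch import nn

conv = nn.Conv2d(in_channels=8, out_channels=16, kernel_size=3)

# Structured pruning of output channel 3: zero the whole filter weight[3, :, :, :]
# (and, if present, its bias), which is equivalent to removing that filter.
with torch.no_grad():
    conv.weight[3].zero_()
    if conv.bias is not None:
        conv.bias[3] = 0.0

print(torch.count_nonzero(conv.weight[3]))  # tensor(0)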
Unstructured pruning

Unstructured pruning removes individual weights and does not require whole rows or columns to be pruned. Its advantage is that it tends to preserve accuracy better, because structured pruning can easily cut away weights that are actually important. Unstructured pruning has its own inconvenience, though: it does not change the shape of the weight tensors, so unless a special compressed storage format is used (such as CSR or CSC), the model does not actually become any smaller; it is simply filled with irregularly scattered zero entries.
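To make the storage point concrete, here is a small sketch of my own (assuming SciPy is available) showing that a weight matrix with scattered zeros takes up just as much memory as before until it is converted to a compressed sparse format such as CSR:

import numpy as np
from scipy import sparse

w = np.random.randn(120, 400).astype(np.float32)
w[np.abs(w) < 1.0] = 0.0           # unstructured pruning: zero the small weights in place

dense_bytes = w.nbytes             # unchanged: the dense array still stores every zero
w_csr = sparse.csr_matrix(w)       # CSR keeps only the non-zeros plus two index arrays
csr_bytes = w_csr.data.nbytes + w_csr.indices.nbytes + w_csr.indptr.nbytes
print(dense_bytes, csr_bytes)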
Experiments

In PyTorch, pruning is very convenient. We can import the pruning utilities with
import torch.nn.utils.prune as prune
and then call the pruning functions on individual modules.
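Before the full experiment, here is a minimal sketch (my own, with an arbitrary Linear layer) of what prune.random_unstructured actually does to a module: the original weights are kept as weight_orig, a binary weight_mask buffer is added, and module.weight becomes their elementwise product.

import torch
from torch import nn
import torch.nn.utils.prune as prune

layer = nn.Linear(4, 3)
prune.random_unstructured(layer, name='weight', amount=0.5)

print([n for n, _ in layer.named_parameters()])  # the weight now lives in 'weight_orig'
print([n for n, _ in layer.named_buffers()])     # a binary 'weight_mask' buffer has been added
print(layer.weight)                              # weight_orig * weight_mask; about half the entries are 0

With that mechanism in mind, below is a simple unstructured pruning experiment in PyTorch.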
import torch
from torch import nn
from torch.nn import functional as F
from torch import optim
from keras.utils import to_categorical
import numpy as np
import torch.nn.utils.prune as prune
path="F:\mnist.npz"
f = np.load(path)
train_X, train_y = f['x_train'], f['y_train']
test_X, test_y = f['x_test'], f['y_test']
f.close()
train_X = train_X.reshape(-1, 28, 28, 1)
train_X = train_X.astype('float32')
train_X /= 255
train_y = to_categorical(train_y, 10)
# preprocess the test set the same way so it can be used for evaluation below
test_X = test_X.astype('float32')
test_X /= 255
test_y = to_categorical(test_y, 10)
# Define the network
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, 5, stride=1, padding=0)
        self.relu1 = nn.ReLU()
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
        self.conv2 = nn.Conv2d(32, 16, 3, stride=1, padding=0)
        self.relu2 = nn.ReLU()
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
        self.fc1 = nn.Linear(400, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        x = self.conv1(x)
        x = self.relu1(x)
        x = self.pool1(x)
        x = self.conv2(x)
        x = self.relu2(x)
        x = self.pool2(x)
        x = x.view(-1, 400)
        x = self.fc1(x)
        x = F.relu(x)
        x = self.fc2(x)
        x = F.relu(x)
        x = self.fc3(x)
        return x
net = Net()
optimizer = optim.SGD(net.parameters(), lr=0.01, momentum=0.9)
train_loss = []
precision=0
for epoch in range(10):
    for i in range(600):
        x = train_X[i*100:i*100+100]
        y = train_y[i*100:i*100+100]
        x = x.reshape(-1, 1, 28, 28)
        x = torch.from_numpy(x)  # (batch_size, 1, 28, 28)
        y = torch.from_numpy(y)  # (batch_size, 10) one-hot labels
        out = net(x)
        loss = F.mse_loss(out, y)  # MSE between network output and one-hot label
        optimizer.zero_grad()      # clear gradients from the previous step
        loss.backward()            # backpropagate to compute gradients
        optimizer.step()           # apply the parameter update
        train_loss.append(loss.item())
        if i % 10 == 0:
            print(epoch, i, np.mean(train_loss))
            train_loss = []
        # after epoch 4, randomly prune a further 1% of conv1's weights every 50 iterations
        if epoch > 4 and i % 50 == 0:
            module = net.conv1
            prune.random_unstructured(module, name='weight', amount=0.01)
            print(torch.sum(net.conv1.weight == 0))
# evaluate on the 10,000 test images
total_correct = 0
for i in range(10000):
    x = test_X[i]
    y = test_y[i]
    x = torch.from_numpy(x)
    y = torch.from_numpy(y)
    x = x.view(1, 1, 28, 28)
    y = y.view(1, 10)
    out = net(x)
    pred = out.argmax(dim=1)   # index of the largest output
    label = y.argmax(dim=1)
    correct = pred.eq(label).sum().float().item()  # 1.0 if the prediction is correct
    total_correct += correct
acc = total_correct / 10000.0
print('test acc:', acc)
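One caveat about the script above: after random_unstructured, conv1's zeros live only in the weight_orig / weight_mask reparametrization; they become the module's plain weight parameter only once prune.remove is called. A sketch of how that could be done at the end (my addition, not in the original experiment):

# Fold weight_orig * weight_mask back into a plain 'weight' parameter and drop the mask.
# Note: the pruning can no longer be undone after this.
prune.remove(net.conv1, 'weight')
print([n for n, _ in net.conv1.named_parameters()])  # conv1 again exposes an ordinary 'weight'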
Pruning with l1_unstructured:
import torch
from torch import nn
from torch.nn import functional as F
from torch import optim
from keras.utils import to_categorical
import numpy as np
import torch.nn.utils.prune as prune
path="F:\mnist.npz"
f = np.load(path)
train_X, train_y = f['x_train'], f['y_train']
test_X, test_y = f['x_test'], f['y_test']
f.close()
train_X = train_X.reshape(-1, 28, 28, 1)
train_X = train_X.astype('float32')
train_X /= 255
train_y = to_categorical(train_y, 10)
# preprocess the test set the same way so it can be used for evaluation below
test_X = test_X.astype('float32')
test_X /= 255
test_y = to_categorical(test_y, 10)
# Define the network
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, 5, stride=1, padding=0)
        self.relu1 = nn.ReLU()
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
        self.conv2 = nn.Conv2d(32, 16, 3, stride=1, padding=0)
        self.relu2 = nn.ReLU()
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
        self.fc1 = nn.Linear(400, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        x = self.conv1(x)
        x = self.relu1(x)
        x = self.pool1(x)
        x = self.conv2(x)
        x = self.relu2(x)
        x = self.pool2(x)
        x = x.view(-1, 400)
        x = self.fc1(x)
        x = F.relu(x)
        x = self.fc2(x)
        x = F.relu(x)
        x = self.fc3(x)
        return x
net = Net()
optimizer = optim.SGD(net.parameters(), lr=0.01, momentum=0.9)
train_loss = []
precision=0
for epoch in range(6):
    for i in range(600):
        x = train_X[i*100:i*100+100]
        y = train_y[i*100:i*100+100]
        x = x.reshape(-1, 1, 28, 28)
        x = torch.from_numpy(x)  # (batch_size, 1, 28, 28)
        y = torch.from_numpy(y)  # (batch_size, 10) one-hot labels
        out = net(x)
        loss = F.mse_loss(out, y)  # MSE between network output and one-hot label
        optimizer.zero_grad()      # clear gradients from the previous step
        loss.backward()            # backpropagate to compute gradients
        optimizer.step()           # apply the parameter update
        train_loss.append(loss.item())
        if i % 10 == 0:
            print(epoch, i, np.mean(train_loss))
            train_loss = []
        # in the last two epochs, prune a further 2% of conv1's weights (smallest |w| first) every 100 iterations
        if epoch >= 4 and i % 100 == 0:
            module = net.conv1
            prune.l1_unstructured(module, name='weight', amount=0.02)
            print(torch.sum(net.conv1.weight == 0))
# evaluate on the 10,000 test images
total_correct = 0
for i in range(10000):
    x = test_X[i]
    y = test_y[i]
    x = torch.from_numpy(x)
    y = torch.from_numpy(y)
    x = x.view(1, 1, 28, 28)
    y = y.view(1, 10)
    out = net(x)
    pred = out.argmax(dim=1)   # index of the largest output
    label = y.argmax(dim=1)
    correct = pred.eq(label).sum().float().item()  # 1.0 if the prediction is correct
    total_correct += correct
acc = total_correct / 10000.0
print('test acc:', acc)
print(net.conv1.weight)
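A note on the script above: l1_unstructured is applied to the same parameter several times, and PyTorch composes the successive masks (the pruning history is kept in a PruningContainer), so the sparsity of conv1 grows with every call rather than staying at 2%. The cumulative effect can be read off the mask buffer, for example (my own snippet, run after training):

# Fraction of conv1 weights zeroed out by the accumulated pruning mask.
mask = dict(net.conv1.named_buffers())['weight_mask']
print(1.0 - mask.mean().item())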
Structured pruning along dim 0 (output channels), using the L2 norm:
import torch
from torch import nn
from torch.nn import functional as F
from torch import optim
from keras.utils import to_categorical
import numpy as np
import torch.nn.utils.prune as prune
path="F:\mnist.npz"
f = np.load(path)
train_X, train_y = f['x_train'], f['y_train']
test_X, test_y = f['x_test'], f['y_test']
f.close()
train_X = train_X.reshape(-1, 28, 28, 1)
train_X = train_X.astype('float32')
train_X /= 255
train_y = to_categorical(train_y, 10)
# preprocess the test set the same way so it can be used for evaluation below
test_X = test_X.astype('float32')
test_X /= 255
test_y = to_categorical(test_y, 10)
# Define the network
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, 5, stride=1, padding=0)
        self.relu1 = nn.ReLU()
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
        self.conv2 = nn.Conv2d(32, 16, 3, stride=1, padding=0)
        self.relu2 = nn.ReLU()
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
        self.fc1 = nn.Linear(400, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        x = self.conv1(x)
        x = self.relu1(x)
        x = self.pool1(x)
        x = self.conv2(x)
        x = self.relu2(x)
        x = self.pool2(x)
        x = x.view(-1, 400)
        x = self.fc1(x)
        x = F.relu(x)
        x = self.fc2(x)
        x = F.relu(x)
        x = self.fc3(x)
        return x
net = Net()
optimizer = optim.SGD(net.parameters(), lr=0.01, momentum=0.9)
train_loss = []
precision=0
for epoch in range(6):
    for i in range(600):
        x = train_X[i*100:i*100+100]
        y = train_y[i*100:i*100+100]
        x = x.reshape(-1, 1, 28, 28)
        x = torch.from_numpy(x)  # (batch_size, 1, 28, 28)
        y = torch.from_numpy(y)  # (batch_size, 10) one-hot labels
        out = net(x)
        loss = F.mse_loss(out, y)  # MSE between network output and one-hot label
        optimizer.zero_grad()      # clear gradients from the previous step
        loss.backward()            # backpropagate to compute gradients
        optimizer.step()           # apply the parameter update
        train_loss.append(loss.item())
        if i % 10 == 0:
            print(epoch, i, np.mean(train_loss))
            train_loss = []
        # in the last two epochs, prune one more output channel of conv1 (smallest L2 norm) every 100 iterations
        if epoch >= 4 and i % 100 == 0:
            module = net.conv1
            prune.ln_structured(module, name="weight", amount=1, n=2, dim=0)
            print(torch.sum(net.conv1.weight == 0))
# evaluate on the 10,000 test images
total_correct = 0
for i in range(10000):
    x = test_X[i]
    y = test_y[i]
    x = torch.from_numpy(x)
    y = torch.from_numpy(y)
    x = x.view(1, 1, 28, 28)
    y = y.view(1, 10)
    out = net(x)
    pred = out.argmax(dim=1)   # index of the largest output
    label = y.argmax(dim=1)
    correct = pred.eq(label).sum().float().item()  # 1.0 if the prediction is correct
    total_correct += correct
acc = total_correct / 10000.0
print('test acc:', acc)
print(net.conv1.weight)
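Note that for ln_structured an integer amount is an absolute number of channels, so amount=1 with dim=0 removes one output channel (the one with the smallest L2 norm) per call, whereas a float would be read as a fraction. Which filters of conv1 have been zeroed can be checked like this (illustrative snippet of mine):

# A filter whose weights are all zero has been structurally pruned.
channel_is_pruned = (net.conv1.weight.abs().sum(dim=(1, 2, 3)) == 0)
print(channel_is_pruned)             # one boolean per output channel of conv1
print(int(channel_is_pruned.sum()))  # number of pruned filters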
Pruning multiple groups of parameters:
import torch
from torch import nn
from torch.nn import functional as F
from torch import optim
from keras.utils import to_categorical
import numpy as np
import torch.nn.utils.prune as prune
path="F:\mnist.npz"
f = np.load(path)
train_X, train_y = f['x_train'], f['y_train']
test_X, test_y = f['x_test'], f['y_test']
f.close()
train_X = train_X.reshape(-1, 28, 28, 1)
train_X = train_X.astype('float32')
train_X /= 255
train_y = to_categorical(train_y, 10)
# preprocess the test set the same way so it can be used for evaluation below
test_X = test_X.astype('float32')
test_X /= 255
test_y = to_categorical(test_y, 10)
# Define the network
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, 5, stride=1, padding=0)
        self.relu1 = nn.ReLU()
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
        self.conv2 = nn.Conv2d(32, 16, 3, stride=1, padding=0)
        self.relu2 = nn.ReLU()
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
        self.fc1 = nn.Linear(400, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        x = self.conv1(x)
        x = self.relu1(x)
        x = self.pool1(x)
        x = self.conv2(x)
        x = self.relu2(x)
        x = self.pool2(x)
        x = x.view(-1, 400)
        x = self.fc1(x)
        x = F.relu(x)
        x = self.fc2(x)
        x = F.relu(x)
        x = self.fc3(x)
        return x
net = Net()
optimizer = optim.SGD(net.parameters(), lr=0.01, momentum=0.9)
train_loss = []
precision=0
for epoch in range(6):
    for i in range(600):
        x = train_X[i*100:i*100+100]
        y = train_y[i*100:i*100+100]
        x = x.reshape(-1, 1, 28, 28)
        x = torch.from_numpy(x)  # (batch_size, 1, 28, 28)
        y = torch.from_numpy(y)  # (batch_size, 10) one-hot labels
        out = net(x)
        loss = F.mse_loss(out, y)  # MSE between network output and one-hot label
        optimizer.zero_grad()      # clear gradients from the previous step
        loss.backward()            # backpropagate to compute gradients
        optimizer.step()           # apply the parameter update
        train_loss.append(loss.item())
        if i % 10 == 0:
            print(epoch, i, np.mean(train_loss))
            train_loss = []
        if epoch >= 4 and i % 100 == 0:
            for name, module in net.named_modules():
                # prune 2% of the connections in every Conv2d layer
                if isinstance(module, torch.nn.Conv2d):
                    prune.l1_unstructured(module, name='weight', amount=0.02)
                # prune 4% of the connections in every Linear layer
                elif isinstance(module, torch.nn.Linear):
                    prune.l1_unstructured(module, name='weight', amount=0.04)
            print(torch.sum(net.conv1.weight == 0))
            print(torch.sum(net.fc1.weight == 0))
# evaluate on the 10,000 test images
total_correct = 0
for i in range(10000):
    x = test_X[i]
    y = test_y[i]
    x = torch.from_numpy(x)
    y = torch.from_numpy(y)
    x = x.view(1, 1, 28, 28)
    y = y.view(1, 10)
    out = net(x)
    pred = out.argmax(dim=1)   # index of the largest output
    label = y.argmax(dim=1)
    correct = pred.eq(label).sum().float().item()  # 1.0 if the prediction is correct
    total_correct += correct
acc = total_correct / 10000.0
print('test acc:', acc)
print(net.conv1.weight)
print(net.fc1.weight)
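Since several layers are pruned here, a small helper loop (my own addition) gives a per-layer sparsity report instead of inspecting each weight tensor by hand:

# Fraction of zeroed weights in every convolutional and linear layer after pruning.
for name, module in net.named_modules():
    if isinstance(module, (nn.Conv2d, nn.Linear)):
        w = module.weight
        sparsity = 100.0 * float(torch.sum(w == 0)) / float(w.nelement())
        print("{}: {:.2f}% of weights pruned".format(name, sparsity))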
Global pruning:
import torch
from torch import nn
from torch.nn import functional as F
from torch import optim
from keras.utils import to_categorical
import numpy as np
import torch.nn.utils.prune as prune
path="F:\mnist.npz"
f = np.load(path)
train_X, train_y = f['x_train'], f['y_train']
test_X, test_y = f['x_test'], f['y_test']
f.close()
train_X = train_X.reshape(-1, 28, 28, 1)
train_X = train_X.astype('float32')
train_X /= 255
train_y = to_categorical(train_y, 10)
# preprocess the test set the same way so it can be used for evaluation below
test_X = test_X.astype('float32')
test_X /= 255
test_y = to_categorical(test_y, 10)
# Define the network
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, 5, stride=1, padding=0)
        self.relu1 = nn.ReLU()
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
        self.conv2 = nn.Conv2d(32, 16, 3, stride=1, padding=0)
        self.relu2 = nn.ReLU()
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
        self.fc1 = nn.Linear(400, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        x = self.conv1(x)
        x = self.relu1(x)
        x = self.pool1(x)
        x = self.conv2(x)
        x = self.relu2(x)
        x = self.pool2(x)
        x = x.view(-1, 400)
        x = self.fc1(x)
        x = F.relu(x)
        x = self.fc2(x)
        x = F.relu(x)
        x = self.fc3(x)
        return x
net = Net()
optimizer = optim.SGD(net.parameters(), lr=0.01, momentum=0.9)
train_loss = []
precision=0
for epoch in range(6):
    for i in range(600):
        x = train_X[i*100:i*100+100]
        y = train_y[i*100:i*100+100]
        x = x.reshape(-1, 1, 28, 28)
        x = torch.from_numpy(x)  # (batch_size, 1, 28, 28)
        y = torch.from_numpy(y)  # (batch_size, 10) one-hot labels
        out = net(x)
        loss = F.mse_loss(out, y)  # MSE between network output and one-hot label
        optimizer.zero_grad()      # clear gradients from the previous step
        loss.backward()            # backpropagate to compute gradients
        optimizer.step()           # apply the parameter update
        train_loss.append(loss.item())
        if i % 10 == 0:
            print(epoch, i, np.mean(train_loss))
            train_loss = []
        # in the last two epochs, apply global L1 unstructured pruning (2%) across all listed layers every 100 iterations
        if epoch >= 4 and i % 100 == 0:
            parameters_to_prune = (
                (net.conv1, 'weight'),
                (net.conv2, 'weight'),
                (net.fc1, 'weight'),
                (net.fc2, 'weight'),
                (net.fc3, 'weight'),
            )
            prune.global_unstructured(
                parameters_to_prune,
                pruning_method=prune.L1Unstructured,
                amount=0.02,
            )
            print("Sparsity in conv1.weight: {:.2f}%".format(
                100. * float(torch.sum(net.conv1.weight == 0))
                / float(net.conv1.weight.nelement())))
            print("Sparsity in conv2.weight: {:.2f}%".format(
                100. * float(torch.sum(net.conv2.weight == 0))
                / float(net.conv2.weight.nelement())))
            print("Sparsity in fc1.weight: {:.2f}%".format(
                100. * float(torch.sum(net.fc1.weight == 0))
                / float(net.fc1.weight.nelement())))
            print("Sparsity in fc2.weight: {:.2f}%".format(
                100. * float(torch.sum(net.fc2.weight == 0))
                / float(net.fc2.weight.nelement())))
            print("Sparsity in fc3.weight: {:.2f}%".format(
                100. * float(torch.sum(net.fc3.weight == 0))
                / float(net.fc3.weight.nelement())))
            print("Global sparsity: {:.2f}%".format(
                100. * float(
                    torch.sum(net.conv1.weight == 0)
                    + torch.sum(net.conv2.weight == 0)
                    + torch.sum(net.fc1.weight == 0)
                    + torch.sum(net.fc2.weight == 0)
                    + torch.sum(net.fc3.weight == 0))
                / float(
                    net.conv1.weight.nelement()
                    + net.conv2.weight.nelement()
                    + net.fc1.weight.nelement()
                    + net.fc2.weight.nelement()
                    + net.fc3.weight.nelement())))
# evaluate on the 10,000 test images
total_correct = 0
for i in range(10000):
    x = test_X[i]
    y = test_y[i]
    x = torch.from_numpy(x)
    y = torch.from_numpy(y)
    x = x.view(1, 1, 28, 28)
    y = y.view(1, 10)
    out = net(x)
    pred = out.argmax(dim=1)   # index of the largest output
    label = y.argmax(dim=1)
    correct = pred.eq(label).sum().float().item()  # 1.0 if the prediction is correct
    total_correct += correct
acc = total_correct / 10000.0
print('test acc:', acc)
print(net.conv1.weight)
print(net.fc1.weight)
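Finally, note that while the pruning reparametrization is in place, the network's state_dict stores a weight_orig and a weight_mask for every pruned tensor. If a clean, permanently pruned model is wanted (for example, for deployment), prune.remove should be applied first; a hedged sketch (the file name is arbitrary):

# Make all pruning permanent, then save an ordinary state_dict.
for module in [net.conv1, net.conv2, net.fc1, net.fc2, net.fc3]:
    prune.remove(module, 'weight')
torch.save(net.state_dict(), "lenet_pruned.pth")
print(list(net.state_dict().keys()))  # only plain 'weight' / 'bias' entries remain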