Thoughts after reading this paper:
For ACM, the paper proposes two variants: DCM and GCM.
1. The composition of ACM:
① Adaptive context pooling: a 1×1 convolution plus a softmax, together with a transpose operation.
② Context shifting: the pooled context feature map is passed through two 1×1 convolutions with non-linear activation (1×1 conv + BN + ReLU, then a second 1×1 conv) and a sigmoid function.
③ The resulting channel shift weights are multiplied element-wise with the input feature map.
That is the overall ACM pipeline (a minimal sketch follows below); the first step, adaptive context pooling, differs between the DCM and GCM modules.
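A minimal sketch of the three ACM steps on a single feature map (the class and variable names here are my own, not from the paper; the GCM code further below implements essentially this flow):

import torch
import torch.nn as nn

class ACMSketch(nn.Module):
    """Hypothetical single-branch ACM: pool context adaptively, shift it, reweight the input."""
    def __init__(self, channels, reduction=4):
        super().__init__()
        # Step 1: adaptive context pooling = 1x1 conv mask + softmax over spatial positions.
        self.conv_mask = nn.Conv2d(channels, 1, kernel_size=1)
        self.softmax = nn.Softmax(dim=2)
        # Step 2: context shifting = 1x1 conv + BN + ReLU, a second 1x1 conv, then sigmoid.
        mid = channels // reduction
        self.shift = nn.Sequential(
            nn.Conv2d(channels, mid, kernel_size=1, bias=False),
            nn.BatchNorm2d(mid),
            nn.ReLU(inplace=True),
            nn.Conv2d(mid, channels, kernel_size=1, bias=True),
            nn.Sigmoid(),
        )

    def forward(self, x):
        n, c, h, w = x.size()
        mask = self.softmax(self.conv_mask(x).view(n, 1, h * w))           # [N, 1, H*W]
        context = torch.matmul(x.view(n, c, h * w), mask.transpose(1, 2))  # [N, C, 1]
        context = context.view(n, c, 1, 1)                                 # [N, C, 1, 1]
        # Step 3: multiply the shifted channel weights element-wise with the input.
        return x * self.shift(context)

acm = ACMSketch(64)
y = acm(torch.randn(2, 64, 32, 24))  # -> [2, 64, 32, 24]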
2. DCM
DCM aggregates branches with different resolutions, so the first ACM step, adaptive context pooling, differs: each input map is pooled down to the smallest branch resolution, i.e. every output position attends over a group of (H × W) / (MH × MW) input positions, and the pooled maps are then concatenated (concat) along the channel dimension.
The code is as follows:
import torch
import torch.nn as nn
import torch.nn.functional as F

class DenseContextModeling(nn.Module):
    def __init__(self, channels, reduction):
        super().__init__()
        num_branches = len(channels)
        self.reduction = reduction[num_branches - 2]
        self.channels = channels
        total_channel = sum(channels)
        mid_channels = total_channel // self.reduction
        # Adaptive context pooling in ACM: a context mask, i.e. a 1x1 convolution plus a softmax.
        self.conv_mask = nn.ModuleList([
            nn.Conv2d(channels[i], 1, kernel_size=1, stride=1, padding=0, bias=True)
            for i in range(len(channels))
        ])
        self.softmax = nn.Softmax(dim=2)
        # Context shifting: two 1x1 convolutions (with BN + ReLU in between) and a sigmoid.
        self.channel_attention = nn.Sequential(
            nn.Conv2d(total_channel, mid_channels, kernel_size=1, stride=1, padding=0, bias=False),
            nn.BatchNorm2d(mid_channels),
            nn.ReLU(inplace=True),
            nn.Conv2d(mid_channels, total_channel, kernel_size=1, stride=1, padding=0, bias=True),
            nn.Sigmoid()
        )

    # The actual adaptive context pooling: attention-pools branch i down to the smallest resolution.
    def global_spatial_pool(self, x, mini_size, i):
        batch, channel, height, width = x.size()
        mini_height, mini_width = mini_size
        # [N, C, H, W]
        x_m = x
        # [N, C, H * W]
        x_m = x_m.view(batch, channel, height * width)
        # [N, MH * MW, C, (H * W) / (MH * MW)]
        x_m = x_m.view(batch, mini_height * mini_width, channel, (height * width) // (mini_height * mini_width))
        # [N, 1, H, W]
        mask = self.conv_mask[i](x)
        # [N, 1, H * W]
        mask = mask.view(batch, 1, height * width)
        # [N, 1, H * W]
        mask = self.softmax(mask)
        # [N, MH * MW, (H * W) / (MH * MW)]
        mask = mask.view(batch, mini_height * mini_width, (height * width) // (mini_height * mini_width))
        # [N, MH * MW, (H * W) / (MH * MW), 1]
        mask = mask.unsqueeze(-1)
        # [N, MH * MW, C, 1]
        x = torch.matmul(x_m, mask)
        # [N, C, MH * MW, 1]
        x = x.permute(0, 2, 1, 3)
        # [N, C, MH, MW]
        x = x.view(batch, channel, mini_height, mini_width)
        return x

    def forward(self, x):
        # The last branch has the smallest spatial size; pool every other branch down to it.
        mini_size = x[-1].size()[-2:]
        out = [self.global_spatial_pool(s, mini_size, i) for s, i in zip(x[:-1], range(len(x)))] + [x[-1]]
        out = torch.cat(out, dim=1)
        out = self.channel_attention(out)
        out = torch.split(out, self.channels, dim=1)
        # Upsample each attention map back to its branch resolution and reweight the input.
        out = [s * F.interpolate(a, size=s.size()[-2:], mode='nearest') for s, a in zip(x, out)]
        return out
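A quick shape check (toy sizes of my own choosing, assuming two branches whose resolutions differ by a factor of 2 and a reduction list indexed as above):

# Hypothetical usage of DenseContextModeling with two branches.
dcm = DenseContextModeling(channels=[32, 64], reduction=[8, 8, 8])
x = [torch.randn(2, 32, 64, 48), torch.randn(2, 64, 32, 24)]
out = dcm(x)
print([o.shape for o in out])
# [torch.Size([2, 32, 64, 48]), torch.Size([2, 64, 32, 24])]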
3. GCM
GCM works on a single branch: its adaptive context pooling attends over all spatial positions at once and pools the whole map into a single 1 × 1 context vector. The code is as follows:
import torch.utils.checkpoint as cp

class GlobalContextModeling(nn.Module):
    def __init__(self, channels, num_branch, reduction, with_cp=False):
        super().__init__()
        self.with_cp = with_cp
        self.reduction = reduction[num_branch]
        mid_channels = channels // self.reduction
        # Adaptive context pooling: a 1x1 convolution mask plus a softmax.
        self.conv_mask = nn.Conv2d(channels, 1, kernel_size=1, stride=1, padding=0, bias=True)
        self.softmax = nn.Softmax(dim=2)
        # Context shifting: two 1x1 convolutions and a sigmoid.
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, mid_channels, kernel_size=1, stride=1, padding=0, bias=False),
            nn.BatchNorm2d(mid_channels),
            nn.ReLU(inplace=True),
            nn.Conv2d(mid_channels, channels, kernel_size=1, stride=1, padding=0, bias=True),
            nn.Sigmoid()
        )
        self.bn = nn.BatchNorm2d(channels)

    def global_spatial_pool(self, x):
        batch, channel, height, width = x.size()
        # [N, C, H, W]
        x_m = x
        # [N, C, H * W]
        x_m = x_m.view(batch, channel, height * width)
        # [N, 1, C, H * W]
        x_m = x_m.unsqueeze(1)
        # [N, 1, H, W]
        mask = self.conv_mask(x)
        # [N, 1, H * W]
        mask = mask.view(batch, 1, height * width)
        # [N, 1, H * W]
        mask = self.softmax(mask)
        # [N, 1, H * W, 1]
        mask = mask.unsqueeze(-1)
        # [N, 1, C, 1]
        x = torch.matmul(x_m, mask)
        # [N, C, 1, 1]
        x = x.permute(0, 2, 1, 3)
        return x

    def forward(self, x):
        def _inner_forward(x):
            identity = x
            x = self.global_spatial_pool(x)
            x = self.channel_attention(x)
            # Reweight the input with the shifted channel weights, then normalize.
            x = self.bn(identity * x)
            return x
        # Optionally trade compute for memory via gradient checkpointing.
        if self.with_cp and x.requires_grad:
            x = cp.checkpoint(_inner_forward, x)
        else:
            x = _inner_forward(x)
        return x
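A matching shape check (again with toy sizes of my own):

# Hypothetical usage of GlobalContextModeling on one branch.
gcm = GlobalContextModeling(channels=64, num_branch=0, reduction=[8, 8, 8])
y = gcm(torch.randn(2, 64, 32, 24))
print(y.shape)  # torch.Size([2, 64, 32, 24])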
4. DSC
The code is as follows:
class DynamicSplitConvolution(nn.Module):
    # _split_channels, channel_shuffle and ConvBN are helpers defined elsewhere in
    # the repository; a sketch of the first two is given after this class.
    def __init__(self, channels, stride, num_branch, num_groups, num_kernels, with_cp=False):
        super().__init__()
        self.with_cp = with_cp
        self.num_groups = num_groups[num_branch]
        self.num_kernels = num_kernels[num_branch]
        self.split_channels = _split_channels(channels, self.num_groups)
        # One depthwise ConvBN per split, with growing kernel sizes 3, 5, 7, ...
        self.conv = nn.ModuleList([
            ConvBN(
                self.split_channels[i],
                self.split_channels[i],
                kernel_size=i * 2 + 3,
                stride=stride,
                padding=i + 1,
                groups=self.split_channels[i],
                num_kernels=self.num_kernels)
            for i in range(self.num_groups)
        ])

    def forward(self, x):
        def _inner_forward(x):
            if self.num_groups == 1:
                x = self.conv[0](x)
            else:
                # Split along channels, convolve each split with its own kernel size,
                # then concatenate and shuffle channels across the groups.
                x_split = torch.split(x, self.split_channels, dim=1)
                x = [conv(t) for conv, t in zip(self.conv, x_split)]
                x = torch.cat(x, dim=1)
                x = channel_shuffle(x, self.num_groups)
            return x
        if self.with_cp and x.requires_grad:
            x = cp.checkpoint(_inner_forward, x)
        else:
            x = _inner_forward(x)
        return x
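DynamicSplitConvolution relies on the helpers _split_channels, channel_shuffle, and ConvBN, which are defined elsewhere in the repository and not shown in this post; a minimal sketch of the first two, consistent with how they are called above:

def _split_channels(channels, num_groups):
    # Split `channels` into `num_groups` roughly equal parts; the first part
    # absorbs the remainder so that the parts sum back to `channels`.
    split = [channels // num_groups for _ in range(num_groups)]
    split[0] += channels - sum(split)
    return split

def channel_shuffle(x, groups):
    # Interleave channels across groups (as in ShuffleNet) so that information
    # mixes between the per-group convolutions.
    batch, channels, height, width = x.size()
    x = x.view(batch, groups, channels // groups, height, width)
    x = x.transpose(1, 2).contiguous()
    return x.view(batch, channels, height, width)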
All of the convolution kernels here are generated by dynamic convolution.
Dynamic convolution gives every input feature map K different kernels (the kernel sizes are the same; only the parameter values differ), and the implementation builds them on top of PyTorch's F.conv2d. First, an SE-style attention module (as in SENet) produces, for each sample in the batch, K attention weights that blend the K kernels. F.conv2d by itself cannot apply a different kernel to each sample, so the input is reshaped so that its channel dimension becomes batch_size * in_channels and the groups argument is multiplied by batch_size; each group then corresponds to the channels of one sample's feature map, so every sample is convolved with its own blend of the K kernels (see the sketch below).
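The reshaping trick in isolation (toy sizes of my own choosing): folding the batch into the channel dimension and multiplying groups by the batch size makes each sample use its own kernel. The KernelAttention and KernelAggregation classes below implement the full version.

import torch
import torch.nn.functional as F

batch, in_ch, out_ch, k = 2, 4, 8, 3
x = torch.randn(batch, in_ch, 16, 16)
# One weight per sample, e.g. an attention-weighted blend of K kernel banks.
weight = torch.randn(batch * out_ch, in_ch, k, k)
# Fold the batch into the channels, then set groups=batch so that each sample
# is convolved with its own slice of `weight`.
y = F.conv2d(x.view(1, batch * in_ch, 16, 16), weight, padding=1, groups=batch)
y = y.view(batch, out_ch, 16, 16)  # -> [2, 8, 16, 16]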
class KernelAttention(nn.Module):
    # SE-style attention that outputs, per sample, one weight for each of the K kernels.
    def __init__(self, channels, reduction=4, num_kernels=4, init_weight=True):
        super().__init__()
        if channels != 3:
            mid_channels = channels // reduction
        else:
            # For 3-channel (RGB) inputs the reduced width would be too small.
            mid_channels = num_kernels
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        self.conv1 = nn.Conv2d(channels, mid_channels, kernel_size=1, bias=False)
        self.bn = nn.BatchNorm2d(mid_channels)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = nn.Conv2d(mid_channels, num_kernels, kernel_size=1, bias=True)
        self.sigmoid = nn.Sigmoid()
        if init_weight:
            self._initialize_weights()

    def _initialize_weights(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
            if isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)

    def forward(self, x):
        # Global average pool -> two 1x1 convs -> sigmoid: [N, C, H, W] -> [N, K].
        x = self.avg_pool(x)
        x = self.conv1(x)
        x = self.bn(x)
        x = self.relu(x)
        x = self.conv2(x).view(x.shape[0], -1)
        x = self.sigmoid(x)
        return x
class KernelAggregation(nn.Module):
    # Blends the K kernel banks with per-sample attention weights, then applies the
    # blended kernel to each sample via a single grouped F.conv2d call.
    def __init__(self, in_channels, out_channels, kernel_size, stride, padding, dilation, groups, bias,
                 num_kernels, init_weight=True):
        super().__init__()
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.kernel_size = kernel_size
        self.stride = stride
        self.padding = padding
        self.dilation = dilation
        self.groups = groups
        self.num_kernels = num_kernels
        # K kernel banks: [K, C_out, C_in // groups, k, k].
        self.weight = nn.Parameter(
            torch.randn(num_kernels, out_channels, in_channels // groups, kernel_size, kernel_size),
            requires_grad=True)
        if bias:
            self.bias = nn.Parameter(
                torch.zeros(num_kernels, out_channels))
        else:
            self.bias = None
        if init_weight:
            self._initialize_weights()

    def _initialize_weights(self):
        for i in range(self.num_kernels):
            nn.init.kaiming_uniform_(self.weight[i])

    def forward(self, x, attention):
        batch_size, in_channels, height, width = x.size()
        # Fold the batch into the channel dimension: [N, C, H, W] -> [1, N*C, H, W].
        x = x.contiguous().view(1, batch_size * self.in_channels, height, width)
        # Blend the K kernel banks per sample: [N, K] x [K, ...] -> per-sample weights.
        weight = self.weight.contiguous().view(self.num_kernels, -1)
        weight = torch.mm(attention, weight).contiguous().view(
            batch_size * self.out_channels,
            self.in_channels // self.groups,
            self.kernel_size,
            self.kernel_size)
        # groups * batch_size makes each sample use its own blended kernel.
        if self.bias is not None:
            bias = torch.mm(attention, self.bias).contiguous().view(-1)
            x = F.conv2d(
                x,
                weight=weight,
                bias=bias,
                stride=self.stride,
                padding=self.padding,
                dilation=self.dilation,
                groups=self.groups * batch_size)
        else:
            x = F.conv2d(
                x,
                weight=weight,
                bias=None,
                stride=self.stride,
                padding=self.padding,
                dilation=self.dilation,
                groups=self.groups * batch_size)
        # Unfold the batch again: [1, N*C_out, H', W'] -> [N, C_out, H', W'].
        x = x.contiguous().view(batch_size, self.out_channels, x.shape[-2], x.shape[-1])
        return x
class DynamicKernelAggregation(nn.Module):
    # Drop-in dynamic conv layer: KernelAttention predicts the per-sample kernel
    # weights, KernelAggregation applies the blended kernel.
    def __init__(self, in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1,
                 bias=True, num_kernels=4):
        super().__init__()
        assert in_channels % groups == 0
        self.attention = KernelAttention(
            in_channels,
            num_kernels=num_kernels)
        self.aggregation = KernelAggregation(
            in_channels,
            out_channels,
            kernel_size=kernel_size,
            stride=stride,
            padding=padding,
            dilation=dilation,
            groups=groups,
            bias=bias,
            num_kernels=num_kernels)

    def forward(self, x):
        # [N, K] attention weights from the input, then the dynamic convolution itself.
        attention = self.attention(x)
        x = self.aggregation(x, attention)
        return x
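Putting it together (hypothetical toy shapes): each sample in the batch is convolved with its own attention-blended kernel.

# Hypothetical usage: a depthwise 3x3 dynamic convolution with K = 4 kernels.
conv = DynamicKernelAggregation(16, 16, kernel_size=3, padding=1, groups=16, num_kernels=4)
out = conv(torch.randn(2, 16, 32, 32))
print(out.shape)  # torch.Size([2, 16, 32, 32])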