【视频动作识别】I3D网络的代码细节

视频分析中常用的I3D代码细节

目前写的比较好的两个video understanding base code

这两个代码中对 I3D 的实现方式分别如下

# mmaction

# configs 
# Backbone section of a model config (excerpt of a larger dict, hence the
# trailing comma). NOTE(review): how each key is consumed by 'ResNet_I3D' is
# not visible here; the per-key notes below are assumptions to confirm.
backbone=dict(
        type='ResNet_I3D',
        pretrained='modelzoo://resnet50',
        depth=50,
        num_stages=4,
        out_indices=[3],
        frozen_stages=-1,
        # Four tuples of lengths 3/4/6/3 -- presumably one tuple per stage and
        # one flag per block, where 1 enables `if_inflate` for that block.
        inflate_freq=((1,1,1), (1,0,1,0), (1,0,1,0,1,0), (0,1,0)),
        # Bottleneck accepts '3x1x1' or '3x3x3' for its `inflate_style`;
        # presumably this value is forwarded to each block.
        inflate_style='3x1x1',
        conv1_kernel_t=5,
        conv1_stride_t=2,
        pool1_kernel_t=1,
        pool1_stride_t=2,
        bn_eval=False,
        partial_bn=False,
        # Bottleneck accepts 'pytorch' or 'caffe' for its `style`; presumably
        # forwarded as well.
        style='pytorch'),

# code 
# ......
# bottle neck 这部分代码展示了 I3D 的一些细节
# if_inflate 控制 bottle neck 中是否使用时序卷积。
# True
# 一种风格是 Kernel (3,1,1)-->(1,3,3)-->(1,1,1), 正常 I3D 中的 bottleneck 就是这样子的。
# 另一个风格是直接用 (3,3,3) 卷积核 Kernel (1,1,1)-->(3,3,3)-->(1,1,1)

# False
# 不使用时序卷积  (1,1,1)-->(1,3,3)-->(1,1,1)，即只保留空间上的 3x3 卷积
class Bottleneck(nn.Module):
    """3D bottleneck block: conv1 -> conv2 -> 1x1x1 conv3, each with a BN.

    The kernel layout of conv1/conv2 (written as T x H x W) depends on
    ``if_inflate`` and ``inflate_style``:

    * inflated, ``'3x1x1'``: (3,1,1) -> (1,3,3) -> (1,1,1)
    * inflated, ``'3x3x3'``: (1,1,1) -> (3,3,3) -> (1,1,1)
    * not inflated:          (1,1,1) -> (1,3,3) -> (1,1,1)
    """

    # Output channels of the block are ``planes * expansion``.
    expansion = 4

    def __init__(self,
                 inplanes,
                 planes,
                 spatial_stride=1,
                 temporal_stride=1,
                 dilation=1,
                 downsample=None,
                 style='pytorch',
                 if_inflate=True,
                 inflate_style='3x1x1',
                 if_nonlocal=True,
                 nonlocal_cfg=None,
                 with_cp=False):
        """Bottleneck block for ResNet.

        If style is "pytorch", the stride-two layer is the 3x3 conv layer,
        if it is "caffe", the stride-two layer is the first 1x1 conv layer.

        Args:
            inplanes: Number of input channels of conv1.
            planes: Number of channels produced by conv1 and conv2; conv3
                expands this to ``planes * expansion``.
            spatial_stride: Stride applied on the two spatial axes.
            temporal_stride: Stride applied on the temporal axis. Only used
                when ``if_inflate`` is True (see the note in the body).
            dilation: Spatial dilation (and matching padding) of conv2.
            downsample: Stored as ``self.downsample`` unchanged.
            style: ``'pytorch'`` or ``'caffe'``; selects which conv carries
                the strides.
            if_inflate: Whether conv1/conv2 use a temporal kernel of size 3.
            inflate_style: ``'3x1x1'`` or ``'3x3x3'``; see the class docstring.
            if_nonlocal: Build a non-local block when ``nonlocal_cfg`` is
                also given.
            nonlocal_cfg: Config dict for ``build_nonlocal_block``. It is
                copied and its ``'in_channels'`` entry is set to
                ``planes * expansion`` before the call.
            with_cp: Stored as ``self.with_cp`` unchanged.
        """
        super(Bottleneck, self).__init__()
        assert style in ['pytorch', 'caffe']
        assert inflate_style in ['3x1x1', '3x3x3']
        self.inplanes = inplanes
        self.planes = planes

        # Decide which of conv1 / conv2 carries the strides.
        if style == 'pytorch':
            self.conv1_stride = 1
            self.conv2_stride = spatial_stride
            self.conv1_stride_t = 1
            self.conv2_stride_t = temporal_stride
        else:
            self.conv1_stride = spatial_stride
            self.conv2_stride = 1
            self.conv1_stride_t = temporal_stride
            self.conv2_stride_t = 1

        # Pick kernel / temporal padding / temporal stride per variant, then
        # build both convolutions once instead of once per branch.
        if if_inflate:
            conv1_stride_t = self.conv1_stride_t
            conv2_stride_t = self.conv2_stride_t
            if inflate_style == '3x1x1':
                conv1_kernel, conv1_padding = (3, 1, 1), (1, 0, 0)
                conv2_kernel, conv2_padding_t = (1, 3, 3), 0
            else:
                conv1_kernel, conv1_padding = (1, 1, 1), (0, 0, 0)
                conv2_kernel, conv2_padding_t = (3, 3, 3), 1
        else:
            # NOTE: a non-inflated block always uses temporal stride 1, so
            # `temporal_stride` has no effect on its convolutions.
            conv1_stride_t = conv2_stride_t = 1
            conv1_kernel, conv1_padding = (1, 1, 1), (0, 0, 0)
            conv2_kernel, conv2_padding_t = (1, 3, 3), 0

        self.conv1 = nn.Conv3d(
            inplanes,
            planes,
            kernel_size=conv1_kernel,
            stride=(conv1_stride_t, self.conv1_stride, self.conv1_stride),
            padding=conv1_padding,
            bias=False)
        self.conv2 = nn.Conv3d(
            planes,
            planes,
            kernel_size=conv2_kernel,
            stride=(conv2_stride_t, self.conv2_stride, self.conv2_stride),
            padding=(conv2_padding_t, dilation, dilation),
            dilation=(1, dilation, dilation),
            bias=False)

        self.bn1 = nn.BatchNorm3d(planes)
        self.bn2 = nn.BatchNorm3d(planes)
        self.conv3 = nn.Conv3d(
            planes, planes * self.expansion, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm3d(planes * self.expansion)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample
        self.spatial_stride = spatial_stride
        self.temporal_stride = temporal_stride
        # Misspelled names kept so existing readers of these attributes
        # continue to work.
        self.spatial_tride = spatial_stride
        self.temporal_tride = temporal_stride
        self.dilation = dilation
        self.with_cp = with_cp

        if if_nonlocal and nonlocal_cfg is not None:
            # Copy so the caller's config dict is not mutated.
            nonlocal_cfg_ = nonlocal_cfg.copy()
            nonlocal_cfg_['in_channels'] = planes * self.expansion
            self.nonlocal_block = build_nonlocal_block(nonlocal_cfg_)
        else:
            self.nonlocal_block = None

    def forward(self, x)
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值