
极市导读
本文作者总结了一些语义分割的tricks，附详细代码，主要来自Kaggle论坛上看到的帖子和个人做过的project。 >>加入极市CV技术交流群，走在计算机视觉的最前沿
1. 如何去优化IoU
class BCELoss2d(nn.Module):
    """Binary cross-entropy computed over every element of a logit map.

    The loss is evaluated directly on the raw logits with
    ``nn.BCEWithLogitsLoss`` instead of applying a sigmoid and then
    ``nn.BCELoss``: the fused form stays finite and accurate when the
    sigmoid would saturate to exactly 0 or 1.

    Args:
        weight: Optional rescaling weight forwarded to the underlying loss.
            It must be broadcastable against the *flattened* inputs.
        size_average: ``True`` (or ``None``) averages over all elements,
            a falsy value sums them. Kept for backward compatibility and
            translated to the ``reduction`` argument.
    """

    def __init__(self, weight=None, size_average=True):
        super(BCELoss2d, self).__init__()
        # Same mapping the legacy ``size_average`` flag had in torch.
        if size_average is None or size_average:
            reduction = "mean"
        else:
            reduction = "sum"
        self.bce_loss = nn.BCEWithLogitsLoss(weight=weight, reduction=reduction)

    def forward(self, logits, targets):
        """Return the reduced BCE between ``logits`` and ``targets``.

        Both tensors are flattened to 1-D first, so they only need to hold
        the same number of elements.
        """
        # reshape (not view) so non-contiguous tensors, e.g. after a
        # transpose or permute, are accepted as well.
        logits_flat = logits.reshape(-1)
        # BCE needs floating-point targets; masks are often stored as ints.
        targets_flat = targets.reshape(-1).to(logits_flat.dtype)
        return self.bce_loss(logits_flat, targets_flat)
def iou_coef(y_true, y_pred, smooth=1):
    """Smoothed intersection-over-union, reduced over the last axis.

    IoU = (|X & Y|) / (|X or Y|)

    Args:
        y_true: Ground-truth tensor.
        y_pred: Predicted tensor; it is multiplied element-wise with
            ``y_true``.
        smooth: Constant added to both numerator and denominator, which
            keeps the ratio defined when the union is zero.

    Returns:
        ``(intersection + smooth) / (union + smooth)`` where both sums run
        over ``axis=-1``.

    NOTE(review): ``K`` is presumably the Keras backend module - confirm
    against the file's imports.
    """
    intersection = K.sum(K.abs(y_true * y_pred), axis=-1)
    # Inclusion-exclusion: |X or Y| = |X| + |Y| - |X & Y|.
    union = K.sum(y_true, -1) + K.sum(y_pred, -1) - intersection
    return (intersection + smooth) / (union + smooth)


def iou_coef_loss(y_true, y_pred):
    """Return the negated ``iou_coef`` so that minimising it maximises IoU."""
    return -iou_coef(y_true, y_pred)
# Dot product of the ReLU-clamped ``errors_sorted`` with ``grad`` (wrapped in Variable).
# NOTE(review): isolated excerpt - ``errors_sorted`` and ``grad`` are defined outside this
# view; presumably this is the final step of a Lovasz hinge loss - confirm.
loss = torch.dot(F.relu(errors_sorted), Variable(grad))
1.1 如果你不在乎训练时间的话
def symmetric_lovasz(outputs, targets):
    """Average ``lovasz_hinge`` over the original and the sign-flipped problem.

    The second term evaluates the same loss on negated outputs against the
    complemented targets (``1 - targets``); the two terms are averaged.
    """
    direct_term = lovasz_hinge(outputs, targets)
    flipped_term = lovasz_hinge(-outputs, 1 - targets)
    return (direct_term + flipped_term) / 2
1.2 如果你的模型斗不过Hard Examples的话
def focal_loss(self, output, target, alpha, gamma, OHEM_percent):
    """Focal loss on logits with online hard example mining (OHEM).

    Args:
        output: Raw logits; flattened to 1-D before use.
        target: Targets with the same number of elements as ``output``.
            NOTE(review): assumed to be floating-point 0/1 values - confirm.
        alpha: Multiplicative weight applied to every element's loss.
        gamma: Focusing exponent applied to ``1 - p_t``.
        OHEM_percent: Fraction of elements (those with the largest loss)
            that are kept for the final mean.

    Returns:
        Mean focal loss over the kept elements. At least one and at most
        all elements are kept, so a small ``OHEM_percent`` no longer yields
        an empty selection (and therefore NaN).
    """
    output = output.contiguous().view(-1)
    target = target.contiguous().view(-1)

    # Binary cross-entropy on logits in its max-shifted, overflow-safe form.
    max_val = (-output).clamp(min=0)
    loss = output - output * target + max_val + ((-max_val).exp() + (-output - max_val).exp()).log()

    # log(1 - p_t): log(1 - p) where target is 1, log(p) where target is 0,
    # so exp(gamma * invprobs) is the focal modulation (1 - p_t) ** gamma.
    invprobs = F.logsigmoid(-output * (target * 2 - 1))
    per_element = alpha * (invprobs * gamma).exp() * loss

    # Online Hard Example Mining: top x% losses (pixel-wise).
    # Refer to http://www.robots.ox.ac.uk/~tvg/publications/2017/0026.pdf
    num_elements = per_element.numel()
    # Clamp k into [1, N]; an empty input still gives k == 0 as before.
    k = min(num_elements, max(1, int(OHEM_percent * num_elements)))
    hardest, _ = per_element.topk(k=k)
    return hardest.mean()
2. 魔改U-Net
def _conv_bn_relu(neurons, tensor, bn, dropout):
    """Apply one 3x3 conv -> (BatchNorm) -> ReLU -> (SpatialDropout2D) stage."""
    tensor = Conv2D(neurons, (3, 3), padding='same', kernel_initializer='glorot_normal')(tensor)
    if bn:
        tensor = BatchNormalization()(tensor)
    tensor = Activation('relu')(tensor)
    if dropout is not None:
        tensor = SpatialDropout2D(dropout)(tensor)
    return tensor


def _double_conv(neurons, tensor, bn, dropout):
    """Stack two ``_conv_bn_relu`` stages with the same settings."""
    tensor = _conv_bn_relu(neurons, tensor, bn, dropout)
    return _conv_bn_relu(neurons, tensor, bn, dropout)


def conv_block(neurons, block_input, bn=False, dropout=None):
    """Encoder block: two conv stages followed by 2x2 max pooling.

    Args:
        neurons: Number of filters of both convolutions.
        block_input: Input tensor.
        bn: Insert ``BatchNormalization`` after each convolution when true.
        dropout: ``SpatialDropout2D`` rate applied after each activation,
            or ``None`` to disable dropout.

    Returns:
        ``(pool, conv)``: the pooled block output and the un-pooled tensor
        to use as shortcut in the up-sampling blocks.
    """
    conv = _double_conv(neurons, block_input, bn, dropout)
    pool = MaxPooling2D((2, 2))(conv)
    return pool, conv


def middle_block(neurons, block_input, bn=False, dropout=None):
    """Bottleneck block: two conv stages, no pooling. Same args as ``conv_block``."""
    return _double_conv(neurons, block_input, bn, dropout)


def deconv_block(neurons, block_input, shortcut, bn=False, dropout=None):
    """Decoder block: stride-2 transposed conv, concat with ``shortcut``, two conv stages.

    Args:
        neurons: Number of filters of the transposed and regular convolutions.
        block_input: Tensor coming from the previous (coarser) level.
        shortcut: Encoder tensor concatenated after up-sampling.
        bn: Insert ``BatchNormalization`` after each convolution when true.
        dropout: ``SpatialDropout2D`` rate, or ``None`` to disable dropout.
    """
    deconv = Conv2DTranspose(neurons, (3, 3), strides=(2, 2), padding="same")(block_input)
    uconv = concatenate([deconv, shortcut])
    return _double_conv(neurons, uconv, bn, dropout)


def build_model(start_neurons, bn=False, dropout=None, input_shape=(128, 128, 1)):
    """Build a four-level U-Net ending in a 1-channel sigmoid map.

    Args:
        start_neurons: Filter count of the first level; it doubles at each
            deeper level, up to ``start_neurons * 16`` in the middle block.
        bn: Forwarded to every block (batch normalisation on/off).
        dropout: Forwarded to every block (spatial dropout rate or ``None``).
        input_shape: Shape passed to ``Input``. Defaults to the previously
            hard-coded ``(128, 128, 1)``.
            NOTE(review): with four 2x2 poolings the spatial size
            presumably has to be a multiple of 16 for the shortcuts to
            line up - confirm before using other sizes.

    Returns:
        A ``Model`` mapping the input layer to the sigmoid output layer.
    """
    input_layer = Input(input_shape)

    # Encoder: each level halves the spatial size (128 -> 64 -> 32 -> 16 -> 8
    # for the default input).
    conv1, shortcut1 = conv_block(start_neurons, input_layer, bn, dropout)
    conv2, shortcut2 = conv_block(start_neurons * 2, conv1, bn, dropout)
    conv3, shortcut3 = conv_block(start_neurons * 4, conv2, bn, dropout)
    conv4, shortcut4 = conv_block(start_neurons * 8, conv3, bn, dropout)

    # Middle
    convm = middle_block(start_neurons * 16, conv4, bn, dropout)

    # Decoder: each level doubles the spatial size (8 -> 16 -> 32 -> 64 -> 128
    # for the default input).
    deconv4 = deconv_block(start_neurons * 8, convm, shortcut4, bn, dropout)
    deconv3 = deconv_block(start_neurons * 4, deconv4, shortcut3, bn, dropout)
    deconv2 = deconv_block(start_neurons * 2, deconv3, shortcut2, bn, dropout)
    deconv1 = deconv_block(start_neurons, deconv2, shortcut1, bn, dropout)

    output_layer = Conv2D(1, (1, 1), padding="same", activation="sigmoid")(deconv1)
    model = Model(input_layer, output_layer)
    return model
def forward(self, x):
    """Run ``x`` through the stem, the four layer groups, pooling and ``fc``.

    The sub-modules are applied strictly in this order: ``conv1``, ``bn1``,
    ``relu``, ``maxpool``, ``layer1`` .. ``layer4``, ``avgpool``. The result
    is then flattened to ``(x.size(0), -1)`` and passed to ``fc``.
    """
    pipeline = (
        "conv1", "bn1", "relu", "maxpool",
        "layer1", "layer2", "layer3", "layer4",
        "avgpool",
    )
    for stage_name in pipeline:
        x = getattr(self, stage_name)(x)
    flattened = x.view(x.size(0), -1)
    return self.fc(flattened)
def __init__(self):
    """Assemble a ResNet-34 encoder with a five-stage decoder and a logit head.

    Sub-modules are registered in the same order as before: backbone, stem,
    encoder stages, center, decoders, logit head.
    """
    super().__init__()

    backbone = models.resnet34(pretrained=True)
    self.resnet = backbone

    # Stem reuses the backbone's first conv/bn/relu (its max-pool is not used).
    self.conv1 = nn.Sequential(backbone.conv1, backbone.bn1, backbone.relu)

    # Encoder stages; trailing numbers are the stage's channel counts.
    self.encoder2 = backbone.layer1  # 64
    self.encoder3 = backbone.layer2  # 128
    self.encoder4 = backbone.layer3  # 256
    self.encoder5 = backbone.layer4  # 512

    center_layers = [
        ConvBn2d(512, 512, kernel_size=3, padding=1),
        nn.ReLU(inplace=True),
        ConvBn2d(512, 256, kernel_size=3, padding=1),
        nn.ReLU(inplace=True),
        nn.MaxPool2d(kernel_size=2, stride=2),
    ]
    self.center = nn.Sequential(*center_layers)

    # Decoder(in_channels, channels, out_channels); in_channels is the
    # previous decoder output plus the concatenated encoder feature.
    self.decoder5 = Decoder(256 + 512, 512, 64)
    self.decoder4 = Decoder(64 + 256, 256, 64)
    self.decoder3 = Decoder(64 + 128, 128, 64)
    self.decoder2 = Decoder(64 + 64, 64, 64)
    self.decoder1 = Decoder(64, 32, 64)

    head_layers = [
        nn.Conv2d(384, 64, kernel_size=3, padding=1),
        nn.ELU(inplace=True),
        nn.Conv2d(64, 1, kernel_size=1, padding=0),
    ]
    self.logit = nn.Sequential(*head_layers)


def forward(self, x):
    """Encode ``x`` and decode it into the feature maps ``d5`` .. ``d1``.

    ``x`` is normalised three times - with the mean/std pairs at index 2,
    1 and 0, in that order - and the three results are concatenated along
    dim 1 before entering the encoder.

    NOTE(review): this excerpt of the method stops after ``d1``; whatever
    follows is not part of this span.
    """
    mean = [0.485, 0.456, 0.406]
    std = [0.229, 0.224, 0.225]
    normalised = [(x - mean[idx]) / std[idx] for idx in (2, 1, 0)]
    x = torch.cat(normalised, 1)

    # Encoder path.
    e1 = self.conv1(x)
    e2 = self.encoder2(e1)
    e3 = self.encoder3(e2)
    e4 = self.encoder4(e3)
    e5 = self.encoder5(e4)

    f = self.center(e5)

    # Decoder path; each stage but the last also receives an encoder feature.
    d5 = self.decoder5(f, e5)
    d4 = self.decoder4(d5, e4)
    d3 = self.decoder3(d4, e3)
    d2 = self.decoder2(d3, e2)
    d1 = self.decoder1(d2)
class sSE(nn.Module):
    """Spatial gate: a 1x1 ``ConvBn2d`` down to one channel, then sigmoid."""

    def __init__(self, out_channels):
        super(sSE, self).__init__()
        self.conv = ConvBn2d(in_channels=out_channels, out_channels=1, kernel_size=1, padding=0)

    def forward(self, x):
        """Return the single-channel gate computed from ``x``."""
        # torch.sigmoid replaces the deprecated F.sigmoid.
        return torch.sigmoid(self.conv(x))


class cSE(nn.Module):
    """Channel gate: global average pool, two 1x1 ``ConvBn2d`` layers, sigmoid.

    The hidden layer has ``int(out_channels / 2)`` channels.
    """

    def __init__(self, out_channels):
        super(cSE, self).__init__()
        self.conv1 = ConvBn2d(in_channels=out_channels, out_channels=int(out_channels / 2), kernel_size=1, padding=0)
        self.conv2 = ConvBn2d(in_channels=int(out_channels / 2), out_channels=out_channels, kernel_size=1, padding=0)

    def forward(self, x):
        """Return the per-channel gate computed from ``x``."""
        # Pool over the full spatial extent with the functional call instead
        # of constructing a fresh nn.AvgPool2d module on every forward.
        x = F.avg_pool2d(x, kernel_size=x.size()[2:])
        x = F.relu(self.conv1(x))
        return torch.sigmoid(self.conv2(x))


class Decoder(nn.Module):
    """Decoder stage: 2x bilinear upsample, optional concat, two convs, gating.

    Args:
        in_channels: Channels entering the first conv (after the optional
            concatenation with the encoder feature).
        channels: Channels between the two convs.
        out_channels: Channels of the stage output and of both gates.
    """

    def __init__(self, in_channels, channels, out_channels):
        super(Decoder, self).__init__()
        self.conv1 = ConvBn2d(in_channels, channels, kernel_size=3, padding=1)
        self.conv2 = ConvBn2d(channels, out_channels, kernel_size=3, padding=1)
        self.spatial_gate = sSE(out_channels)
        self.channel_gate = cSE(out_channels)

    def forward(self, x, e=None):
        """Upsample ``x``, optionally concatenate ``e`` on dim 1, convolve and gate."""
        # F.interpolate replaces the deprecated F.upsample (same arguments).
        x = F.interpolate(x, scale_factor=2, mode='bilinear', align_corners=True)
        if e is not None:
            x = torch.cat([x, e], 1)

        x = F.relu(self.conv1(x), inplace=True)
        x = F.relu(self.conv2(x), inplace=True)

        # Sum of the spatially-gated and the channel-gated feature map.
        g1 = self.spatial_gate(x)
        g2 = self.channel_gate(x)
        x = g1 * x + g2 * x
        return x
# Hypercolumn: bring e1 and d2..d5 to d1's resolution and stack everything
# along the channel dimension.
# NOTE(review): isolated excerpt - ``self``, ``e1`` and ``d1``..``d5`` come from
# code outside this span.
f = torch.cat(
    (
        F.interpolate(e1, scale_factor=2, mode='bilinear', align_corners=False),
        d1,
        F.interpolate(d2, scale_factor=2, mode='bilinear', align_corners=False),
        F.interpolate(d3, scale_factor=4, mode='bilinear', align_corners=False),
        F.interpolate(d4, scale_factor=8, mode='bilinear', align_corners=False),
        F.interpolate(d5, scale_factor=16, mode='bilinear', align_corners=False),
    ),
    1,
)
# The functional dropout defaults to training=True, which would keep dropping
# channels at inference time; follow the module's train/eval mode instead.
# NOTE(review): assumes ``self`` is an nn.Module (has ``.training``) - confirm.
f = F.dropout2d(f, p=0.50, training=self.training)
logit = self.logit(f)
3. Training
# Cosine learning-rate schedule with warm restarts.
CYCLE = 8000     # length of one cosine cycle, in scheduler steps
LR_INIT = 0.1    # learning rate at the start of every cycle
LR_MIN = 0.001   # floor the learning rate decays towards within a cycle


def scheduler(x):
    """Return the learning rate for 1-based step ``x``.

    The rate starts at ``LR_INIT`` when ``x - 1`` is a multiple of ``CYCLE``
    and follows half a cosine wave down towards ``LR_MIN`` over the cycle,
    then restarts.
    """
    # Position inside the current cycle, in [0, 1).
    phase = np.mod(x - 1, CYCLE) / CYCLE
    # np.pi is used because no PI constant is defined alongside this snippet.
    return ((LR_INIT - LR_MIN) / 2) * (np.cos(np.pi * phase) + 1) + LR_MIN
4. 其他的一些小tricks(持续更新)
公众号后台回复“94”获取CVPR 2022-郑兆晖:目标检测定位蒸馏PPT~
# CV技术社群邀请函 #
备注:姓名-学校/公司-研究方向-城市(如:小极-北大-目标检测-深圳)
即可申请加入极市目标检测/图像分割/工业检测/人脸/医学影像/3D/SLAM/自动驾驶/超分辨率/姿态估计/ReID/GAN/图像增强/OCR/视频理解等技术交流群
每月大咖直播分享、真实项目需求对接、求职内推、算法竞赛、干货资讯汇总、与 10000+来自港科大、北大、清华、中科院、CMU、腾讯、百度等名校名企视觉开发者互动交流~

