
Jishi Editor's Note
This article walks through the full process of implementing the pix2pix GAN model in both PyTorch and Keras, with detailed code throughout.
So what are the actual differences between the two frameworks when implementing the same model?
Step one is preprocessing the dataset images. We use the Facades dataset here, where each file stores the two paired images side by side in a single image; splitting them apart lets us apply the same augmentation to both during training.
PyTorch:
def __getitem__(self, index):
    # Each file stores the paired images side by side; split it down the middle
    img = Image.open(self.files[index % len(self.files)])
    w, h = img.size
    img_A = img.crop((0, 0, w / 2, h))
    img_B = img.crop((w / 2, 0, w, h))
    # Random horizontal flip applied to both halves for augmentation
    if np.random.random() < 0.5:
        img_A = Image.fromarray(np.array(img_A)[:, ::-1, :], "RGB")
        img_B = Image.fromarray(np.array(img_B)[:, ::-1, :], "RGB")
    img_A = self.transform(img_A)
    img_B = self.transform(img_B)
    return {"A": img_A, "B": img_B}
Keras:
def load_batch(self, batch_size=1, is_testing=False):
    data_type = "train" if not is_testing else "val"
    path = glob('./datasets/%s/%s/*' % (self.dataset_name, data_type))
    self.n_batches = int(len(path) / batch_size)
    for i in range(self.n_batches - 1):
        batch = path[i * batch_size:(i + 1) * batch_size]
        imgs_A, imgs_B = [], []
        for img in batch:
            # Split the combined image down the middle
            img = self.imread(img)
            h, w, _ = img.shape
            half_w = int(w / 2)
            img_A = img[:, :half_w, :]
            img_B = img[:, half_w:, :]
            img_A = resize(img_A, self.img_res)
            img_B = resize(img_B, self.img_res)
            # Random horizontal flip for augmentation (training only)
            if not is_testing and np.random.random() > 0.5:
                img_A = np.fliplr(img_A)
                img_B = np.fliplr(img_B)
            imgs_A.append(img_A)
            imgs_B.append(img_B)
        # Scale pixel values from [0, 255] to [-1, 1]
        imgs_A = np.array(imgs_A) / 127.5 - 1.
        imgs_B = np.array(imgs_B) / 127.5 - 1.
        yield imgs_A, imgs_B
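The PyTorch __getitem__ above relies on a self.transform that is not shown in the excerpt. As a point of comparison with the Keras loader's manual /127.5 - 1. scaling, here is a minimal sketch of what that pipeline typically looks like for pix2pix (an assumption, not necessarily the repository's exact code):

import torchvision.transforms as transforms

# Hypothetical transform pipeline: resize to the network input size,
# convert to tensor, then scale to [-1, 1] to match the generator's Tanh output
transform = transforms.Compose([
    transforms.Resize((256, 256)),
    transforms.ToTensor(),                                    # [0, 255] -> [0, 1]
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),   # [0, 1] -> [-1, 1]
])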
The Model
The paper uses a U-Net, so we need skip connections between corresponding layers (identity mappings). The generator and discriminator are built as autoencoder-style stacks of downsampling and upsampling convolutions.
PyTorch:
class Generator(nn.Module):
    def __init__(self, in_channels=3, out_channels=3):
        super(Generator, self).__init__()
        # Encoder: 8 downsampling blocks
        self.down1 = DownSampleConv(in_channels, 64, batchnorm=False)
        self.down2 = DownSampleConv(64, 128)
        self.down3 = DownSampleConv(128, 256)
        self.down4 = DownSampleConv(256, 512, dropout_rate=0.5)
        self.down5 = DownSampleConv(512, 512, dropout_rate=0.5)
        self.down6 = DownSampleConv(512, 512, dropout_rate=0.5)
        self.down7 = DownSampleConv(512, 512, dropout_rate=0.5)
        self.down8 = DownSampleConv(512, 512, batchnorm=False, dropout_rate=0.5)
        # Decoder: 7 upsampling blocks; input channels double after the first
        # because of the skip concatenation
        self.up1 = UpSampleConv(512, 512, dropout_rate=0.5)
        self.up2 = UpSampleConv(1024, 512, dropout_rate=0.5)
        self.up3 = UpSampleConv(1024, 512, dropout_rate=0.5)
        self.up4 = UpSampleConv(1024, 512, dropout_rate=0.5)
        self.up5 = UpSampleConv(1024, 256)
        self.up6 = UpSampleConv(512, 128)
        self.up7 = UpSampleConv(256, 64)
        self.last_conv = nn.Sequential(
            nn.Upsample(scale_factor=2),
            nn.ZeroPad2d((1, 0, 1, 0)),
            nn.Conv2d(128, out_channels, 4, padding=1),
            nn.Tanh(),
        )

    def forward(self, x):
        ds1 = self.down1(x)
        ds2 = self.down2(ds1)
        ds3 = self.down3(ds2)
        ds4 = self.down4(ds3)
        ds5 = self.down5(ds4)
        ds6 = self.down6(ds5)
        ds7 = self.down7(ds6)
        ds8 = self.down8(ds7)
        # Each decoder stage also receives the mirrored encoder output (skip connection)
        us1 = self.up1(ds8, ds7)
        us2 = self.up2(us1, ds6)
        us3 = self.up3(us2, ds5)
        us4 = self.up4(us3, ds4)
        us5 = self.up5(us4, ds3)
        us6 = self.up6(us5, ds2)
        us7 = self.up7(us6, ds1)
        return self.last_conv(us7)


class Discriminator(nn.Module):
    def __init__(self, in_channels=3):
        super(Discriminator, self).__init__()
        # PatchGAN: the source and target images are concatenated on the channel axis
        self.model = nn.Sequential(
            DownSampleConv(in_channels + in_channels, 64, batchnorm=False, inplace=True),
            DownSampleConv(64, 128, inplace=True),
            DownSampleConv(128, 256, inplace=True),
            DownSampleConv(256, 512, inplace=True),
            nn.ZeroPad2d((1, 0, 1, 0)),
            nn.Conv2d(512, 1, (4, 4), padding=1, bias=False)
        )

    def forward(self, x, y):
        img_input = torch.cat([x, y], 1)
        return self.model(img_input)
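The DownSampleConv and UpSampleConv blocks used above are not defined in this excerpt. Below is a minimal sketch consistent with the channel arithmetic of the generator (4×4 stride-2 convolutions, BatchNorm, LeakyReLU/ReLU, optional dropout, skip concatenation in the decoder); treat the details as an assumption that may differ from the repository:

import torch
import torch.nn as nn

class DownSampleConv(nn.Module):
    # 4x4 stride-2 convolution -> optional BatchNorm -> LeakyReLU -> optional Dropout
    def __init__(self, in_channels, out_channels, batchnorm=True, dropout_rate=0.0, inplace=False):
        super().__init__()
        layers = [nn.Conv2d(in_channels, out_channels, 4, stride=2, padding=1, bias=False)]
        if batchnorm:
            layers.append(nn.BatchNorm2d(out_channels))
        layers.append(nn.LeakyReLU(0.2, inplace=inplace))
        if dropout_rate > 0:
            layers.append(nn.Dropout(dropout_rate))
        self.block = nn.Sequential(*layers)

    def forward(self, x):
        return self.block(x)

class UpSampleConv(nn.Module):
    # 4x4 stride-2 transposed convolution -> BatchNorm -> ReLU -> optional Dropout,
    # then concatenation with the matching encoder feature map (the U-Net skip)
    def __init__(self, in_channels, out_channels, dropout_rate=0.0):
        super().__init__()
        layers = [
            nn.ConvTranspose2d(in_channels, out_channels, 4, stride=2, padding=1, bias=False),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True),
        ]
        if dropout_rate > 0:
            layers.append(nn.Dropout(dropout_rate))
        self.block = nn.Sequential(*layers)

    def forward(self, x, skip):
        x = self.block(x)
        return torch.cat((x, skip), dim=1)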
Keras:
def build_generator(self):
    initializers = RandomNormal(stddev=0.02)
    input_image = Input(shape=self.img_shape)
    # Encoder
    e1 = self.encoder_block(input_image, 64, batchnorm=False)
    e2 = self.encoder_block(e1, 128)
    e3 = self.encoder_block(e2, 256)
    e4 = self.encoder_block(e3, 512)
    e5 = self.encoder_block(e4, 512)
    e6 = self.encoder_block(e5, 512)
    e7 = self.encoder_block(e6, 512)
    # Decoder with skip connections to the mirrored encoder outputs
    d2 = self.decoder_block(e7, e6, 512)
    d3 = self.decoder_block(d2, e5, 512)
    d4 = self.decoder_block(d3, e4, 512)
    d5 = self.decoder_block(d4, e3, 256)
    d6 = self.decoder_block(d5, e2, 128)
    d7 = self.decoder_block(d6, e1, 64)
    up = UpSampling2D(size=2)(d7)
    output_image = Conv2D(self.channels, (4, 4), strides=1, padding='same',
                          kernel_initializer=initializers, activation='tanh')(up)
    model = Model(input_image, output_image)
    return model


def build_discriminator(self):
    initializers = RandomNormal(stddev=0.02)
    input_source_image = Input(self.img_shape)
    input_target_image = Input(self.img_shape)
    # PatchGAN: concatenate source and target images along the channel axis
    merged_input = Concatenate(axis=-1)([input_source_image, input_target_image])
    filters_list = [64, 128, 256, 512]

    def disc_layer(input_layer, filters, kernel_size=(4, 4), batchnorm=True):
        x = Conv2D(filters, kernel_size=kernel_size, strides=2, padding='same',
                   kernel_initializer=initializers)(input_layer)
        x = LeakyReLU(0.2)(x)
        if batchnorm:
            x = BatchNormalization()(x)
        return x

    x = disc_layer(merged_input, filters_list[0], batchnorm=False)
    x = disc_layer(x, filters_list[1])
    x = disc_layer(x, filters_list[2])
    x = disc_layer(x, filters_list[3])
    discriminator_output = Conv2D(1, kernel_size=(4, 4), padding='same',
                                  kernel_initializer=initializers)(x)
    model = Model([input_source_image, input_target_image], discriminator_output)
    return model
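Likewise, encoder_block and decoder_block are not shown in the excerpt. Here is a hypothetical sketch of the two helpers, mirroring the PyTorch blocks above; the exact details are an assumption:

from tensorflow.keras.initializers import RandomNormal
from tensorflow.keras.layers import (Activation, BatchNormalization, Concatenate,
                                     Conv2D, Conv2DTranspose, Dropout, LeakyReLU)

# Hypothetical helpers living on the same class as build_generator
def encoder_block(self, layer_in, filters, batchnorm=True):
    init = RandomNormal(stddev=0.02)
    # 4x4 stride-2 convolution halves the spatial resolution
    x = Conv2D(filters, (4, 4), strides=2, padding='same', kernel_initializer=init)(layer_in)
    if batchnorm:
        x = BatchNormalization()(x)
    return LeakyReLU(0.2)(x)

def decoder_block(self, layer_in, skip_in, filters, dropout=True):
    init = RandomNormal(stddev=0.02)
    # 4x4 stride-2 transposed convolution doubles the spatial resolution
    x = Conv2DTranspose(filters, (4, 4), strides=2, padding='same', kernel_initializer=init)(layer_in)
    x = BatchNormalization()(x)
    if dropout:
        x = Dropout(0.5)(x)
    # U-Net skip connection: concatenate with the matching encoder output
    x = Concatenate()([x, skip_in])
    return Activation('relu')(x)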
Training Process
For training we reuse the generator and discriminator architectures above. The weight initializer is changed to the method suggested in the paper (weights drawn from a Gaussian with mean 0 and standard deviation 0.02), along with the training hyperparameters it recommends (the Adam optimizer with LR=0.0002, B1=0.5, B2=0.999).
PyTorch:
# Training
prev_time = time.time()
for epoch in range(init_epoch, n_epochs):
    for i, batch in enumerate(dataloader):
        real_A = Variable(batch["B"].type(Tensor))
        real_B = Variable(batch["A"].type(Tensor))
        # Adversarial ground truths, shaped like the PatchGAN output
        valid = Variable(Tensor(np.ones((real_A.size(0), *patch))), requires_grad=False)
        fake = Variable(Tensor(np.zeros((real_A.size(0), *patch))), requires_grad=False)

        # Train generator
        optimizer_G.zero_grad()
        fake_B = generator(real_A)
        pred_fake = discriminator(fake_B, real_A)
        loss_GAN = criterion_GAN(pred_fake, valid)        # adversarial loss
        loss_pixel = criterion_pixelwise(fake_B, real_B)  # L1 pixel loss
        loss_G = loss_GAN + lambda_pixel * loss_pixel
        loss_G.backward()
        optimizer_G.step()

        # Train discriminator on real and generated pairs
        optimizer_D.zero_grad()
        pred_real = discriminator(real_B, real_A)
        loss_real = criterion_GAN(pred_real, valid)
        pred_fake = discriminator(fake_B.detach(), real_A)
        loss_fake = criterion_GAN(pred_fake, fake)
        loss_D = (loss_real + loss_fake) * 0.5
        loss_D.backward()
        optimizer_D.step()

        # Estimate time remaining
        batches_done = epoch * len(dataloader) + i
        batches_left = n_epochs * len(dataloader) - batches_done
        time_left = datetime.timedelta(seconds=batches_left * (time.time() - prev_time))
        prev_time = time.time()

        # Print log
        sys.stdout.write(
            "\r[Epoch %d/%d] [Batch %d/%d] [D loss: %f] [G loss: %f, pixel: %f, adv: %f] ETA: %s"
            % (epoch, n_epochs, i, len(dataloader), loss_D.item(), loss_G.item(), loss_pixel.item(),
               loss_GAN.item(), time_left)
        )
        G_losses.append(loss_G.item())
        D_losses.append(loss_D.item())
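The loop refers to several objects defined elsewhere in the script (the optimizers, the two loss criteria, lambda_pixel, patch, and Tensor). Here is a minimal sketch of that setup, assuming 256×256 inputs and an MSE adversarial loss; treat the specifics as assumptions rather than the repository's exact code:

import torch
import torch.nn as nn

generator = Generator()
discriminator = Discriminator()

Tensor = torch.cuda.FloatTensor if torch.cuda.is_available() else torch.FloatTensor

# MSE for the adversarial term, L1 for the pixel term, weighted as in the paper
criterion_GAN = nn.MSELoss()
criterion_pixelwise = nn.L1Loss()
lambda_pixel = 100

# Adam with the hyperparameters suggested in the paper
optimizer_G = torch.optim.Adam(generator.parameters(), lr=0.0002, betas=(0.5, 0.999))
optimizer_D = torch.optim.Adam(discriminator.parameters(), lr=0.0002, betas=(0.5, 0.999))

# PatchGAN output shape for 256x256 inputs: the discriminator emits a 16x16 grid
patch = (1, 256 // 2 ** 4, 256 // 2 ** 4)

# Weights drawn from a zero-mean Gaussian with std 0.02, as the paper suggests
def weights_init_normal(m):
    if isinstance(m, (nn.Conv2d, nn.ConvTranspose2d)):
        nn.init.normal_(m.weight.data, 0.0, 0.02)

generator.apply(weights_init_normal)
discriminator.apply(weights_init_normal)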
Keras:
def train(self, epochs, batch_size=1, sample_interval=50):
    # Adversarial ground truths, shaped like the PatchGAN output
    valid = np.ones((batch_size,) + self.disc_patch)
    fake = np.zeros((batch_size,) + self.disc_patch)
    for epoch in range(epochs):
        for batch_i, (imgs_A, imgs_B) in enumerate(self.data_loader.load_batch(batch_size)):
            # Train discriminator on real and generated pairs
            fake_A = self.generator.predict(imgs_B)
            d_loss_real = self.discriminator.train_on_batch([imgs_A, imgs_B], valid)
            d_loss_fake = self.discriminator.train_on_batch([fake_A, imgs_B], fake)
            d_loss = 0.5 * np.add(d_loss_real, d_loss_fake)
            # Train generator through the combined model (adversarial + L1 targets)
            g_loss = self.combined.train_on_batch([imgs_A, imgs_B], [valid, imgs_A])
            print(
                "[Epoch %d/%d] [Batch %d/%d] [D loss: %f, D Loss Real: %f, D Loss Fake: %f, acc: %3d%%] [G loss: %f, L1 Loss: %f, acc: %3d%%]" % (
                    epoch, epochs,
                    batch_i, self.data_loader.n_batches,
                    d_loss[0], d_loss_real[0], d_loss_fake[0], 100 * d_loss[1],
                    g_loss[0], g_loss[1], 100 * g_loss[2]))
            self.G_losses.append(g_loss[0])
            self.D_losses.append(d_loss[0])
            if batch_i % sample_interval == 0:
                self.sample_images(epoch, batch_i)
    self.plot_metrics(self.G_losses, self.D_losses)
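The Keras loop trains the generator through a self.combined model that is not shown in the excerpt. Here is a hypothetical sketch of how such a model is usually wired for pix2pix, with the discriminator frozen and an MSE adversarial loss paired with a weighted MAE (L1) term; the specifics are assumptions:

from tensorflow.keras.layers import Input
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam

def build_combined(self):
    # Hypothetical wiring of self.combined as used by train() above
    img_A = Input(shape=self.img_shape)  # target image
    img_B = Input(shape=self.img_shape)  # source image
    fake_A = self.generator(img_B)
    self.discriminator.trainable = False  # freeze D while the generator trains
    validity = self.discriminator([fake_A, img_B])
    self.combined = Model([img_A, img_B], [validity, fake_A])
    self.combined.compile(loss=['mse', 'mae'],
                          loss_weights=[1, 100],  # L1 weight from the paper
                          optimizer=Adam(0.0002, 0.5))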
With that, the full pipeline is in place. As you can see, apart from the training step the two frameworks differ very little. If you are interested in the complete code, it is available here: https://github.com/Vargha-Kh/Pix2Pix-GAN

