Compare revisions

a1ed242c · a1ed242c · a1ed242c · a1ed242c · a1ed242c · a1ed242c
--- a/src/methods/domer/domer.pdf
+++ b/src/methods/domer/domer.pdf
--- a/src/methods/domer/forward_model.py
+++ b/src/methods/domer/forward_model.py
+"""A file containing the forward operator.
+This file should NOT be modified.
+"""
+import numpy as np
+from src.checks import check_cfa, check_rgb
+class CFA():
+    def __init__(self, cfa: str, input_shape: tuple) -> None:
+        """Constructor of the forward operator's class.
+        Args:
+            cfa (str): Name of the pattern. Either bayer or quad_bayer.
+            input_shape (tuple): Shape of the input images of the operator.
+        """
+        check_cfa(cfa)  # validation is delegated to src.checks (not visible here)
+        self.cfa = cfa
+        self.input_shape = input_shape
+        self.output_shape = input_shape[:-1]  # input shape without its last axis
+        if self.cfa == 'bayer':
+            self.mask = get_bayer_mask(input_shape)
+        elif self.cfa == 'quad_bayer':  # NOTE(review): no else branch, self.mask stays unset for any other name; presumably check_cfa rejects those - confirm
+            self.mask = get_quad_bayer_mask(input_shape)
+    def direct(self, x: np.ndarray) -> np.ndarray:
+        """Applies the CFA operation to the image x.
+        Args:
+            x (np.ndarray): Input image.
+        Returns:
+            np.ndarray: Output image.
+        """
+        check_rgb(x)
+        return np.sum(x * self.mask, axis=2)  # weight x by the mask, then sum over axis 2
+    def adjoint(self, y: np.ndarray) -> np.ndarray:
+        """Applies the adjoint of the CFA operation.
+        Args:
+            y (np.ndarray): Input image.
+        Returns:
+            np.ndarray: Output image.
+        """
+        return self.mask * y[..., np.newaxis]  # trailing axis added so y broadcasts over the mask's last axis
+def get_bayer_mask(input_shape: tuple) -> np.ndarray:
+    """Return the mask of the Bayer CFA.
+    Args:
+        input_shape (tuple): Shape of the mask.
+    Returns:
+        np.ndarray: Mask.
+    """
+    res = np.kron(np.ones((input_shape[0], input_shape[1], 1)), [0, 1, 0])  # every pixel starts as [0, 1, 0]; only input_shape[0] and input_shape[1] are used
+    res[::2, 1::2] = [1, 0, 0]  # even rows, odd columns
+    res[1::2, ::2] = [0, 0, 1]  # odd rows, even columns
+    return res
+def get_quad_bayer_mask(input_shape: tuple) -> np.ndarray:
+    """Return the mask of the quad_bayer CFA.
+    Args:
+        input_shape (tuple): Shape of the mask.
+    Returns:
+        np.ndarray: Mask.
+    """
+    res = np.kron(np.ones((input_shape[0], input_shape[1], 1)), [0, 1, 0])  # every pixel starts as [0, 1, 0]; the pattern below repeats every 4 rows and 4 columns
+    res[::4, 2::4] = [1, 0, 0]  # rows 0-1, columns 2-3 of each 4x4 tile become [1, 0, 0] (this line and the next three)
+    res[::4, 3::4] = [1, 0, 0]
+    res[1::4, 2::4] = [1, 0, 0]
+    res[1::4, 3::4] = [1, 0, 0]
+    res[2::4, ::4] = [0, 0, 1]  # rows 2-3, columns 0-1 of each 4x4 tile become [0, 0, 1] (this line and the next three)
+    res[2::4, 1::4] = [0, 0, 1]
+    res[3::4, ::4] = [0, 0, 1]
+    res[3::4, 1::4] = [0, 0, 1]
+    return res
+####
+####
+####
+####      ####                ####        #############
+####      ######              ####      ##################
+####      ########            ####      ####################
+####      ##########          ####      ####        ########
+####      ############        ####      ####            ####
+####      ####  ########      ####      ####            ####
+####      ####    ########    ####      ####            ####
+####      ####      ########  ####      ####            ####
+####      ####  ##    ######  ####      ####          ######
+####      ####  ####      ##  ####      ####    ############
+####      ####  ######        ####      ####    ##########
+####      ####  ##########    ####      ####    ########
+####      ####      ########  ####      ####
+####      ####        ############      ####
+####      ####          ##########      ####
+####      ####            ########      ####
+####      ####              ######      ####
+# 2023
+# Authors: Mauro Dalla Mura and Matthieu Muller
--- a/src/methods/domer/image-colorization/-o
+++ b/src/methods/domer/image-colorization/-o
--- a/src/methods/domer/image-colorization/-r
+++ b/src/methods/domer/image-colorization/-r
--- a/src/methods/domer/image-colorization/LICENSE
+++ b/src/methods/domer/image-colorization/LICENSE
+MIT License
+Copyright (c) 2020 priyavrat-misra
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
--- a/src/methods/domer/image-colorization/README.md
+++ b/src/methods/domer/image-colorization/README.md
+# Image Colorization
+![cover](https://github.com/priyavrat-misra/image-colorization/blob/master/images/colorized/bnw_col.png?raw=true "an eye-candy")
+## Contents
+- [Overview](#overview)
+- [Approach](#approach)
+- [Steps](#steps)
+- [Results](#results)
+- [TL;DR](#tldr)
+- [Setup](#setup)
+- [Usage](#usage)
+- [Todo](#todo)
+<br>
+## Overview
+> This project is a Deep Convolutional Neural Network approach to solve the task of image colorization.
+> The goal is to produce a colored image given a grayscale image.<br>
+> At its heart, it uses Convolutional Auto-Encoders to solve this task.
+> First few layers of [ResNet-18](https://arxiv.org/abs/1512.03385) model are used as the Encoder,
+> and the Decoder consists of a series of Deconvolution layers (i.e., upsample layers followed by convolutions) and residual connections.<br>
+> The model is trained on a subset of [MIT Places365](http://places2.csail.mit.edu/index.html) dataset, consisting of `41000` images of landscapes and scenes.
+## Approach
+> The images in the dataset are in RGB Colorspace.
+> Before loading the images, the images are converted to [LAB colorspace](https://en.wikipedia.org/wiki/CIELAB_color_space).
+> This colorspace contains exactly the same information as RGB.<br>
+> It has 3 channels, `Lightness, A and B`.
+> The lightness channel can be used as the grayscale equivalent of a colored image,
+> the remaining 2 channels (A and B) contain the color information.<br>
+>
+> In a nutshell, the training process follows these steps:
+>> 1. The lightness channel is separated from the other 2 channels and used as the model's input.
+>> 2. The model predicts the A and B channels (or 'AB' for short).
+>> 3. The loss is calculated by comparing the predicted AB and the corresponding original AB of the input image.
+>
+> More about the training process can be found [here](https://github.com/priyavrat-misra/image-colorization/blob/master/train.ipynb "train.ipynb").
+## Steps
+> 1. [Defining a model architecture:](https://github.com/priyavrat-misra/image-colorization/blob/master/network.py "network.py")
+>    - The model follows an Auto-Encoder kind of architecture i.e., it has an `encoder` and a `decoder` part.
+>    - The encoder is used to _extract features_ of an image whereas,
+>    - the decoder is used to upsample the features. In other words, it increases the _spatial resolution_.
+>    - Here, the layers of the encoder are taken from the ResNet-18 model, and the first conv layer is modified to take a single channel as input (i.e., grayscale or lightness) rather than 3 channels.
+>    - The decoder uses nearest neighbor upsampling (for increasing the spatial resolution),
+>     followed by convolutional layers (for dealing with the depth).
+>    - A more detailed visualization of the model architecture can be seen [here](https://github.com/priyavrat-misra/image-colorization/blob/master/images/architecture.png?raw=true 'after all "A picture is worth a thousand words" :)').
+> 2. [Defining a custom dataloader:](https://github.com/priyavrat-misra/image-colorization/blob/master/utils.py "utils.GrayscaleImageFolder")
+>    - when loading the images, it converts them to LAB, and returns L and AB separately.
+>    - it also performs a few data processing tasks, such as applying transforms and normalization.
+> 3. [Training the model:](https://github.com/priyavrat-misra/image-colorization/blob/master/train.ipynb "train.ipynb")
+>    - The model is trained for 64 epochs with [Adam Optimization](https://arxiv.org/abs/1412.6980).
+>    - For calculating the loss between the predicted AB and the original AB, Mean Squared Error is used.
+> 4. [Inference:](https://github.com/priyavrat-misra/image-colorization/blob/master/inference.ipynb "inference.ipynb")
+>    - Inference is done with unseen images and the results look promising, or should I say "natural"? :)
+## Results
+> ![results](https://github.com/priyavrat-misra/image-colorization/blob/master/images/results.png?raw=true)
+> _<sup>More colorized examples can be found [here](https://github.com/priyavrat-misra/image-colorization/blob/master/images/colorized/).</sup>_
+## TL;DR
+> Given an image, the model can colorize it.
+## Setup
+- Clone and change directory:
+```bash
+git clone "https://github.com/priyavrat-misra/image-colorization.git"
+cd image-colorization/
+```
+- Dependencies:
+```bash
+pip install -r requirements.txt
+```
+## Usage
+```bash
+python colorize.py --img_path <path/to/image.jpg> --out_path <path/to/output.jpg> --res 360
+# or the short way:
+python colorize.py -i <path/to/image.jpg> -o <path/to/output.jpg> -r 360
+```
+_Note:_
+> - As the model is trained with 224x224 images, it gives the best results when `--res` is set to lower resolutions (<=480) and okay-ish results when set to around 720.
+> - Setting `--res` higher than that of input image won't increase the output's quality.
+<br>
+## Todo
+- [x] define & train a model architecture
+- [x] add argparse support
+- [x] define a more residual architecture
+- [x] use pretrained resnet-18 params for the layers used in the encoder & train the model
+- [x] check how the colorization effect varies with image resolution
+- [x] separate the model from the checkpoint file to a different file
+- [x] complete README.md
+- [ ] deploy with flask
+- [ ] _after that, host it maybe?_
+<br>
+For any queries, feel free to reach out to me on [LinkedIn](https://linkedin.com/in/priyavrat-misra/).
\ No newline at end of file
--- a/src/methods/domer/image-colorization/__init__,py
+++ b/src/methods/domer/image-colorization/__init__,py
--- a/src/methods/domer/image-colorization/colorize.py
+++ b/src/methods/domer/image-colorization/colorize.py
+import argparse
+import torch
+from torchvision.utils import save_image
+from network import ColorizeNet
+from utils import load_gray, to_rgb
+import os
+parser = argparse.ArgumentParser(description='colorizes an image')
+parser.add_argument('-i', '--img_path', type=str, metavar='', required=True,
+                    help='path and/or name of grayscale image to colorize')
+parser.add_argument('-r', '--res', type=int, metavar='',
+                    help='resizes the input to given resolution {default:360}')
+parser.add_argument('-o', '--out_path', type=str, metavar='', required=True,
+                    help='name to which the colorized image to be saved')
+device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
+current_directory = os.getcwd()
+c_path='src/methods/domer/image-colorization/models/model.pth'
+c_path = os.path.join(current_directory, c_path)
+model = ColorizeNet()
+model.load_state_dict(
+    torch.load(c_path, map_location='cpu')
+)
+def main():
+    args = parser.parse_args()
+    img_l = load_gray(args.img_path, shape=args.res)
+    model.eval()
+    with torch.no_grad():
+        img_ab = model(img_l)
+    img_rgb = to_rgb(img_l, img_ab)
+    save_image(torch.from_numpy(img_rgb.transpose(2, 0, 1)),
+               args.out_path)
+    print(f'>>> colorized image saved to "{args.out_path}"')
+if __name__ == '__main__':
+    main()
--- a/src/methods/domer/image-colorization/inference.ipynb
+++ b/src/methods/domer/image-colorization/inference.ipynb
--- a/src/methods/domer/image-colorization/input/0.jpg
+++ b/src/methods/domer/image-colorization/input/0.jpg
--- a/src/methods/domer/image-colorization/input/1.jpg
+++ b/src/methods/domer/image-colorization/input/1.jpg
--- a/src/methods/domer/image-colorization/input/2.jpg
+++ b/src/methods/domer/image-colorization/input/2.jpg
--- a/src/methods/domer/image-colorization/input/3.jpg
+++ b/src/methods/domer/image-colorization/input/3.jpg
--- a/src/methods/domer/image-colorization/input/6.jpg
+++ b/src/methods/domer/image-colorization/input/6.jpg
--- a/src/methods/domer/image-colorization/input/7.jpg
+++ b/src/methods/domer/image-colorization/input/7.jpg
--- a/src/methods/domer/image-colorization/models/checkpoint_0.001.pth
+++ b/src/methods/domer/image-colorization/models/checkpoint_0.001.pth
--- a/src/methods/domer/image-colorization/models/model.pth
+++ b/src/methods/domer/image-colorization/models/model.pth
--- a/src/methods/domer/image-colorization/network.ipynb
+++ b/src/methods/domer/image-colorization/network.ipynb
+%% Cell type:code id: tags:
+``` 
+from network import ColorizeNet
+model = ColorizeNet()
+model
+```
+%% Output
+    ColorizeNet(
+      (encoder): Sequential(
+        (0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
+        (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+        (2): ReLU(inplace=True)
+        (3): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
+        (4): Sequential(
+          (0): BasicBlock(
+            (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
+            (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+            (relu): ReLU(inplace=True)
+            (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
+            (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+          )
+          (1): BasicBlock(
+            (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
+            (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+            (relu): ReLU(inplace=True)
+            (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
+            (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+          )
+        )
+        (5): Sequential(
+          (0): BasicBlock(
+            (conv1): Conv2d(64, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
+            (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+            (relu): ReLU(inplace=True)
+            (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
+            (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+            (downsample): Sequential(
+              (0): Conv2d(64, 128, kernel_size=(1, 1), stride=(2, 2), bias=False)
+              (1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+            )
+          )
+          (1): BasicBlock(
+            (conv1): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
+            (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+            (relu): ReLU(inplace=True)
+            (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
+            (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+          )
+        )
+      )
+      (decoder): Sequential(
+        (0): Sequential(
+          (0): BasicBlock(
+            (conv1): Conv2d(128, 64, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2), bias=False)
+            (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+            (relu): ReLU(inplace=True)
+            (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
+            (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+            (res_conv): Conv2d(128, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
+            (res_bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+            (upsample): Upsample(scale_factor=2.0, mode=nearest)
+          )
+          (1): BasicBlock(
+            (conv1): Conv2d(64, 64, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2), bias=False)
+            (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+            (relu): ReLU(inplace=True)
+            (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
+            (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+            (activation): ReLU(inplace=True)
+          )
+        )
+        (1): Sequential(
+          (0): BasicBlock(
+            (conv1): Conv2d(64, 32, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2), bias=False)
+            (bn1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+            (relu): ReLU(inplace=True)
+            (conv2): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
+            (bn2): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+            (res_conv): Conv2d(64, 32, kernel_size=(1, 1), stride=(1, 1), bias=False)
+            (res_bn): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+            (upsample): Upsample(scale_factor=2.0, mode=nearest)
+          )
+          (1): BasicBlock(
+            (conv1): Conv2d(32, 32, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2), bias=False)
+            (bn1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+            (relu): ReLU(inplace=True)
+            (conv2): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
+            (bn2): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+            (activation): ReLU(inplace=True)
+          )
+        )
+        (2): Sequential(
+          (0): BasicBlock(
+            (conv1): Conv2d(32, 2, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2), bias=False)
+            (bn1): BatchNorm2d(2, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+            (relu): ReLU(inplace=True)
+            (conv2): Conv2d(2, 2, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
+            (bn2): BatchNorm2d(2, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+            (res_conv): Conv2d(32, 2, kernel_size=(1, 1), stride=(1, 1), bias=False)
+            (res_bn): BatchNorm2d(2, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+            (upsample): Upsample(scale_factor=2.0, mode=nearest)
+          )
+          (1): BasicBlock(
+            (conv1): Conv2d(2, 2, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2), bias=False)
+            (bn1): BatchNorm2d(2, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+            (relu): ReLU(inplace=True)
+            (conv2): Conv2d(2, 2, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
+            (bn2): BatchNorm2d(2, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+            (activation): Sigmoid()
+          )
+        )
+      )
+    )
+%% Cell type:code id: tags:
+``` 
+import torch
+# to check if our model is working as expected
+model(torch.rand((2, 1, 224, 224))).shape
+```
+%% Output
+    torch.Size([2, 2, 224, 224])
+%% Cell type:code id: tags:
+``` 
+from utils import count_params
+count_params(model)  # no of trainable parameters
+```
+%% Output
+    1166016
+%% Cell type:code id: tags:
+``` 
+from network import ColorizeNet
+model = ColorizeNet()
+model
+```
+%% Output
+    ColorizeNet(
+      (encoder): Sequential(
+        (0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
+        (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+        (2): ReLU(inplace=True)
+        (3): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
+        (4): Sequential(
+          (0): BasicBlock(
+            (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
+            (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+            (relu): ReLU(inplace=True)
+            (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
+            (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+          )
+          (1): BasicBlock(
+            (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
+            (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+            (relu): ReLU(inplace=True)
+            (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
+            (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+          )
+        )
+        (5): Sequential(
+          (0): BasicBlock(
+            (conv1): Conv2d(64, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
+            (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+            (relu): ReLU(inplace=True)
+            (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
+            (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+            (downsample): Sequential(
+              (0): Conv2d(64, 128, kernel_size=(1, 1), stride=(2, 2), bias=False)
+              (1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+            )
+          )
+          (1): BasicBlock(
+            (conv1): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
+            (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+            (relu): ReLU(inplace=True)
+            (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
+            (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+          )
+        )
+      )
+      (decoder): Sequential(
+        (0): Sequential(
+          (0): BasicBlock(
+            (conv1): Conv2d(128, 64, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2), bias=False)
+            (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+            (relu): ReLU(inplace=True)
+            (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
+            (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+            (res_conv): Conv2d(128, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
+            (res_bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+            (upsample): Upsample(scale_factor=2.0, mode=nearest)
+          )
+          (1): BasicBlock(
+            (conv1): Conv2d(64, 64, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2), bias=False)
+            (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+            (relu): ReLU(inplace=True)
+            (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
+            (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+            (activation): ReLU(inplace=True)
+          )
+        )
+        (1): Sequential(
+          (0): BasicBlock(
+            (conv1): Conv2d(64, 32, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2), bias=False)
+            (bn1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+            (relu): ReLU(inplace=True)
+            (conv2): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
+            (bn2): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+            (res_conv): Conv2d(64, 32, kernel_size=(1, 1), stride=(1, 1), bias=False)
+            (res_bn): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+            (upsample): Upsample(scale_factor=2.0, mode=nearest)
+          )
+          (1): BasicBlock(
+            (conv1): Conv2d(32, 32, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2), bias=False)
+            (bn1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+            (relu): ReLU(inplace=True)
+            (conv2): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
+            (bn2): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+            (activation): ReLU(inplace=True)
+          )
+        )
+        (2): Sequential(
+          (0): BasicBlock(
+            (conv1): Conv2d(32, 2, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2), bias=False)
+            (bn1): BatchNorm2d(2, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+            (relu): ReLU(inplace=True)
+            (conv2): Conv2d(2, 2, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
+            (bn2): BatchNorm2d(2, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+            (res_conv): Conv2d(32, 2, kernel_size=(1, 1), stride=(1, 1), bias=False)
+            (res_bn): BatchNorm2d(2, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+            (upsample): Upsample(scale_factor=2.0, mode=nearest)
+          )
+          (1): BasicBlock(
+            (conv1): Conv2d(2, 2, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2), bias=False)
+            (bn1): BatchNorm2d(2, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+            (relu): ReLU(inplace=True)
+            (conv2): Conv2d(2, 2, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
+            (bn2): BatchNorm2d(2, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+            (activation): Sigmoid()
+          )
+        )
+      )
+    )
+%% Cell type:code id: tags:
+``` 
+import torch
+# to check if our model is working as expected
+model(torch.rand((2, 1, 224, 224))).shape
+```
+%% Output
+    torch.Size([2, 2, 224, 224])
+%% Cell type:code id: tags:
+``` 
+from utils import count_params
+count_params(model)  # no of trainable parameters
+```
+%% Output
+    1166016
--- a/src/methods/domer/image-colorization/network.py
+++ b/src/methods/domer/image-colorization/network.py
+import torch.nn as nn
+import torchvision.models as models
+class BasicBlock(nn.Module):
+    def __init__(self, in_channels, out_channels,
+                 activation=None, upsample=None):
+        super().__init__()
+        self.conv1 = nn.Conv2d(
+            in_channels=in_channels, out_channels=out_channels,
+            kernel_size=5, stride=1, padding=2, bias=False
+        )
+        self.bn1 = nn.BatchNorm2d(num_features=out_channels)
+        self.relu = nn.ReLU(inplace=True)
+        self.conv2 = nn.Conv2d(
+            in_channels=out_channels, out_channels=out_channels,
+            kernel_size=3, stride=1, padding=1, bias=False
+        )
+        self.bn2 = nn.BatchNorm2d(num_features=out_channels)
+        if activation is not None:  # an activation was passed: no residual projection layers are created
+            self.activation = activation
+        else:  # no activation: create the 1x1 conv + batch norm that forward() applies to the residual
+            self.res_conv = nn.Conv2d(
+                in_channels=in_channels, out_channels=out_channels,
+                kernel_size=1, stride=1, bias=False
+            )
+            self.res_bn = nn.BatchNorm2d(num_features=out_channels)
+        self.upsample = upsample
+    def forward(self, t):
+        res = t  # block input, kept for the residual addition
+        t = self.conv1(t)
+        t = self.bn1(t)
+        t = self.relu(t)
+        t = self.conv2(t)
+        t = self.bn2(t)
+        if self.upsample is not None:  # NOTE(review): uses res_conv/res_bn, which exist only when activation was None
+            res = self.res_conv(res)
+            res = self.res_bn(res)
+            t += res
+            t = self.relu(t)
+            t = self.upsample(t)
+        else:  # NOTE(review): uses self.activation, which exists only when an activation was passed
+            t += res  # input is added unprojected here
+            t = self.activation(t)
+        return t
+class ColorizeNet(nn.Module):
+    def __init__(self):
+        super().__init__()
+        # make pretrained=True before starting the training process
+        resnet18 = models.resnet18(pretrained=False)
+        # change first conv layer to accept single channel (grayscale)
+        resnet18.conv1.weight = nn.Parameter(
+            resnet18.conv1.weight.mean(dim=1).unsqueeze(dim=1))  # mean over dim 1, then a size-1 dim is re-inserted; only the weight is replaced, not the Conv2d module
+        # use first 3 layers of ResNet-18 as encoder
+        self.encoder = nn.Sequential(
+            *list(resnet18.children())[:6]  # first six child modules of resnet18
+        )
+        self.decoder = nn.Sequential(
+            self._make_layer(BasicBlock, 128, 64, nn.ReLU(inplace=True)),
+            self._make_layer(BasicBlock, 64, 32, nn.ReLU(inplace=True)),
+            self._make_layer(BasicBlock, 32, 2, nn.Sigmoid())  # last stage ends with 2 output channels passed through a sigmoid
+        )
+    def _make_layer(self, block, in_channels, out_channels, activation):
+        upsample = nn.Upsample(scale_factor=2, mode='nearest')
+        layers = []
+        layers.append(block(in_channels, out_channels, upsample=upsample))  # first block: changes the channel count and upsamples by 2
+        layers.append(block(out_channels, out_channels, activation=activation))  # second block: keeps the channel count and applies the given activation
+        return nn.Sequential(*layers)
+    def forward(self, t):
+        t = self.encoder(t)
+        t = self.decoder(t)
+        return t
--- a/src/methods/domer/image-colorization/output/0.jpg
+++ b/src/methods/domer/image-colorization/output/0.jpg
No results found