Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
  • samanost/sicom_image_analysis_project
  • gerayelk/sicom_image_analysis_project
  • jelassiy/sicom_image_analysis_project
  • chardoto/sicom_image_analysis_project
  • chaarim/sicom_image_analysis_project
  • domers/sicom_image_analysis_project
  • elmurrt/sicom_image_analysis_project
  • sadonest/sicom_image_analysis_project
  • kouddann/sicom_image_analysis_project
  • mirabitj/sicom-image-analysis-project-mirabito
  • plotj/sicom_image_analysis_project
  • torrem/sicom-image-analysis-project-maxime-torre
  • dzike/sicom_image_analysis_project
  • daip/sicom_image_analysis_project
  • casanovv/sicom_image_analysis_project
  • girmarti/sicom_image_analysis_project
  • lioretn/sicom_image_analysis_project
  • lemoinje/sicom_image_analysis_project
  • ouahmanf/sicom_image_analysis_project
  • vouilloa/sicom_image_analysis_project
  • diopb/sicom_image_analysis_project
  • davidale/sicom_image_analysis_project
  • enza/sicom_image_analysis_project
  • conversb/sicom_image_analysis_project
  • mullemat/sicom_image_analysis_project
25 results
Show changes
Showing
with 683 additions and 0 deletions
File added
"""A file containing the forward operator.
This file should NOT be modified.
"""
import numpy as np
from src.checks import check_cfa, check_rgb
class CFA():
    """Forward operator of a colour filter array (CFA).

    A mask is selected from the pattern name at construction time; both
    ``direct`` and ``adjoint`` are plain element-wise products with it.
    """

    def __init__(self, cfa: str, input_shape: tuple) -> None:
        """Set up the operator for one pattern and one image shape.

        Args:
            cfa (str): Name of the pattern. Either bayer or quad_bayer.
            input_shape (tuple): Shape of the input images of the operator.
        """
        check_cfa(cfa)

        self.cfa = cfa
        self.input_shape = input_shape
        # The output shape is the input shape without its last axis.
        self.output_shape = input_shape[:-1]

        # No mask attribute is created when the name matches neither pattern.
        is_bayer = self.cfa == 'bayer'
        if is_bayer or self.cfa == 'quad_bayer':
            build_mask = get_bayer_mask if is_bayer else get_quad_bayer_mask
            self.mask = build_mask(input_shape)

    def direct(self, x: np.ndarray) -> np.ndarray:
        """Apply the CFA operation to the image x.

        Args:
            x (np.ndarray): Input image.

        Returns:
            np.ndarray: ``x`` multiplied by the mask and summed over axis 2.
        """
        check_rgb(x)
        masked = x * self.mask
        return np.sum(masked, axis=2)

    def adjoint(self, y: np.ndarray) -> np.ndarray:
        """Apply the adjoint of the CFA operation.

        Args:
            y (np.ndarray): Input image.

        Returns:
            np.ndarray: The mask multiplied by ``y`` after ``y`` has been
                given a trailing axis of length one.
        """
        with_channel_axis = y[..., np.newaxis]
        return self.mask * with_channel_axis
def get_bayer_mask(input_shape: tuple) -> np.ndarray:
    """Build the mask of the Bayer CFA.

    Only the first two entries of ``input_shape`` are used. Every pixel of the
    result has a single 1 among its three channel slots: slot 0 on even rows /
    odd columns, slot 2 on odd rows / even columns, slot 1 everywhere else.

    Args:
        input_shape (tuple): Shape of the mask.

    Returns:
        np.ndarray: Mask.
    """
    n_rows, n_cols = input_shape[0], input_shape[1]

    # Start with slot 1 selected everywhere, then overwrite the other sites.
    mask = np.zeros((n_rows, n_cols, 3))
    mask[..., 1] = 1
    mask[0::2, 1::2, :] = (1, 0, 0)
    mask[1::2, 0::2, :] = (0, 0, 1)
    return mask
def get_quad_bayer_mask(input_shape: tuple) -> np.ndarray:
    """Build the mask of the quad_bayer CFA.

    Only the first two entries of ``input_shape`` are used. The pattern
    repeats every 4 rows and 4 columns: within each 4x4 tile the upper-right
    2x2 quadrant selects slot 0, the lower-left 2x2 quadrant selects slot 2,
    and the two remaining quadrants select slot 1.

    Args:
        input_shape (tuple): Shape of the mask.

    Returns:
        np.ndarray: Mask.
    """
    n_rows, n_cols = input_shape[0], input_shape[1]

    mask = np.zeros((n_rows, n_cols, 3))
    mask[..., 1] = 1

    # Position inside the repeating 4x4 tile decides the quadrant.
    in_upper_half = (np.arange(n_rows) % 4 < 2)[:, np.newaxis]
    in_right_half = (np.arange(n_cols) % 4 >= 2)[np.newaxis, :]

    mask[in_upper_half & in_right_half] = (1, 0, 0)
    mask[~in_upper_half & ~in_right_half] = (0, 0, 1)
    return mask
####
####
####
#### #### #### #############
#### ###### #### ##################
#### ######## #### ####################
#### ########## #### #### ########
#### ############ #### #### ####
#### #### ######## #### #### ####
#### #### ######## #### #### ####
#### #### ######## #### #### ####
#### #### ## ###### #### #### ######
#### #### #### ## #### #### ############
#### #### ###### #### #### ##########
#### #### ########## #### #### ########
#### #### ######## #### ####
#### #### ############ ####
#### #### ########## ####
#### #### ######## ####
#### #### ###### ####
# 2023
# Authors: Mauro Dalla Mura and Matthieu Muller
MIT License
Copyright (c) 2020 priyavrat-misra
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
# Image Colorization
![cover](https://github.com/priyavrat-misra/image-colorization/blob/master/images/colorized/bnw_col.png?raw=true "an eye-candy")
## Contents
- [Overview](#overview)
- [Approach](#approach)
- [Steps](#steps)
- [Results](#results)
- [TL;DR](#tldr)
- [Setup](#setup)
- [Usage](#usage)
- [Todo](#todo)
<br>
## Overview
> This project is a Deep Convolutional Neural Network approach to solve the task of image colorization.
> The goal is to produce a colored image given a grayscale image.<br>
> At its heart, it uses Convolutional Auto-Encoders to solve this task.
> First few layers of [ResNet-18](https://arxiv.org/abs/1512.03385) model are used as the Encoder,
> and the Decoder consists of a series of Deconvolution layers (i.e., upsample layers followed by convolutions) and residual connections.<br>
> The model is trained on a subset of [MIT Places365](http://places2.csail.mit.edu/index.html) dataset, consisting of `41000` images of landscapes and scenes.
## Approach
> The images in the dataset are in RGB Colorspace.
> Before loading the images, the images are converted to [LAB colorspace](https://en.wikipedia.org/wiki/CIELAB_color_space).
> This colorspace contains exactly the same information as RGB.<br>
> It has 3 channels, `Lightness, A and B`.
> The lightness channel can be used as the grayscale equivalent of a colored image,
> the remaining 2 channels (A and B) contain the color information.<br>
>
> In a nutshell, the training process follows these steps:
>> 1. The lightness channel is separated from the other 2 channels and used as the model's input.
>> 2. The model predicts the A and B channels (or 'AB' for short).
>> 3. The loss is calculated by comparing the predicted AB and the corresponding original AB of the input image.
>
> More about the training process can be found [here](https://github.com/priyavrat-misra/image-colorization/blob/master/train.ipynb "train.ipynb").
## Steps
> 1. [Defining a model architecture:](https://github.com/priyavrat-misra/image-colorization/blob/master/network.py "network.py")
> - The model follows an Auto-Encoder kind of architecture i.e., it has an `encoder` and a `decoder` part.
> - The encoder is used to _extract features_ of an image whereas,
> - the decoder is used to upsample the features. In other words, it increases the _spatial resolution_.
> - In here, the layers of the encoder are taken from ResNet-18 model, and the first conv layer is modified to take a single channel as input (i.e., grayscale or lightness) rather than 3 channels.
> - The decoder uses nearest neighbor upsampling (for increasing the spatial resolution),
> followed by convolutional layers (for dealing with the depth).
> - A more detailed visualization of the model architecture can be seen [here](https://github.com/priyavrat-misra/image-colorization/blob/master/images/architecture.png?raw=true 'after all "A picture is worth a thousand words" :)').
> 2. [Defining a custom dataloader:](https://github.com/priyavrat-misra/image-colorization/blob/master/utils.py "utils.GrayscaleImageFolder")
> - when loading the images, it converts them to LAB, and returns L and AB separately.
> - it also performs a few data processing tasks, such as applying transforms and normalization.
> 3. [Training the model:](https://github.com/priyavrat-misra/image-colorization/blob/master/train.ipynb "train.ipynb")
> - The model is trained for 64 epochs with [Adam Optimization](https://arxiv.org/abs/1412.6980).
> - For calculating the loss between the predicted AB and the original AB, Mean Squared Error is used.
> 4. [Inference:](https://github.com/priyavrat-misra/image-colorization/blob/master/inference.ipynb "inference.ipynb")
> - Inference is done with unseen images and the results look promising, or should I say "natural"? :)
## Results
> ![results](https://github.com/priyavrat-misra/image-colorization/blob/master/images/results.png?raw=true)
> _<sup>More colorized examples can be found [here](https://github.com/priyavrat-misra/image-colorization/blob/master/images/colorized/).</sup>_
## TL;DR
> Given an image, the model can colorize it.
## Setup
- Clone and change directory:
```bash
git clone "https://github.com/priyavrat-misra/image-colorization.git"
cd image-colorization/
```
- Dependencies:
```bash
pip install -r requirements.txt
```
## Usage
```bash
python colorize.py --img_path <path/to/image.jpg> --out_path <path/to/output.jpg> --res 360
# or the short-way:
python colorize.py -i <path/to/image.jpg> -o <path/to/output.jpg> -r 360
```
_Note:_
> - As the model is trained with 224x224 images, it gives best results when `--res` is set to lower resolutions (<=480) and okay-ish when set around ~720.
> - Setting `--res` higher than that of input image won't increase the output's quality.
<br>
## Todo
- [x] define & train a model architecture
- [x] add argparse support
- [x] define a more residual architecture
- [x] use pretrained resnet-18 params for the layers used in the encoder & train the model
- [x] check how the colorization effect varies with image resolution
- [x] separate the model from the checkpoint file to a different file
- [x] complete README.md
- [ ] deploy with flask
- [ ] _after that, host it maybe?_
<br>
For any queries, feel free to reach out to me on [LinkedIn](https://linkedin.com/in/priyavrat-misra/).
\ No newline at end of file
import argparse
import torch
from torchvision.utils import save_image
from network import ColorizeNet
from utils import load_gray, to_rgb
import os
# Command-line interface of the colorization script.
parser = argparse.ArgumentParser(description='colorizes an image')
parser.add_argument('-i', '--img_path', type=str, metavar='', required=True,
                    help='path and/or name of grayscale image to colorize')
# No default is configured for --res, so ``args.res`` is None when the flag
# is omitted. The help text used to advertise a default of 360 that was never
# applied; it now states what the parser actually does.
parser.add_argument('-r', '--res', type=int, metavar='',
                    help='resizes the input to given resolution '
                         '{optional, no default}')
parser.add_argument('-o', '--out_path', type=str, metavar='', required=True,
                    help='name to which the colorized image to be saved')

# NOTE(review): `device` is computed but not used below; the checkpoint is
# mapped to the CPU and the model is never moved. Kept so the module-level
# name stays available.
device = 'cuda:0' if torch.cuda.is_available() else 'cpu'

# Primary location of the checkpoint: relative to the current working
# directory, which only resolves when the script is launched from the
# repository root.
current_directory = os.getcwd()
c_path = 'src/methods/domer/image-colorization/models/model.pth'
c_path = os.path.join(current_directory, c_path)

# Fallback for launches from another directory: look for models/model.pth
# next to this script. Assumes the script sits beside the models/ folder, as
# the hard-coded path above and the sibling `network`/`utils` imports
# suggest — TODO confirm. Only used when the primary path is missing and the
# fallback file really exists.
_script_dir = os.path.dirname(os.path.abspath(__file__))
_local_c_path = os.path.join(_script_dir, 'models', 'model.pth')
if not os.path.isfile(c_path) and os.path.isfile(_local_c_path):
    c_path = _local_c_path

# The network is built and its weights restored at import time.
model = ColorizeNet()
# NOTE(security): torch.load unpickles the file; only load checkpoints from a
# trusted source.
model.load_state_dict(
    torch.load(c_path, map_location='cpu')
)
def main():
    """Colorize the image named on the command line and save the result.

    Reads ``img_path``, ``res`` and ``out_path`` from the module-level
    ``parser``, runs the module-level ``model`` in evaluation mode without
    gradient tracking, and writes the converted image to ``out_path``.
    """
    cli = parser.parse_args()
    # presumably the lightness channel of the input — verify against
    # utils.load_gray
    lightness = load_gray(cli.img_path, shape=cli.res)

    model.eval()
    with torch.no_grad():
        predicted_ab = model(lightness)

    rgb = to_rgb(lightness, predicted_ab)
    # Move the last axis to the front before handing the array to save_image.
    channels_first = torch.from_numpy(rgb.transpose(2, 0, 1))
    save_image(channels_first, cli.out_path)

    print(f'>>> colorized image saved to "{cli.out_path}"')


if __name__ == '__main__':
    main()
source diff could not be displayed: it is too large. Options to address this: view the blob.
src/methods/domer/image-colorization/input/0.jpg

24.3 KiB

src/methods/domer/image-colorization/input/1.jpg

182 KiB

src/methods/domer/image-colorization/input/2.jpg

159 KiB

src/methods/domer/image-colorization/input/3.jpg

159 KiB

src/methods/domer/image-colorization/input/6.jpg

13.4 KiB

src/methods/domer/image-colorization/input/7.jpg

14.9 KiB

File added
File added
%% Cell type:code id: tags:
```
from network import ColorizeNet
model = ColorizeNet()
model
```
%% Output
ColorizeNet(
(encoder): Sequential(
(0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
(1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU(inplace=True)
(3): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
(4): Sequential(
(0): BasicBlock(
(conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
(1): BasicBlock(
(conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
)
(5): Sequential(
(0): BasicBlock(
(conv1): Conv2d(64, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
(bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(downsample): Sequential(
(0): Conv2d(64, 128, kernel_size=(1, 1), stride=(2, 2), bias=False)
(1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
)
(1): BasicBlock(
(conv1): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
)
)
(decoder): Sequential(
(0): Sequential(
(0): BasicBlock(
(conv1): Conv2d(128, 64, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2), bias=False)
(bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(res_conv): Conv2d(128, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
(res_bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(upsample): Upsample(scale_factor=2.0, mode=nearest)
)
(1): BasicBlock(
(conv1): Conv2d(64, 64, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2), bias=False)
(bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(activation): ReLU(inplace=True)
)
)
(1): Sequential(
(0): BasicBlock(
(conv1): Conv2d(64, 32, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2), bias=False)
(bn1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(conv2): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(res_conv): Conv2d(64, 32, kernel_size=(1, 1), stride=(1, 1), bias=False)
(res_bn): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(upsample): Upsample(scale_factor=2.0, mode=nearest)
)
(1): BasicBlock(
(conv1): Conv2d(32, 32, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2), bias=False)
(bn1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(conv2): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(activation): ReLU(inplace=True)
)
)
(2): Sequential(
(0): BasicBlock(
(conv1): Conv2d(32, 2, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2), bias=False)
(bn1): BatchNorm2d(2, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(conv2): Conv2d(2, 2, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(2, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(res_conv): Conv2d(32, 2, kernel_size=(1, 1), stride=(1, 1), bias=False)
(res_bn): BatchNorm2d(2, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(upsample): Upsample(scale_factor=2.0, mode=nearest)
)
(1): BasicBlock(
(conv1): Conv2d(2, 2, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2), bias=False)
(bn1): BatchNorm2d(2, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(conv2): Conv2d(2, 2, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(2, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(activation): Sigmoid()
)
)
)
)
%% Cell type:code id: tags:
```
import torch
# to check if our model is working as expected
model(torch.rand((2, 1, 224, 224))).shape
```
%% Output
torch.Size([2, 2, 224, 224])
%% Cell type:code id: tags:
```
from utils import count_params
count_params(model) # no of trainable parameters
```
%% Output
1166016
import torch.nn as nn
import torchvision.models as models
class BasicBlock(nn.Module):
    """Residual decoder block, usable in exactly one of two modes.

    Both modes share the main path ``conv5x5 -> BN -> ReLU -> conv3x3 -> BN``.

    * Upsampling mode (``upsample`` given): the input goes through a 1x1
      convolution and batch norm before being added to the main path; the sum
      is passed through ReLU and then through ``upsample``.
    * Activation mode (``activation`` given): the unmodified input is added to
      the main path and the sum is passed through ``activation``. The addition
      is in place, so the input must be addable to a tensor with
      ``out_channels`` channels.

    Args:
        in_channels (int): Number of channels of the input tensor.
        out_channels (int): Number of channels produced by the block.
        activation (nn.Module, optional): Final activation (activation mode).
        upsample (nn.Module, optional): Upsampling layer (upsampling mode).

    Raises:
        ValueError: If both or neither of ``activation`` and ``upsample`` are
            given. Such blocks could previously be constructed but failed in
            ``forward`` with an ``AttributeError``.
    """

    def __init__(self, in_channels, out_channels,
                 activation=None, upsample=None):
        super().__init__()

        # forward() needs either (res_conv, res_bn, upsample) or activation;
        # any other combination leaves one of them undefined.
        if (activation is None) == (upsample is None):
            raise ValueError(
                'BasicBlock requires exactly one of `activation` or '
                '`upsample`'
            )

        # Layer names and creation order are unchanged so that existing
        # checkpoints keep loading.
        self.conv1 = nn.Conv2d(
            in_channels=in_channels, out_channels=out_channels,
            kernel_size=5, stride=1, padding=2, bias=False
        )
        self.bn1 = nn.BatchNorm2d(num_features=out_channels)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = nn.Conv2d(
            in_channels=out_channels, out_channels=out_channels,
            kernel_size=3, stride=1, padding=1, bias=False
        )
        self.bn2 = nn.BatchNorm2d(num_features=out_channels)

        if activation is not None:
            self.activation = activation
        else:
            # Projection of the skip connection onto out_channels.
            self.res_conv = nn.Conv2d(
                in_channels=in_channels, out_channels=out_channels,
                kernel_size=1, stride=1, bias=False
            )
            self.res_bn = nn.BatchNorm2d(num_features=out_channels)

        self.upsample = upsample

    def forward(self, t):
        """Run the block on ``t`` and return the resulting tensor."""
        res = t

        t = self.conv1(t)
        t = self.bn1(t)
        t = self.relu(t)
        t = self.conv2(t)
        t = self.bn2(t)

        if self.upsample is not None:
            # Upsampling mode: projected skip, ReLU, then upsample.
            res = self.res_conv(res)
            res = self.res_bn(res)
            t += res
            t = self.relu(t)
            t = self.upsample(t)
        else:
            # Activation mode: identity skip, then the supplied activation.
            t += res
            t = self.activation(t)

        return t
class ColorizeNet(nn.Module):
    """Encoder/decoder network mapping a 1-channel image to 2 channels.

    The encoder is the first six children of a torchvision ResNet-18 with its
    first convolution changed to take a single input channel. The decoder is
    three stages built by ``_make_layer``, each containing one upsampling
    ``BasicBlock`` (scale factor 2) followed by one activation ``BasicBlock``;
    the last stage ends in a Sigmoid and has 2 output channels.
    """

    def __init__(self):
        super().__init__()

        # Randomly initialised backbone. Calling resnet18() without arguments
        # loads no pretrained weights on both the legacy (`pretrained=`) and
        # the current (`weights=`) torchvision APIs, and avoids the
        # deprecation warning the old `pretrained=False` keyword triggers.
        # Request pretrained weights explicitly here before training from a
        # pretrained backbone.
        resnet18 = models.resnet18()

        # Change the first conv layer to accept a single channel (grayscale)
        # by averaging its kernels over the input-channel axis.
        resnet18.conv1.weight = nn.Parameter(
            resnet18.conv1.weight.mean(dim=1, keepdim=True))
        # Keep the layer's metadata in sync with the new kernel so that
        # repr(model) no longer reports 3 input channels.
        resnet18.conv1.in_channels = 1

        # Encoder: the first six children of ResNet-18.
        self.encoder = nn.Sequential(
            *list(resnet18.children())[:6]
        )

        # Decoder: 128 -> 64 -> 32 -> 2 channels, upsampling by 2 per stage.
        self.decoder = nn.Sequential(
            self._make_layer(BasicBlock, 128, 64, nn.ReLU(inplace=True)),
            self._make_layer(BasicBlock, 64, 32, nn.ReLU(inplace=True)),
            self._make_layer(BasicBlock, 32, 2, nn.Sigmoid())
        )

    def _make_layer(self, block, in_channels, out_channels, activation):
        """Build one decoder stage.

        Args:
            block: Block class to instantiate; called as
                ``block(in, out, upsample=...)`` and
                ``block(out, out, activation=...)``.
            in_channels (int): Channels entering the stage.
            out_channels (int): Channels leaving the stage.
            activation (nn.Module): Activation ending the second block.

        Returns:
            nn.Sequential: The upsampling block followed by the activation
                block.
        """
        upsample = nn.Upsample(scale_factor=2, mode='nearest')
        layers = []
        layers.append(block(in_channels, out_channels, upsample=upsample))
        layers.append(block(out_channels, out_channels, activation=activation))
        return nn.Sequential(*layers)

    def forward(self, t):
        """Encode ``t`` and decode it again, returning the decoder output."""
        t = self.encoder(t)
        t = self.decoder(t)
        return t
src/methods/domer/image-colorization/output/0.jpg

25.8 KiB