Model constructor.
Create and tune PyTorch models.
ModelConstructor and ModelCfg
Main part of model_constructor - ModelConstructor and ModelCfg.
from model_constructor.model_constructor import ModelCfg, ModelConstructor
ModelCfg is the base model config; ModelConstructor has everything needed to create a model. It is subclassed from ModelCfg, so it holds all the config options plus methods for creating the model.
So we can create a config and then a constructor or a model from it.
Or we can create a constructor or a model directly from ModelConstructor.
Let's create a base config.
cfg = ModelCfg()
cfg.print_cfg()
output
ModelCfg(
in_chans=3
num_classes=1000
block='BasicBlock'
conv_layer='ConvBnAct'
block_sizes=[64, 128, 256, 512]
layers=[2, 2, 2, 2]
norm='BatchNorm2d'
act_fn='ReLU'
expansion=1
groups=1
bn_1st=True
zero_bn=True
stem_sizes=[64]
stem_pool="MaxPool2d {'kernel_size': 3, 'stride': 2, 'padding': 1}")
Now we can create a model directly from the config, or through creating a constructor first.
model = ModelConstructor.create_model(cfg)
model_constructor = ModelConstructor.from_cfg(cfg)
model = model_constructor()
Instantiate a config or constructor.
When initializing a config or constructor, we can use string names of nn.Modules instead of classes. By default we search in torch.nn.
cfg = ModelCfg(act_fn="torch.nn.Mish")
print(cfg.act_fn)
output
class 'torch.nn.modules.activation.Mish'
cfg = ModelCfg(act_fn="nn.SELU")
print(cfg.act_fn)
output
class 'torch.nn.modules.activation.SELU'
cfg = ModelCfg(
act_fn="Mish",
block="model_constructor.yaresnet.YaBasicBlock",
)
print(cfg.act_fn)
print(cfg.block)
output
class 'torch.nn.modules.activation.Mish'>
Stem, Body, Head.
By default the constructor creates an nn.Sequential model with a stem, body and head. We can check this at the constructor stage.
model = ModelConstructor.create_model()
for name, mod in model.named_children():
print(name)
output
stem
body
head
The constructor creates the stem, body and head with the make_stem, make_body and make_head methods. They are defined separately as functions taking a ModelCfg as their argument.
And we can change them on the fly:
mc.make_stem = custom_stem
mc.make_body = custom_body
mc.make_head = custom_head
Or at initialization:
mc = ModelConstructor(make_stem=custom_stem)
from model_constructor.model_constructor import make_stem, make_body, make_head, make_layer
Stem
stem = make_stem(cfg)
stem
output
Sequential(
(conv_1): ConvBnAct(
(conv): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
(bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(act_fn): Mish(inplace=True)
)
(stem_pool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
)
Layer
make_layer needs a layer_num argument — the number of the layer.
make_layer is kept separate from make_body — they could otherwise be one piece.
layer = make_layer(cfg, layer_num=0)
layer
output
Sequential(
(bl_0): YaBasicBlock(
(convs): Sequential(
(conv_0): ConvBnAct(
(conv): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(act_fn): Mish(inplace=True)
)
(conv_1): ConvBnAct(
(conv): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
)
(merge): Mish(inplace=True)
)
(bl_1): YaBasicBlock(
(convs): Sequential(
(conv_0): ConvBnAct(
(conv): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(act_fn): Mish(inplace=True)
)
(conv_1): ConvBnAct(
(conv): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
)
(merge): Mish(inplace=True)
)
)
Body
body = make_body(cfg)
body
output
Sequential(
(l_0): Sequential(
(bl_0): YaBasicBlock(
(convs): Sequential(
(conv_0): ConvBnAct(
(conv): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(act_fn): Mish(inplace=True)
)
(conv_1): ConvBnAct(
(conv): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
)
(merge): Mish(inplace=True)
)
(bl_1): YaBasicBlock(
(convs): Sequential(
(conv_0): ConvBnAct(
(conv): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(act_fn): Mish(inplace=True)
)
(conv_1): ConvBnAct(
(conv): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
)
(merge): Mish(inplace=True)
)
)
(l_1): Sequential(
(bl_0): YaBasicBlock(
(reduce): ConvBnAct(
(conv): Conv2d(64, 64, kernel_size=(1, 1), stride=(2, 2), bias=False)
(bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(act_fn): ReLU(inplace=True)
)
(convs): Sequential(
(conv_0): ConvBnAct(
(conv): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(act_fn): Mish(inplace=True)
)
(conv_1): ConvBnAct(
(conv): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
)
(id_conv): ConvBnAct(
(conv): Conv2d(64, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
(merge): Mish(inplace=True)
)
(bl_1): YaBasicBlock(
(convs): Sequential(
(conv_0): ConvBnAct(
(conv): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(act_fn): Mish(inplace=True)
)
(conv_1): ConvBnAct(
(conv): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
)
(merge): Mish(inplace=True)
)
)
(l_2): Sequential(
(bl_0): YaBasicBlock(
(reduce): ConvBnAct(
(conv): Conv2d(128, 128, kernel_size=(1, 1), stride=(2, 2), bias=False)
(bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(act_fn): ReLU(inplace=True)
)
(convs): Sequential(
(conv_0): ConvBnAct(
(conv): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(act_fn): Mish(inplace=True)
)
(conv_1): ConvBnAct(
(conv): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
)
(id_conv): ConvBnAct(
(conv): Conv2d(128, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
(merge): Mish(inplace=True)
)
(bl_1): YaBasicBlock(
(convs): Sequential(
(conv_0): ConvBnAct(
(conv): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(act_fn): Mish(inplace=True)
)
(conv_1): ConvBnAct(
(conv): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
)
(merge): Mish(inplace=True)
)
)
(l_3): Sequential(
(bl_0): YaBasicBlock(
(reduce): ConvBnAct(
(conv): Conv2d(256, 256, kernel_size=(1, 1), stride=(2, 2), bias=False)
(bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(act_fn): ReLU(inplace=True)
)
(convs): Sequential(
(conv_0): ConvBnAct(
(conv): Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(act_fn): Mish(inplace=True)
)
(conv_1): ConvBnAct(
(conv): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
)
(id_conv): ConvBnAct(
(conv): Conv2d(256, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
(merge): Mish(inplace=True)
)
(bl_1): YaBasicBlock(
(convs): Sequential(
(conv_0): ConvBnAct(
(conv): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(act_fn): Mish(inplace=True)
)
(conv_1): ConvBnAct(
(conv): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
)
(merge): Mish(inplace=True)
)
)
)
Head
head = make_head(cfg)
head
output
Sequential(
(pool): AdaptiveAvgPool2d(output_size=1)
(flat): Flatten(start_dim=1, end_dim=-1)
(fc): Linear(in_features=512, out_features=1000, bias=True)
)
Model Constructor.
mc = ModelConstructor()
mc
output
ModelConstructor
in_chans: 3, num_classes: 1000
expansion: 1, groups: 1, dw: False, div_groups: None
act_fn: ReLU, sa: False, se: False
stem sizes: [64], stride on 0
body sizes [64, 128, 256, 512]
layers: [2, 2, 2, 2]
mc.stem
output
Sequential(
(conv_1): ConvBnAct(
(conv): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
(bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(act_fn): ReLU(inplace=True)
)
(stem_pool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
)
mc.bn_1st = False
mc.act_fn = nn.LeakyReLU
mc.sa = SimpleSelfAttention
mc.se = SEModule
mc.body.l_0
output
Sequential(
(bl_0): BasicBlock(
(convs): Sequential(
(conv_0): ConvBnAct(
(conv): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(act_fn): LeakyReLU(negative_slope=0.01, inplace=True)
(bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
(conv_1): ConvBnAct(
(conv): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
(se): SEModule(
(squeeze): AdaptiveAvgPool2d(output_size=1)
(excitation): Sequential(
(reduce): Linear(in_features=64, out_features=4, bias=True)
(se_act): ReLU(inplace=True)
(expand): Linear(in_features=4, out_features=64, bias=True)
(se_gate): Sigmoid()
)
)
)
(act_fn): LeakyReLU(negative_slope=0.01, inplace=True)
)
(bl_1): BasicBlock(
(convs): Sequential(
(conv_0): ConvBnAct(
(conv): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(act_fn): LeakyReLU(negative_slope=0.01, inplace=True)
(bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
(conv_1): ConvBnAct(
(conv): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
(se): SEModule(
(squeeze): AdaptiveAvgPool2d(output_size=1)
(excitation): Sequential(
(reduce): Linear(in_features=64, out_features=4, bias=True)
(se_act): ReLU(inplace=True)
(expand): Linear(in_features=4, out_features=64, bias=True)
(se_gate): Sigmoid()
)
)
(sa): SimpleSelfAttention(
(conv): Conv1d(64, 64, kernel_size=(1,), stride=(1,), bias=False)
)
)
(act_fn): LeakyReLU(negative_slope=0.01, inplace=True)
)
)
model_constructor by ayasyrev