自動化無しに生活無し

WEB開発関係を中心に備忘録をまとめています

KerasのCNNモデルのコードを解釈する

  • 作成日時:
  • 最終更新日時:
  • Categories: others
  • Tags: AI開発 keras CNN
thumbnail

Keras公式GitHubからコードを拝借。一部コメントや不必要な箇所を絞って、各CNNモデルの特徴がわかるように再構成してまとめる。

以下関連記事。

VGG

VGG は3x3畳み込みをひたすら積み重ねるシンプルモデル。

とりわけ今回はVGG16(畳み込み層+全結合層が16層)を解説。

https://github.com/keras-team/keras/blob/master/keras/src/applications/vgg16.py

from keras.src import backend
from keras.src import layers
from keras.src.api_export import keras_export
from keras.src.applications import imagenet_utils
from keras.src.models import Functional
from keras.src.ops import operation_utils
from keras.src.utils import file_utils

# Download URLs for the pretrained ImageNet weights: with the fully-connected
# classifier head (WEIGHTS_PATH) and without it (WEIGHTS_PATH_NO_TOP).
WEIGHTS_PATH = (
    "https://storage.googleapis.com/tensorflow/keras-applications/"
    "vgg16/vgg16_weights_tf_dim_ordering_tf_kernels.h5"
)
WEIGHTS_PATH_NO_TOP = (
    "https://storage.googleapis.com/tensorflow/"
    "keras-applications/vgg16/"
    "vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5"
)


@keras_export(["keras.applications.vgg16.VGG16", "keras.applications.VGG16"])
def VGG16(
    include_top=True,
    weights="imagenet",
    input_tensor=None,
    input_shape=None,
    pooling=None,
    classes=1000,
    classifier_activation="softmax",
    name="vgg16",
):
    """Instantiate the VGG16 architecture (13 conv layers + 3 dense layers).

    Args:
        include_top: whether to include the 3 fully-connected layers on top.
        weights: `None` (random init), `"imagenet"`, or a path to a weights file.
        input_tensor: optional Keras tensor to use as the image input.
        input_shape: optional shape tuple, validated by `obtain_input_shape`.
        pooling: optional global pooling (`"avg"`/`"max"`) applied only when
            `include_top=False`.
        classes: number of output classes; must be 1000 when using ImageNet
            weights with `include_top=True`.
        classifier_activation: activation of the final Dense layer.
        name: model name.

    Returns:
        A `Functional` model instance.

    Raises:
        ValueError: if `weights` or `classes` is invalid.
    """

    # --- Argument validation ---

    if not (weights in {"imagenet", None} or file_utils.exists(weights)):
        raise ValueError(
            "The `weights` argument should be either "
            "`None` (random initialization), 'imagenet' "
            "(pre-training on ImageNet), "
            "or the path to the weights file to be loaded.  Received: "
            f"weights={weights}"
        )

    # ImageNet weights with the classifier head require exactly 1000 classes.
    if weights == "imagenet" and include_top and classes != 1000:
        raise ValueError(
            "If using `weights='imagenet'` with `include_top=True`, "
            "`classes` should be 1000.  "
            f"Received classes={classes}"
        )

    # Resolve the input shape. The default is 224x224; `min_size=32` is the
    # smallest spatial size accepted (a lower bound, not a no-top default).
    input_shape = imagenet_utils.obtain_input_shape(
        input_shape,
        default_size=224,
        min_size=32,
        data_format=backend.image_data_format(),
        require_flatten=include_top,
        weights=weights,
    )

    # Reuse a caller-provided input tensor when given; otherwise build one.
    if input_tensor is None:
        img_input = layers.Input(shape=input_shape)
    else:
        if not backend.is_keras_tensor(input_tensor):
            img_input = layers.Input(tensor=input_tensor, shape=input_shape)
        else:
            img_input = input_tensor

    # --- End of argument handling ---



    # NOTE: the backbone simply stacks 3x3 convolutions with ReLU activation,
    #       downsampling with 2x2 max pooling between blocks.

    # 13 Conv2D layers in total.

    # Block 1
    x = layers.Conv2D(64, (3, 3), activation="relu", padding="same", name="block1_conv1")(img_input)
    x = layers.Conv2D(64, (3, 3), activation="relu", padding="same", name="block1_conv2")(x)
    x = layers.MaxPooling2D((2, 2), strides=(2, 2), name="block1_pool")(x)

    # Block 2
    x = layers.Conv2D(128, (3, 3), activation="relu", padding="same", name="block2_conv1")(x)
    x = layers.Conv2D(128, (3, 3), activation="relu", padding="same", name="block2_conv2")(x)
    x = layers.MaxPooling2D((2, 2), strides=(2, 2), name="block2_pool")(x)

    # Block 3
    x = layers.Conv2D(256, (3, 3), activation="relu", padding="same", name="block3_conv1")(x)
    x = layers.Conv2D(256, (3, 3), activation="relu", padding="same", name="block3_conv2")(x)
    x = layers.Conv2D(256, (3, 3), activation="relu", padding="same", name="block3_conv3")(x)
    x = layers.MaxPooling2D((2, 2), strides=(2, 2), name="block3_pool")(x)

    # Block 4
    x = layers.Conv2D(512, (3, 3), activation="relu", padding="same", name="block4_conv1")(x)
    x = layers.Conv2D(512, (3, 3), activation="relu", padding="same", name="block4_conv2")(x)
    x = layers.Conv2D(512, (3, 3), activation="relu", padding="same", name="block4_conv3")(x)
    x = layers.MaxPooling2D((2, 2), strides=(2, 2), name="block4_pool")(x)

    # Block 5
    x = layers.Conv2D(512, (3, 3), activation="relu", padding="same", name="block5_conv1")(x)
    x = layers.Conv2D(512, (3, 3), activation="relu", padding="same", name="block5_conv2")(x)
    x = layers.Conv2D(512, (3, 3), activation="relu", padding="same", name="block5_conv3")(x)
    x = layers.MaxPooling2D((2, 2), strides=(2, 2), name="block5_pool")(x)

    # ==== End of conv + max-pool backbone ====

    # Classifier head: 3 Dense layers.
    if include_top:

        # Flatten the final feature map into a vector.
        x = layers.Flatten(name="flatten")(x)

        # VGG's large fully-connected layers.
        x = layers.Dense(4096, activation="relu", name="fc1")(x)
        x = layers.Dense(4096, activation="relu", name="fc2")(x)

        # Output layer sized to `classes`, using `classifier_activation`
        # (softmax by default).
        imagenet_utils.validate_activation(classifier_activation, weights)

        x = layers.Dense(classes, activation=classifier_activation, name="predictions")(x)

    else:
        # Not in the original VGG paper: optional global pooling for
        # transfer learning when the dense head is omitted.
        if pooling == "avg":
            x = layers.GlobalAveragePooling2D()(x)
        elif pooling == "max":
            x = layers.GlobalMaxPooling2D()(x)

    if input_tensor is not None:
        inputs = operation_utils.get_source_inputs(input_tensor)
    else:
        inputs = img_input

    # Build the model with the Functional API.
    model = Functional(inputs, x, name=name)

    
    # Load pretrained weights when requested.
    if weights == "imagenet":
        if include_top:
            weights_path = file_utils.get_file(
                "vgg16_weights_tf_dim_ordering_tf_kernels.h5",
                WEIGHTS_PATH,
                cache_subdir="models",
                file_hash="64373286793e3c8b2b4e3219cbf3544b",
            )
        else:
            weights_path = file_utils.get_file(
                "vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5",
                WEIGHTS_PATH_NO_TOP,
                cache_subdir="models",
                file_hash="6d6bbae143d832006294945121d1f1fc",
            )

        model.load_weights(weights_path)
    elif weights is not None:
        model.load_weights(weights)

    return model


@keras_export("keras.applications.vgg16.preprocess_input")
def preprocess_input(x, data_format=None):
    """Preprocess images for VGG16 ("caffe" mode: RGB->BGR, mean subtraction)."""
    return imagenet_utils.preprocess_input(x, data_format=data_format, mode="caffe")


@keras_export("keras.applications.vgg16.decode_predictions")
def decode_predictions(preds, top=5):
    """Decode ImageNet scores into (class_id, description, probability) tuples."""
    return imagenet_utils.decode_predictions(preds, top=top)


# Attach the shared keras.applications docstrings ("caffe"-mode preprocessing)
# to the module-level helper functions.
preprocess_input.__doc__ = imagenet_utils.PREPROCESS_INPUT_DOC.format(
    mode="",
    ret=imagenet_utils.PREPROCESS_INPUT_RET_DOC_CAFFE,
    error=imagenet_utils.PREPROCESS_INPUT_ERROR_DOC,
)
decode_predictions.__doc__ = imagenet_utils.decode_predictions.__doc__

VGGなので、全体的に非常にシンプル。畳み込みとマックスプーリングを重ね、最後に線形変換して全結合している。

なお、Global Average Poolingが使われるのは include_top=False かつ pooling="avg" を指定した場合(特徴抽出器として使う場合)のみで、通常の分類モデルではFlatten後に全結合層へ渡している。

GoogLeNet

KerasにはGoogLeNetは存在しない。が、inception_v3があるためそちらを解説する。

Inception モジュールは、GoogLeNetの 1x1 3x3 5x5 の畳み込みを並列に行うモジュールのこと。

InceptionV3では計算量削減のため畳み込みが分解(factorization)されており、5x5は3x3の2段重ねで、7x7は1x7と7x1の組み合わせで表現されている。

https://github.com/keras-team/keras/blob/master/keras/src/applications/inception_v3.py

from keras.src import backend
from keras.src import layers
from keras.src.api_export import keras_export
from keras.src.applications import imagenet_utils
from keras.src.models import Functional
from keras.src.ops import operation_utils
from keras.src.utils import file_utils

# Download URLs for the pretrained ImageNet weights: with the classifier head
# (WEIGHTS_PATH) and without it (WEIGHTS_PATH_NO_TOP).
WEIGHTS_PATH = (
    "https://storage.googleapis.com/tensorflow/keras-applications/"
    "inception_v3/inception_v3_weights_tf_dim_ordering_tf_kernels.h5"
)
WEIGHTS_PATH_NO_TOP = (
    "https://storage.googleapis.com/tensorflow/keras-applications/"
    "inception_v3/inception_v3_weights_tf_dim_ordering_tf_kernels_notop.h5"
)



# InceptionV3: stacks Inception modules, i.e. parallel convolution branches
# (1x1, 5x5, stacked 3x3, pooling) concatenated along the channel axis, with
# larger kernels factorized into cheaper ones (e.g. 7x7 -> 1x7 + 7x1).
@keras_export(
    [
        "keras.applications.inception_v3.InceptionV3",
        "keras.applications.InceptionV3",
    ]
)
def InceptionV3(
    include_top=True,
    weights="imagenet",
    input_tensor=None,
    input_shape=None,
    pooling=None,
    classes=1000,
    classifier_activation="softmax",
    name="inception_v3",
):
    """Instantiate the InceptionV3 architecture.

    Args:
        include_top: whether to include the final Dense classifier.
        weights: `None`, `"imagenet"`, or a path to a weights file.
        input_tensor: optional Keras tensor to use as the image input.
        input_shape: optional shape tuple; default input size is 299x299.
        pooling: optional global pooling (`"avg"`/`"max"`) when
            `include_top=False`.
        classes: number of output classes (1000 for ImageNet weights + top).
        classifier_activation: activation for the final Dense layer.
        name: model name.

    Returns:
        A `Functional` model instance.

    Raises:
        ValueError: if `weights` or `classes` is invalid.
    """

    # --- Argument validation ---

    if not (weights in {"imagenet", None} or file_utils.exists(weights)):
        raise ValueError(
            "The `weights` argument should be either "
            "`None` (random initialization), `imagenet` "
            "(pre-training on ImageNet), "
            "or the path to the weights file to be loaded; "
            f"Received: weights={weights}"
        )

    if weights == "imagenet" and include_top and classes != 1000:
        raise ValueError(
            'If using `weights="imagenet"` with `include_top=True`, '
            "`classes` should be 1000. "
            f"Received classes={classes}"
        )

    # Determine proper input shape
    input_shape = imagenet_utils.obtain_input_shape(
        input_shape,
        default_size=299,
        min_size=75,
        data_format=backend.image_data_format(),
        require_flatten=include_top,
        weights=weights,
    )

    if input_tensor is None:
        img_input = layers.Input(shape=input_shape)
    else:
        if not backend.is_keras_tensor(input_tensor):
            img_input = layers.Input(tensor=input_tensor, shape=input_shape)
        else:
            img_input = input_tensor

    # Channel axis depends on the image data format.
    if backend.image_data_format() == "channels_first":
        channel_axis = 1
    else:
        channel_axis = 3

    # ==== End of argument handling ====


    # conv2d_bn (defined below) = Conv2D + BatchNormalization + ReLU.

    # Stem: three 3x3 convolutions + max pooling.
    x = conv2d_bn(img_input, 32, 3, 3, strides=(2, 2), padding="valid")
    x = conv2d_bn(x, 32, 3, 3, padding="valid")
    x = conv2d_bn(x, 64, 3, 3)
    x = layers.MaxPooling2D((3, 3), strides=(2, 2))(x)

    # 1x1 convolution + 3x3 convolution + max pooling.
    x = conv2d_bn(x, 80, 1, 1, padding="valid")
    x = conv2d_bn(x, 192, 3, 3, padding="valid")
    x = layers.MaxPooling2D((3, 3), strides=(2, 2))(x)


    # First Inception module: 1x1, 5x5, double-3x3 and pooling branches are
    # computed in parallel (each kept in its own temporary) and concatenated.
    # Why stack small kernels? Two stacked 3x3 convs cover the same receptive
    # field as one 5x5 at a lower computational cost.

    branch1x1 = conv2d_bn(x, 64, 1, 1)

    # 1x1 reduction followed by a 5x5 convolution.
    branch5x5 = conv2d_bn(x, 48, 1, 1)
    branch5x5 = conv2d_bn(branch5x5, 64, 5, 5)

    # Two stacked 3x3 convs: effective 5x5 receptive field, cheaper compute.
    branch3x3dbl = conv2d_bn(x, 64, 1, 1)
    branch3x3dbl = conv2d_bn(branch3x3dbl, 96, 3, 3)
    branch3x3dbl = conv2d_bn(branch3x3dbl, 96, 3, 3)

    # Average-pooling branch, compressed with a 1x1 convolution.
    branch_pool = layers.AveragePooling2D( (3, 3), strides=(1, 1), padding="same" )(x)

    branch_pool = conv2d_bn(branch_pool, 32, 1, 1)

    # Concatenate the branches along the channel axis.
    x = layers.concatenate(
        [branch1x1, branch5x5, branch3x3dbl, branch_pool],
        axis=channel_axis,
        name="mixed0",
    )

    # The same pattern repeats below.
    # ==================

    # mixed 1: 35 x 35 x 288
    branch1x1 = conv2d_bn(x, 64, 1, 1)

    branch5x5 = conv2d_bn(x, 48, 1, 1)
    branch5x5 = conv2d_bn(branch5x5, 64, 5, 5)

    branch3x3dbl = conv2d_bn(x, 64, 1, 1)
    branch3x3dbl = conv2d_bn(branch3x3dbl, 96, 3, 3)
    branch3x3dbl = conv2d_bn(branch3x3dbl, 96, 3, 3)

    branch_pool = layers.AveragePooling2D( (3, 3), strides=(1, 1), padding="same" )(x)

    branch_pool = conv2d_bn(branch_pool, 64, 1, 1)

    # Concatenate the branches along the channel axis.
    x = layers.concatenate(
        [branch1x1, branch5x5, branch3x3dbl, branch_pool],
        axis=channel_axis,
        name="mixed1",
    )

    # ==================

    # mixed 2: 35 x 35 x 288
    branch1x1 = conv2d_bn(x, 64, 1, 1)

    branch5x5 = conv2d_bn(x, 48, 1, 1)
    branch5x5 = conv2d_bn(branch5x5, 64, 5, 5)

    branch3x3dbl = conv2d_bn(x, 64, 1, 1)
    branch3x3dbl = conv2d_bn(branch3x3dbl, 96, 3, 3)
    branch3x3dbl = conv2d_bn(branch3x3dbl, 96, 3, 3)

    branch_pool = layers.AveragePooling2D((3, 3), strides=(1, 1), padding="same")(x)

    branch_pool = conv2d_bn(branch_pool, 64, 1, 1)

    # Concatenate the branches along the channel axis.
    x = layers.concatenate(
        [branch1x1, branch5x5, branch3x3dbl, branch_pool],
        axis=channel_axis,
        name="mixed2",
    )

    # ==================

    # mixed 3: 17 x 17 x 768 (grid-size reduction via stride-2 branches)
    branch3x3 = conv2d_bn(x, 384, 3, 3, strides=(2, 2), padding="valid")

    branch3x3dbl = conv2d_bn(x, 64, 1, 1)
    branch3x3dbl = conv2d_bn(branch3x3dbl, 96, 3, 3)
    branch3x3dbl = conv2d_bn( branch3x3dbl, 96, 3, 3, strides=(2, 2), padding="valid" )

    branch_pool = layers.MaxPooling2D((3, 3), strides=(2, 2))(x)

    # Concatenate the branches along the channel axis.
    x = layers.concatenate(
        [branch3x3, branch3x3dbl, branch_pool],
        axis=channel_axis,
        name="mixed3"
    )

    # mixed 4: 17 x 17 x 768
    # 7x7 convolutions factorized into 1x7 and 7x1 (cheaper than a full 7x7).
    branch1x1 = conv2d_bn(x, 192, 1, 1)

    branch7x7 = conv2d_bn(x, 128, 1, 1)
    branch7x7 = conv2d_bn(branch7x7, 128, 1, 7)
    branch7x7 = conv2d_bn(branch7x7, 192, 7, 1)

    branch7x7dbl = conv2d_bn(x, 128, 1, 1)
    branch7x7dbl = conv2d_bn(branch7x7dbl, 128, 7, 1)
    branch7x7dbl = conv2d_bn(branch7x7dbl, 128, 1, 7)
    branch7x7dbl = conv2d_bn(branch7x7dbl, 128, 7, 1)
    branch7x7dbl = conv2d_bn(branch7x7dbl, 192, 1, 7)

    branch_pool = layers.AveragePooling2D((3, 3), strides=(1, 1), padding="same" )(x)
    branch_pool = conv2d_bn(branch_pool, 192, 1, 1)

    # Concatenate the branches along the channel axis.
    x = layers.concatenate(
        [branch1x1, branch7x7, branch7x7dbl, branch_pool],
        axis=channel_axis,
        name="mixed4",
    )

    # mixed 5, 6: 17 x 17 x 768
    for i in range(2):
        branch1x1 = conv2d_bn(x, 192, 1, 1)

        branch7x7 = conv2d_bn(x, 160, 1, 1)
        branch7x7 = conv2d_bn(branch7x7, 160, 1, 7)
        branch7x7 = conv2d_bn(branch7x7, 192, 7, 1)

        branch7x7dbl = conv2d_bn(x, 160, 1, 1)
        branch7x7dbl = conv2d_bn(branch7x7dbl, 160, 7, 1)
        branch7x7dbl = conv2d_bn(branch7x7dbl, 160, 1, 7)
        branch7x7dbl = conv2d_bn(branch7x7dbl, 160, 7, 1)
        branch7x7dbl = conv2d_bn(branch7x7dbl, 192, 1, 7)

        branch_pool = layers.AveragePooling2D(
            (3, 3), strides=(1, 1), padding="same"
        )(x)
        branch_pool = conv2d_bn(branch_pool, 192, 1, 1)
        x = layers.concatenate(
            [branch1x1, branch7x7, branch7x7dbl, branch_pool],
            axis=channel_axis,
            name="mixed{0}".format(5 + i),
        )

    # mixed 7: 17 x 17 x 768
    branch1x1 = conv2d_bn(x, 192, 1, 1)

    branch7x7 = conv2d_bn(x, 192, 1, 1)
    branch7x7 = conv2d_bn(branch7x7, 192, 1, 7)
    branch7x7 = conv2d_bn(branch7x7, 192, 7, 1)

    branch7x7dbl = conv2d_bn(x, 192, 1, 1)
    branch7x7dbl = conv2d_bn(branch7x7dbl, 192, 7, 1)
    branch7x7dbl = conv2d_bn(branch7x7dbl, 192, 1, 7)
    branch7x7dbl = conv2d_bn(branch7x7dbl, 192, 7, 1)
    branch7x7dbl = conv2d_bn(branch7x7dbl, 192, 1, 7)

    branch_pool = layers.AveragePooling2D((3, 3), strides=(1, 1), padding="same")(x)
    branch_pool = conv2d_bn(branch_pool, 192, 1, 1)

    x = layers.concatenate(
        [branch1x1, branch7x7, branch7x7dbl, branch_pool],
        axis=channel_axis,
        name="mixed7",
    )

    # mixed 8: 8 x 8 x 1280 (another grid-size reduction)
    branch3x3 = conv2d_bn(x, 192, 1, 1)
    branch3x3 = conv2d_bn(branch3x3, 320, 3, 3, strides=(2, 2), padding="valid")

    branch7x7x3 = conv2d_bn(x, 192, 1, 1)
    branch7x7x3 = conv2d_bn(branch7x7x3, 192, 1, 7)
    branch7x7x3 = conv2d_bn(branch7x7x3, 192, 7, 1)
    branch7x7x3 = conv2d_bn(branch7x7x3, 192, 3, 3, strides=(2, 2), padding="valid")

    branch_pool = layers.MaxPooling2D((3, 3), strides=(2, 2))(x)

    x = layers.concatenate(
        [branch3x3, branch7x7x3, branch_pool], axis=channel_axis, name="mixed8"
    )

    # mixed 9: 8 x 8 x 2048
    for i in range(2):
        branch1x1 = conv2d_bn(x, 320, 1, 1)

        # 3x3 expanded into parallel 1x3 and 3x1 outputs, then concatenated.
        branch3x3 = conv2d_bn(x, 384, 1, 1)
        branch3x3_1 = conv2d_bn(branch3x3, 384, 1, 3)
        branch3x3_2 = conv2d_bn(branch3x3, 384, 3, 1)
        branch3x3 = layers.concatenate(
            [branch3x3_1, branch3x3_2],
            axis=channel_axis,
            name=f"mixed9_{i}",
        )

        branch3x3dbl = conv2d_bn(x, 448, 1, 1)
        branch3x3dbl = conv2d_bn(branch3x3dbl, 384, 3, 3)
        branch3x3dbl_1 = conv2d_bn(branch3x3dbl, 384, 1, 3)
        branch3x3dbl_2 = conv2d_bn(branch3x3dbl, 384, 3, 1)
        branch3x3dbl = layers.concatenate(
            [branch3x3dbl_1, branch3x3dbl_2], axis=channel_axis
        )

        branch_pool = layers.AveragePooling2D((3, 3), strides=(1, 1), padding="same")(x)
        branch_pool = conv2d_bn(branch_pool, 192, 1, 1)

        x = layers.concatenate(
            [branch1x1, branch3x3, branch3x3dbl, branch_pool],
            axis=channel_axis,
            name=f"mixed{9 + i}",
        )
    
    if include_top:

        # Global average pooling, then the Dense classifier.
        x = layers.GlobalAveragePooling2D(name="avg_pool")(x)

        imagenet_utils.validate_activation(classifier_activation, weights)
        x = layers.Dense(classes, activation=classifier_activation, name="predictions")(x)

    else:
        # No classifier head: optional global pooling for feature extraction.
        if pooling == "avg":
            x = layers.GlobalAveragePooling2D()(x)
        elif pooling == "max":
            x = layers.GlobalMaxPooling2D()(x)


    # Ensure that the model takes into account
    # any potential predecessors of `input_tensor`.
    if input_tensor is not None:
        inputs = operation_utils.get_source_inputs(input_tensor)
    else:
        inputs = img_input


    # Build the model.
    model = Functional(inputs, x, name=name)

    # Load pretrained weights.
    if weights == "imagenet":
        if include_top:
            weights_path = file_utils.get_file(
                "inception_v3_weights_tf_dim_ordering_tf_kernels.h5",
                WEIGHTS_PATH,
                cache_subdir="models",
                file_hash="9a0d58056eeedaa3f26cb7ebd46da564",
            )
        else:
            weights_path = file_utils.get_file(
                "inception_v3_weights_tf_dim_ordering_tf_kernels_notop.h5",
                WEIGHTS_PATH_NO_TOP,
                cache_subdir="models",
                file_hash="bcbd6486424b2319ff4ef7d526e38f63",
            )
        model.load_weights(weights_path)
    elif weights is not None:
        model.load_weights(weights)

    return model


# Convolution + batch normalization helper.
def conv2d_bn(x, filters, num_row, num_col, padding="same", strides=(1, 1), name=None):
    """Apply Conv2D (no bias) -> BatchNormalization -> ReLU to `x`.

    When `name` is given, the sub-layers are named `{name}_conv`,
    `{name}_bn` and `{name}` respectively.
    """
    conv_name = bn_name = None
    if name is not None:
        conv_name, bn_name = f"{name}_conv", f"{name}_bn"

    # BatchNormalization normalizes over the channel axis.
    bn_axis = 1 if backend.image_data_format() == "channels_first" else 3

    x = layers.Conv2D(
        filters,
        (num_row, num_col),
        strides=strides,
        padding=padding,
        use_bias=False,
        name=conv_name,
    )(x)
    x = layers.BatchNormalization(axis=bn_axis, scale=False, name=bn_name)(x)
    return layers.Activation("relu", name=name)(x)


@keras_export("keras.applications.inception_v3.preprocess_input")
def preprocess_input(x, data_format=None):
    """Preprocess images for InceptionV3 ("tf" mode: scale pixels to [-1, 1])."""
    return imagenet_utils.preprocess_input(x, data_format=data_format, mode="tf")

@keras_export("keras.applications.inception_v3.decode_predictions")
def decode_predictions(preds, top=5):
    """Decode ImageNet scores into (class_id, description, probability) tuples."""
    return imagenet_utils.decode_predictions(preds, top=top)


# Attach the shared keras.applications docstrings ("tf"-mode preprocessing)
# to the module-level helper functions.
preprocess_input.__doc__ = imagenet_utils.PREPROCESS_INPUT_DOC.format(
    mode="",
    ret=imagenet_utils.PREPROCESS_INPUT_RET_DOC_TF,
    error=imagenet_utils.PREPROCESS_INPUT_ERROR_DOC,
)
decode_predictions.__doc__ = imagenet_utils.decode_predictions.__doc__

また、VGG16とは異なり、畳み込みの際にバッチ正規化を行っている。これにより勾配消失・勾配爆発を抑止している。

ResNet

ResNet は残差接続(スキップコネクション、ショートカット接続とも言う)を採用し、残差(今の層の出力) に対して入力値を加算して次の層に引き渡している。

その引き渡しの際に、チャンネル数が異なれば加算はできないため1x1畳み込みでチャンネル数を調整している。

https://github.com/keras-team/keras/blob/master/keras/src/applications/resnet.py

from keras.src import backend
from keras.src import layers
from keras.src.api_export import keras_export
from keras.src.applications import imagenet_utils
from keras.src.models import Functional
from keras.src.ops import operation_utils
from keras.src.utils import file_utils

# Base URL for all ResNet-family pretrained weight files.
BASE_WEIGHTS_PATH = (
    "https://storage.googleapis.com/tensorflow/keras-applications/resnet/"
)
# Per-architecture file hashes: (hash of top weights, hash of no-top weights).
WEIGHTS_HASHES = {
    "resnet50": (
        "2cb95161c43110f7111970584f804107",
        "4d473c1dd8becc155b73f8504c6f6626",
    ),
    "resnet101": (
        "f1aeb4b969a6efcfb50fad2f0c20cfc5",
        "88cf7a10940856eca736dc7b7e228a21",
    ),
    "resnet152": (
        "100835be76be38e30d865e96f2aaae62",
        "ee4c566cf9a93f14d82f913c2dc6dd0c",
    ),
    "resnet50v2": (
        "3ef43a0b657b3be2300d5770ece849e0",
        "fac2f116257151a9d068a22e544a4917",
    ),
    "resnet101v2": (
        "6343647c601c52e1368623803854d971",
        "c0ed64b8031c3730f411d2eb4eea35b5",
    ),
    "resnet152v2": (
        "a49b44d1979771252814e80f8ec446f9",
        "ed17cf2e0169df9d443503ef94b23b33",
    ),
    "resnext50": (
        "67a5b30d522ed92f75a1f16eef299d1a",
        "62527c363bdd9ec598bed41947b379fc",
    ),
    "resnext101": (
        "34fb605428fcc7aa4d62f44404c11509",
        "0f678c91647380debd923963594981b3",
    ),
}

# Generic ResNet builder. The residual (shortcut) logic itself is supplied
# by the caller via `stack_fn`; this function only builds the stem, the head
# and the weight loading.
def ResNet(
    stack_fn,
    preact,
    use_bias,
    include_top=True,
    weights="imagenet",
    input_tensor=None,
    input_shape=None,
    pooling=None,
    classes=1000,
    classifier_activation="softmax",
    name="resnet",
    weights_name=None,
):
    """Build a ResNet-family model.

    Args:
        stack_fn: callable that takes and returns a tensor, stacking the
            residual blocks (differs per ResNet50/101/152 and v1/v2).
        preact: whether to use pre-activation (True for ResNetV2 variants).
        use_bias: whether convolution layers use a bias term.
        include_top, weights, input_tensor, input_shape, pooling, classes,
            classifier_activation, name: as in the other keras.applications
            builders.
        weights_name: key into WEIGHTS_HASHES selecting the weight file.

    Returns:
        A `Functional` model instance.

    Raises:
        ValueError: if `weights` or `classes` is invalid.
    """

    # --- Argument validation ---

    if not (weights in {"imagenet", None} or file_utils.exists(weights)):
        raise ValueError(
            "The `weights` argument should be either "
            "`None` (random initialization), 'imagenet' "
            "(pre-training on ImageNet), "
            "or the path to the weights file to be loaded.  Received: "
            f"weights={weights}"
        )

    if weights == "imagenet" and include_top and classes != 1000:
        raise ValueError(
            "If using `weights='imagenet'` with `include_top=True`, "
            "`classes` should be 1000.  "
            f"Received classes={classes}"
        )

    # Determine proper input shape
    input_shape = imagenet_utils.obtain_input_shape(
        input_shape,
        default_size=224,
        min_size=32,
        data_format=backend.image_data_format(),
        require_flatten=include_top,
        weights=weights,
    )

    if input_tensor is None:
        img_input = layers.Input(shape=input_shape)
    else:
        if not backend.is_keras_tensor(input_tensor):
            img_input = layers.Input(tensor=input_tensor, shape=input_shape)
        else:
            img_input = input_tensor

    # BatchNormalization axis follows the image data format.
    if backend.image_data_format() == "channels_last":
        bn_axis = 3
    else:
        bn_axis = 1

    # --- End of argument handling ---


    # Stem: zero padding + 7x7 stride-2 convolution.
    x = layers.ZeroPadding2D(padding=((3, 3), (3, 3)), name="conv1_pad")(img_input)
    x = layers.Conv2D(64, 7, strides=2, use_bias=use_bias, name="conv1_conv")(x)

    # v1 nets apply BN + ReLU right after the stem conv; v2 (preact) defers.
    if not preact:
        x = layers.BatchNormalization(axis=bn_axis, epsilon=1.001e-5, name="conv1_bn")(x)
        x = layers.Activation("relu", name="conv1_relu")(x)

    x = layers.ZeroPadding2D(padding=((1, 1), (1, 1)), name="pool1_pad")(x)
    x = layers.MaxPooling2D(3, strides=2, name="pool1_pool")(x)

    # Stack the residual blocks supplied by the caller.
    x = stack_fn(x)

    # v2 (pre-activation) nets apply a final BN + ReLU after the stacks.
    if preact:
        x = layers.BatchNormalization(axis=bn_axis, epsilon=1.001e-5, name="post_bn")(x)
        x = layers.Activation("relu", name="post_relu")(x)

    if include_top:
        x = layers.GlobalAveragePooling2D(name="avg_pool")(x)

        # Validate activation for the classifier layer
        imagenet_utils.validate_activation(classifier_activation, weights)

        x = layers.Dense(classes, activation=classifier_activation, name="predictions")(x)
    else:
        # No classifier head: optional global pooling for feature extraction.
        if pooling == "avg":
            x = layers.GlobalAveragePooling2D(name="avg_pool")(x)
        elif pooling == "max":
            x = layers.GlobalMaxPooling2D(name="max_pool")(x)


    # Resolve the real model inputs (may precede `input_tensor`).
    if input_tensor is not None:
        inputs = operation_utils.get_source_inputs(input_tensor)
    else:
        inputs = img_input

    # Build the model.
    model = Functional(inputs, x, name=name)

    # Load pretrained weights (only when `weights_name` has a known hash).
    if (weights == "imagenet") and (weights_name in WEIGHTS_HASHES):
        if include_top:
            file_name = f"{weights_name}_weights_tf_dim_ordering_tf_kernels.h5"
            file_hash = WEIGHTS_HASHES[weights_name][0]
        else:
            file_name = (f"{weights_name}_weights_tf_dim_ordering_tf_kernels_notop.h5")
            file_hash = WEIGHTS_HASHES[weights_name][1]
        weights_path = file_utils.get_file(
            file_name,
            f"{BASE_WEIGHTS_PATH}{file_name}",
            cache_subdir="models",
            file_hash=file_hash,
        )
        model.load_weights(weights_path)
    elif weights is not None:
        model.load_weights(weights)

    return model


# Bottleneck residual block (v1): 1x1 -> 3x3 -> 1x1 convolutions plus a
# shortcut connection added before the final activation.
def residual_block_v1(x, filters, kernel_size=3, stride=1, conv_shortcut=True, name=None):
    """Return the output of one v1 bottleneck block applied to `x`.

    When `conv_shortcut` is True the shortcut is a 1x1 projection (matching
    the 4*filters channel count and stride); otherwise the input is carried
    over unchanged.
    """
    bn_axis = 3 if backend.image_data_format() == "channels_last" else 1

    if conv_shortcut:
        # Projection shortcut: 1x1 conv adjusts channels and spatial stride.
        shortcut = layers.Conv2D(
            4 * filters, 1, strides=stride, name=f"{name}_0_conv"
        )(x)
        shortcut = layers.BatchNormalization(
            axis=bn_axis, epsilon=1.001e-5, name=f"{name}_0_bn"
        )(shortcut)
    else:
        # Identity shortcut.
        shortcut = x

    out = layers.Conv2D(filters, 1, strides=stride, name=f"{name}_1_conv")(x)
    out = layers.BatchNormalization(
        axis=bn_axis, epsilon=1.001e-5, name=f"{name}_1_bn"
    )(out)
    out = layers.Activation("relu", name=f"{name}_1_relu")(out)

    out = layers.Conv2D(
        filters, kernel_size, padding="SAME", name=f"{name}_2_conv"
    )(out)
    out = layers.BatchNormalization(
        axis=bn_axis, epsilon=1.001e-5, name=f"{name}_2_bn"
    )(out)
    out = layers.Activation("relu", name=f"{name}_2_relu")(out)

    out = layers.Conv2D(4 * filters, 1, name=f"{name}_3_conv")(out)
    out = layers.BatchNormalization(
        axis=bn_axis, epsilon=1.001e-5, name=f"{name}_3_bn"
    )(out)

    # Add the shortcut to the residual path, then apply the final ReLU.
    out = layers.Add(name=f"{name}_add")([shortcut, out])
    return layers.Activation("relu", name=f"{name}_out")(out)


# Stack v1 residual blocks for one stage.
def stack_residual_blocks_v1(x, filters, blocks, stride1=2, name=None):
    """Stack `blocks` v1 blocks; only the first uses a projection shortcut."""
    x = residual_block_v1(x, filters, stride=stride1, name=f"{name}_block1")
    for idx in range(2, blocks + 1):
        x = residual_block_v1(
            x, filters, conv_shortcut=False, name=f"{name}_block{idx}"
        )
    return x


# Pre-activation bottleneck block (v2): BN + ReLU come first, then the
# 1x1 -> 3x3 -> 1x1 convolutions; the shortcut is added at the end with no
# post-addition activation.
def residual_block_v2(x, filters, kernel_size=3, stride=1, conv_shortcut=False, name=None):
    """Return the output of one v2 (pre-activation) bottleneck block."""
    bn_axis = 3 if backend.image_data_format() == "channels_last" else 1

    # Pre-activation: normalize and activate before the convolutions.
    preact = layers.BatchNormalization(
        axis=bn_axis, epsilon=1.001e-5, name=f"{name}_preact_bn"
    )(x)
    preact = layers.Activation("relu", name=f"{name}_preact_relu")(preact)

    # Shortcut: either a 1x1 projection of the pre-activated tensor, or a
    # (possibly strided 1x1 max-pooled) identity of the raw input.
    if conv_shortcut:
        shortcut = layers.Conv2D(
            4 * filters, 1, strides=stride, name=f"{name}_0_conv"
        )(preact)
    elif stride > 1:
        shortcut = layers.MaxPooling2D(1, strides=stride)(x)
    else:
        shortcut = x

    out = layers.Conv2D(
        filters, 1, strides=1, use_bias=False, name=f"{name}_1_conv"
    )(preact)
    out = layers.BatchNormalization(
        axis=bn_axis, epsilon=1.001e-5, name=f"{name}_1_bn"
    )(out)
    out = layers.Activation("relu", name=f"{name}_1_relu")(out)

    out = layers.ZeroPadding2D(padding=((1, 1), (1, 1)), name=f"{name}_2_pad")(out)
    out = layers.Conv2D(
        filters, kernel_size, strides=stride, use_bias=False, name=f"{name}_2_conv"
    )(out)
    out = layers.BatchNormalization(
        axis=bn_axis, epsilon=1.001e-5, name=f"{name}_2_bn"
    )(out)
    out = layers.Activation("relu", name=f"{name}_2_relu")(out)

    out = layers.Conv2D(4 * filters, 1, name=f"{name}_3_conv")(out)
    return layers.Add(name=f"{name}_out")([shortcut, out])


# Stack v2 residual blocks for one stage.
def stack_residual_blocks_v2(x, filters, blocks, stride1=2, name=None):
    """Stack `blocks` v2 blocks; the first projects, the last downsamples."""
    x = residual_block_v2(x, filters, conv_shortcut=True, name=f"{name}_block1")
    for idx in range(2, blocks):
        x = residual_block_v2(x, filters, name=f"{name}_block{idx}")
    return residual_block_v2(
        x, filters, stride=stride1, name=f"{name}_block{blocks}"
    )



# 以下ResNet50~101の関数。
@keras_export(
    [
        "keras.applications.resnet50.ResNet50",
        "keras.applications.resnet.ResNet50",
        "keras.applications.ResNet50",
    ]
)
def ResNet50(
    include_top=True,
    weights="imagenet",
    input_tensor=None,
    input_shape=None,
    pooling=None,
    classes=1000,
    classifier_activation="softmax",
    name="resnet50",
):
    """Instantiate ResNet50: v1 stacks of 3/4/6/3 bottleneck blocks."""

    def stack_fn(features):
        # conv2 keeps stride 1 because the stem max pool already downsampled.
        features = stack_residual_blocks_v1(features, 64, 3, stride1=1, name="conv2")
        features = stack_residual_blocks_v1(features, 128, 4, name="conv3")
        features = stack_residual_blocks_v1(features, 256, 6, name="conv4")
        return stack_residual_blocks_v1(features, 512, 3, name="conv5")

    return ResNet(
        stack_fn,
        preact=False,
        use_bias=True,
        weights_name="resnet50",
        name=name,
        include_top=include_top,
        weights=weights,
        input_tensor=input_tensor,
        input_shape=input_shape,
        pooling=pooling,
        classes=classes,
        classifier_activation=classifier_activation,
    )


@keras_export(
    [
        "keras.applications.resnet.ResNet101",
        "keras.applications.ResNet101",
    ]
)
def ResNet101(
    include_top=True,
    weights="imagenet",
    input_tensor=None,
    input_shape=None,
    pooling=None,
    classes=1000,
    classifier_activation="softmax",
    name="resnet101",
):
    """Instantiate ResNet101: v1 stacks of 3/4/23/3 bottleneck blocks."""

    def stack_fn(features):
        # conv2 keeps stride 1 because the stem max pool already downsampled.
        features = stack_residual_blocks_v1(features, 64, 3, stride1=1, name="conv2")
        features = stack_residual_blocks_v1(features, 128, 4, name="conv3")
        features = stack_residual_blocks_v1(features, 256, 23, name="conv4")
        return stack_residual_blocks_v1(features, 512, 3, name="conv5")

    return ResNet(
        stack_fn,
        preact=False,
        use_bias=True,
        name=name,
        weights_name="resnet101",
        include_top=include_top,
        weights=weights,
        input_tensor=input_tensor,
        input_shape=input_shape,
        pooling=pooling,
        classes=classes,
        classifier_activation=classifier_activation,
    )


@keras_export(
    [
        "keras.applications.resnet.ResNet152",
        "keras.applications.ResNet152",
    ]
)
def ResNet152(
    include_top=True,
    weights="imagenet",
    input_tensor=None,
    input_shape=None,
    pooling=None,
    classes=1000,
    classifier_activation="softmax",
    name="resnet152",
):
    """Instantiate ResNet152: v1 stacks of 3/8/36/3 bottleneck blocks."""

    def stack_fn(features):
        # conv2 keeps stride 1 because the stem max pool already downsampled.
        features = stack_residual_blocks_v1(features, 64, 3, stride1=1, name="conv2")
        features = stack_residual_blocks_v1(features, 128, 8, name="conv3")
        features = stack_residual_blocks_v1(features, 256, 36, name="conv4")
        return stack_residual_blocks_v1(features, 512, 3, name="conv5")

    return ResNet(
        stack_fn,
        preact=False,
        use_bias=True,
        name=name,
        weights_name="resnet152",
        include_top=include_top,
        weights=weights,
        input_tensor=input_tensor,
        input_shape=input_shape,
        pooling=pooling,
        classes=classes,
        classifier_activation=classifier_activation,
    )


@keras_export(
    [
        "keras.applications.resnet50.preprocess_input",
        "keras.applications.resnet.preprocess_input",
    ]
)
def preprocess_input(x, data_format=None):
    """Preprocess images for ResNet ("caffe" mode: RGB->BGR, mean subtraction)."""
    return imagenet_utils.preprocess_input(x, data_format=data_format, mode="caffe")


@keras_export(
    [
        "keras.applications.resnet50.decode_predictions",
        "keras.applications.resnet.decode_predictions",
    ]
)
def decode_predictions(preds, top=5):
    """Decode ImageNet scores into (class_id, description, probability) tuples."""
    return imagenet_utils.decode_predictions(preds, top=top)


# Render the shared preprocessing/decoding documentation for this module.
preprocess_input.__doc__ = imagenet_utils.PREPROCESS_INPUT_DOC.format(
    mode="",
    ret=imagenet_utils.PREPROCESS_INPUT_RET_DOC_CAFFE,
    error=imagenet_utils.PREPROCESS_INPUT_ERROR_DOC,
)
decode_predictions.__doc__ = imagenet_utils.decode_predictions.__doc__

DOC = "Reference: 以下略 "

# Append the shared reference note to each public model docstring.
# The `is not None` guards cover interpreters that strip docstrings.
if ResNet50.__doc__ is not None:
    ResNet50.__doc__ = ResNet50.__doc__ + DOC
if ResNet101.__doc__ is not None:
    ResNet101.__doc__ = ResNet101.__doc__ + DOC
if ResNet152.__doc__ is not None:
    ResNet152.__doc__ = ResNet152.__doc__ + DOC

WideResNetはkerasには含まれない。

幅を広げる方向で精度を高めるアプローチは、EfficientNetの登場により適していないとされた。

MobileNet

MobileNet は チャンネルを分離し空間方向にのみ畳み込みを行うdepthwise と 1x1畳み込みであるpointwiseを使用して計算量を減らしている。

MobileNetV2では、前方の1x1でチャンネルを増やし、depthwise に送り、後方の1x1でチャンネル数を戻す。そして、情報の損失を防ぐため線形で出力を行う。これをリニアボトルネックという。

MobileNetV3では、そのリニアボトルネックに加え1x1でチャンネルを戻す際、チャンネルごとの重要度を取り決めるSE層が用意されており、これにより重要なチャンネルだけ残し精度の向上が期待できる。

https://github.com/keras-team/keras/blob/master/keras/src/applications/mobilenet.py

import warnings

from keras.src import backend
from keras.src import layers
from keras.src.api_export import keras_export
from keras.src.applications import imagenet_utils
from keras.src.models import Functional
from keras.src.ops import operation_utils
from keras.src.utils import file_utils

# Root URL for the pretrained MobileNet (v1) weight files.
BASE_WEIGHT_PATH = (
    "https://storage.googleapis.com/tensorflow/keras-applications/mobilenet/"
)


@keras_export(
    [
        "keras.applications.mobilenet.MobileNet",
        "keras.applications.MobileNet",
    ]
)
def MobileNet(
    input_shape=None,
    alpha=1.0,
    depth_multiplier=1,
    dropout=1e-3,
    include_top=True,
    weights="imagenet",
    input_tensor=None,
    pooling=None,
    classes=1000,
    classifier_activation="softmax",
    name=None,
):
    """Instantiates the MobileNet (v1) architecture.

    Args:
        input_shape: optional input shape tuple; defaults to a 224x224 RGB
            image when omitted.
        alpha: width multiplier; uniformly scales the filter count of every
            layer.
        depth_multiplier: depthwise-conv channel multiplier (must be 1 when
            loading ImageNet weights).
        dropout: dropout rate applied before the classifier head.
        include_top: whether to include the classification head.
        weights: one of `None`, `"imagenet"`, or a path to a weights file.
        input_tensor: optional Keras tensor to use as the model input.
        pooling: optional global pooling ("avg"/"max") when
            `include_top=False`.
        classes: number of output classes (must be 1000 for ImageNet
            weights with `include_top=True`).
        classifier_activation: activation of the prediction layer.
        name: optional model name; derived from `alpha` and the input rows
            when `None`.

    Returns:
        A `Functional` model instance.

    Raises:
        ValueError: for an invalid `weights` value, or for
            `classes`/`alpha`/`depth_multiplier` combinations that are
            incompatible with the ImageNet weights.
    """
    # --- Argument validation -------------------------------------------
    if not (weights in {"imagenet", None} or file_utils.exists(weights)):
        raise ValueError(
            "The `weights` argument should be either "
            "`None` (random initialization), 'imagenet' "
            "(pre-training on ImageNet), "
            "or the path to the weights file to be loaded. "
            f"Received weights={weights}"
        )

    # ImageNet weights were trained with a 1000-way classifier.
    if weights == "imagenet" and include_top and classes != 1000:
        raise ValueError(
            "If using `weights='imagenet'` with `include_top=True`, "
            "`classes` should be 1000.  "
            f"Received classes={classes}"
        )

    # --- Determine proper input shape and default size -----------------
    if input_shape is None:
        default_size = 224
    else:
        if backend.image_data_format() == "channels_first":
            rows = input_shape[1]
            cols = input_shape[2]
        else:
            rows = input_shape[0]
            cols = input_shape[1]

        # Pretrained weights exist only for these square sizes.
        if rows == cols and rows in [128, 160, 192, 224]:
            default_size = rows
        else:
            default_size = 224

    input_shape = imagenet_utils.obtain_input_shape(
        input_shape,
        default_size=default_size,
        min_size=32,
        data_format=backend.image_data_format(),
        require_flatten=include_top,
        weights=weights,
    )

    if backend.image_data_format() == "channels_last":
        row_axis, col_axis = (0, 1)
    else:
        row_axis, col_axis = (1, 2)
    rows = input_shape[row_axis]
    cols = input_shape[col_axis]

    if weights == "imagenet":
        if depth_multiplier != 1:
            raise ValueError(
                "If imagenet weights are being loaded, "
                "depth multiplier must be 1.  "
                f"Received depth_multiplier={depth_multiplier}"
            )

        if alpha not in [0.25, 0.50, 0.75, 1.0]:
            raise ValueError(
                "If imagenet weights are being loaded, "
                "alpha can be one of"
                "`0.25`, `0.50`, `0.75` or `1.0` only.  "
                f"Received alpha={alpha}"
            )

        if rows != cols or rows not in [128, 160, 192, 224]:
            # Fall back to the 224x224 weight file for unsupported sizes.
            rows = 224
            warnings.warn(
                "`input_shape` is undefined or non-square, "
                "or `rows` is not in [128, 160, 192, 224]. "
                "Weights for input shape (224, 224) will be "
                "loaded as the default.",
                stacklevel=2,
            )

    if input_tensor is None:
        img_input = layers.Input(shape=input_shape)
    else:
        if not backend.is_keras_tensor(input_tensor):
            img_input = layers.Input(tensor=input_tensor, shape=input_shape)
        else:
            img_input = input_tensor

    # --- Backbone ------------------------------------------------------
    x = _conv_block(img_input, 32, alpha, strides=(2, 2))

    # Depthwise-separable stacks: spatial-only 3x3 depthwise conv followed
    # by a 1x1 pointwise conv. Stride-2 blocks halve the resolution.
    x = _depthwise_conv_block(x, 64, alpha, depth_multiplier, block_id=1)

    x = _depthwise_conv_block(x, 128, alpha, depth_multiplier, strides=(2, 2), block_id=2)
    x = _depthwise_conv_block(x, 128, alpha, depth_multiplier, block_id=3)

    x = _depthwise_conv_block(x, 256, alpha, depth_multiplier, strides=(2, 2), block_id=4)
    x = _depthwise_conv_block(x, 256, alpha, depth_multiplier, block_id=5)

    # FIX: `block_id=` had no value here (a SyntaxError); restored to 6,
    # completing the 1..13 block numbering.
    x = _depthwise_conv_block(x, 512, alpha, depth_multiplier, strides=(2, 2), block_id=6)
    x = _depthwise_conv_block(x, 512, alpha, depth_multiplier, block_id=7)
    x = _depthwise_conv_block(x, 512, alpha, depth_multiplier, block_id=8)
    x = _depthwise_conv_block(x, 512, alpha, depth_multiplier, block_id=9)
    x = _depthwise_conv_block(x, 512, alpha, depth_multiplier, block_id=10)
    x = _depthwise_conv_block(x, 512, alpha, depth_multiplier, block_id=11)

    x = _depthwise_conv_block(x, 1024, alpha, depth_multiplier, strides=(2, 2), block_id=12)
    x = _depthwise_conv_block(x, 1024, alpha, depth_multiplier, block_id=13)

    # --- Head: global average pooling, dropout, 1x1-conv classifier ----
    if include_top:
        x = layers.GlobalAveragePooling2D(keepdims=True)(x)
        x = layers.Dropout(dropout, name="dropout")(x)
        x = layers.Conv2D(classes, (1, 1), padding="same", name="conv_preds")(x)
        x = layers.Reshape((classes,), name="reshape_2")(x)
        imagenet_utils.validate_activation(classifier_activation, weights)
        x = layers.Activation(activation=classifier_activation, name="predictions")(x)
    else:
        if pooling == "avg":
            x = layers.GlobalAveragePooling2D()(x)
        elif pooling == "max":
            x = layers.GlobalMaxPooling2D()(x)

    if input_tensor is not None:
        inputs = operation_utils.get_source_inputs(input_tensor)
    else:
        inputs = img_input

    # --- Build the model -----------------------------------------------
    if name is None:
        name = f"mobilenet_{alpha:0.2f}_{rows}"
    model = Functional(inputs, x, name=name)

    # --- Load weights ---------------------------------------------------
    if weights == "imagenet":
        if alpha == 1.0:
            alpha_text = "1_0"
        elif alpha == 0.75:
            alpha_text = "7_5"
        elif alpha == 0.50:
            alpha_text = "5_0"
        else:
            alpha_text = "2_5"

        # The two weight files differ only by a "_no_top" suffix, so the
        # previously duplicated branches are merged.
        suffix = "" if include_top else "_no_top"
        model_name = "mobilenet_%s_%d_tf%s.h5" % (alpha_text, rows, suffix)
        weights_path = file_utils.get_file(
            model_name, BASE_WEIGHT_PATH + model_name, cache_subdir="models"
        )
        model.load_weights(weights_path)
    elif weights is not None:
        model.load_weights(weights)

    return model


def _conv_block(inputs, filters, alpha, kernel=(3, 3), strides=(1, 1)):
    """Stem block: Conv2D -> BatchNorm -> ReLU6.

    `alpha` scales the filter count before the convolution is built.
    """
    if backend.image_data_format() == "channels_first":
        bn_axis = 1
    else:
        bn_axis = -1

    scaled_filters = int(filters * alpha)
    conv = layers.Conv2D(
        scaled_filters,
        kernel,
        padding="same",
        use_bias=False,
        strides=strides,
        name="conv1",
    )(inputs)
    normalized = layers.BatchNormalization(axis=bn_axis, name="conv1_bn")(conv)
    return layers.ReLU(6.0, name="conv1_relu")(normalized)


def _depthwise_conv_block(
    inputs,
    pointwise_conv_filters,
    alpha,
    depth_multiplier=1,
    strides=(1, 1),
    block_id=1,
):
    """Depthwise-separable block: 3x3 depthwise conv + 1x1 pointwise conv.

    Each convolution is followed by batch normalization (to curb
    vanishing/exploding gradients) and a ReLU6 activation.
    """
    if backend.image_data_format() == "channels_first":
        bn_axis = 1
    else:
        bn_axis = -1

    # alpha uniformly scales the pointwise (1x1) output width.
    out_channels = int(pointwise_conv_filters * alpha)

    strided = strides != (1, 1)
    if strided:
        # Asymmetric pad so the "valid" strided depthwise conv lines up.
        features = layers.ZeroPadding2D(
            ((0, 1), (0, 1)), name="conv_pad_%d" % block_id
        )(inputs)
    else:
        features = inputs

    # Depthwise 3x3: spatial-only filtering, one filter per input channel.
    features = layers.DepthwiseConv2D(
        (3, 3),
        padding="valid" if strided else "same",
        depth_multiplier=depth_multiplier,
        strides=strides,
        use_bias=False,
        name="conv_dw_%d" % block_id,
    )(features)
    features = layers.BatchNormalization(
        axis=bn_axis, name="conv_dw_%d_bn" % block_id
    )(features)
    # ReLU6 caps activations at 6.
    features = layers.ReLU(6.0, name="conv_dw_%d_relu" % block_id)(features)

    # Pointwise 1x1: mixes channels up to the target width.
    features = layers.Conv2D(
        out_channels,
        (1, 1),
        padding="same",
        use_bias=False,
        strides=(1, 1),
        name="conv_pw_%d" % block_id,
    )(features)
    features = layers.BatchNormalization(
        axis=bn_axis, name="conv_pw_%d_bn" % block_id
    )(features)
    return layers.ReLU(6.0, name="conv_pw_%d_relu" % block_id)(features)


@keras_export("keras.applications.mobilenet.preprocess_input")
def preprocess_input(x, data_format=None):
    """Delegate to the shared ImageNet preprocessor in "tf" mode."""
    preprocessed = imagenet_utils.preprocess_input(
        x, data_format=data_format, mode="tf"
    )
    return preprocessed

@keras_export("keras.applications.mobilenet.decode_predictions")
def decode_predictions(preds, top=5):
    """Delegate to the shared ImageNet prediction decoder."""
    decoded = imagenet_utils.decode_predictions(preds, top=top)
    return decoded


# Render the shared preprocessing docstring ("tf"-mode return docs).
preprocess_input.__doc__ = imagenet_utils.PREPROCESS_INPUT_DOC.format(
    mode="",
    ret=imagenet_utils.PREPROCESS_INPUT_RET_DOC_TF,
    error=imagenet_utils.PREPROCESS_INPUT_ERROR_DOC,
)
# Reuse the canonical decode_predictions documentation.
decode_predictions.__doc__ = imagenet_utils.decode_predictions.__doc__

MobileNetでは ReLU6 が使われている。これは6よりも大きい値を6に留めることで、勾配爆発を防いでいる。

後続のMobileNetV3では、より高性能なh-swishを使っている。更にSE層も加わりより重要なチャンネルの選別もしている。

https://github.com/keras-team/keras/blob/master/keras/src/applications/mobilenet_v3.py

import warnings

from keras.src import backend
from keras.src import layers
from keras.src.api_export import keras_export
from keras.src.applications import imagenet_utils
from keras.src.models import Functional
from keras.src.ops import operation_utils
from keras.src.utils import file_utils

# Root URL for the pretrained MobileNetV3 weight files.
BASE_WEIGHT_PATH = (
    "https://storage.googleapis.com/tensorflow/keras-applications/mobilenet_v3/"
)
# Maps "<type>[_minimalistic]_224_<alpha>_float" to a pair of file hashes:
# index 0 for the include_top file, index 1 for the no-top file. Used by
# `file_utils.get_file` to validate downloads.
WEIGHTS_HASHES = {
    "large_224_0.75_float": (
        "765b44a33ad4005b3ac83185abf1d0eb",
        "40af19a13ebea4e2ee0c676887f69a2e",
    ),
    "large_224_1.0_float": (
        "59e551e166be033d707958cf9e29a6a7",
        "07fb09a5933dd0c8eaafa16978110389",
    ),
    "large_minimalistic_224_1.0_float": (
        "675e7b876c45c57e9e63e6d90a36599c",
        "ec5221f64a2f6d1ef965a614bdae7973",
    ),
    "small_224_0.75_float": (
        "cb65d4e5be93758266aa0a7f2c6708b7",
        "ebdb5cc8e0b497cd13a7c275d475c819",
    ),
    "small_224_1.0_float": (
        "8768d4c2e7dee89b9d02b2d03d65d862",
        "d3e8ec802a04aa4fc771ee12a9a9b836",
    ),
    "small_minimalistic_224_1.0_float": (
        "99cd97fb2fcdad2bf028eb838de69e37",
        "cde8136e733e811080d9fcd8a252f7e4",
    ),
}


# Shared docstring template for the Small/Large wrappers (elided here).
BASE_DOCSTRING = "省略"

# MobileNetV3 core builder. `stack_fn` supplies the variant-specific
# inverted-residual body (Small vs. Large).
def MobileNetV3(
    stack_fn,
    last_point_ch,
    input_shape=None,
    alpha=1.0,
    model_type="large",
    minimalistic=False,
    include_top=True,
    weights="imagenet",
    input_tensor=None,
    classes=1000,
    pooling=None,
    dropout_rate=0.2,
    classifier_activation="softmax",
    include_preprocessing=True,
    name=None,
):
    """Assemble a MobileNetV3 model around the given `stack_fn`.

    Args:
        stack_fn: callable `(x, kernel, activation, se_ratio) -> x` that
            builds the inverted-residual body.
        last_point_ch: channel count of the penultimate 1x1 conv head.
        input_shape: optional input shape tuple.
        alpha: width multiplier (applied inside `stack_fn` and, when
            greater than 1.0, to `last_point_ch`).
        model_type: "large" or "small"; selects the pretrained weight file.
        minimalistic: if True, use 3x3 kernels, plain ReLU and no SE blocks.
        include_top: whether to attach the classification head.
        weights: `None`, `"imagenet"`, or a path to a weights file.
        input_tensor: optional Keras tensor used as the model input.
        classes: number of classes for the head.
        pooling: optional "avg"/"max" global pooling when
            `include_top=False`.
        dropout_rate: dropout rate applied before the logits conv.
        classifier_activation: activation of the prediction layer.
        include_preprocessing: if True, a Rescaling layer maps [0, 255]
            inputs to [-1, 1] inside the model.
        name: model name.

    Returns:
        A `Functional` model.

    Raises:
        ValueError: on invalid `weights`, `classes`, `alpha`, inconsistent
            `input_shape`/`input_tensor`, or inputs smaller than 32x32.
    """

    # Validate `weights`: must be None, "imagenet", or an existing file.
    if not (weights in {"imagenet", None} or file_utils.exists(weights)):
        raise ValueError(
            "The `weights` argument should be either "
            "`None` (random initialization), `imagenet` "
            "(pre-training on ImageNet), "
            "or the path to the weights file to be loaded.  "
            f"Received weights={weights}"
        )

    # ImageNet weights were trained with a 1000-way classifier.
    if weights == "imagenet" and include_top and classes != 1000:
        raise ValueError(
            'If using `weights="imagenet"` with `include_top` '
            "as true, `classes` should be 1000.  "
            f"Received classes={classes}"
        )

    # When both are given, confirm `input_tensor` is a Keras tensor and
    # that its spatial dims agree with `input_shape`.
    if input_shape is not None and input_tensor is not None:
        try:
            is_input_t_tensor = backend.is_keras_tensor(input_tensor)
        except ValueError:
            try:
                is_input_t_tensor = backend.is_keras_tensor(
                    operation_utils.get_source_inputs(input_tensor)
                )
            except ValueError:
                raise ValueError(
                    "input_tensor: ",
                    input_tensor,
                    "is not type input_tensor.  "
                    f"Received type(input_tensor)={type(input_tensor)}",
                )
        if is_input_t_tensor:
            if backend.image_data_format() == "channels_first":
                if input_tensor.shape[1] != input_shape[1]:
                    raise ValueError(
                        "When backend.image_data_format()=channels_first, "
                        "input_shape[1] must equal "
                        "input_tensor.shape[1].  Received "
                        f"input_shape={input_shape}, "
                        "input_tensor.shape="
                        f"{input_tensor.shape}"
                    )
            else:
                if input_tensor.shape[2] != input_shape[1]:
                    raise ValueError(
                        "input_shape[1] must equal "
                        "input_tensor.shape[2].  Received "
                        f"input_shape={input_shape}, "
                        "input_tensor.shape="
                        f"{input_tensor.shape}"
                    )
        else:
            raise ValueError(
                "input_tensor specified: ",
                input_tensor,
                "is not a keras tensor",
            )

    # Derive `input_shape` from `input_tensor` when only the latter is
    # provided.
    if input_shape is None and input_tensor is not None:
        try:
            backend.is_keras_tensor(input_tensor)
        except ValueError:
            raise ValueError(
                "input_tensor: ",
                input_tensor,
                "is type: ",
                type(input_tensor),
                "which is not a valid type",
            )

        if backend.is_keras_tensor(input_tensor):
            if backend.image_data_format() == "channels_first":
                rows = input_tensor.shape[2]
                cols = input_tensor.shape[3]
                input_shape = (3, cols, rows)
            else:
                rows = input_tensor.shape[1]
                cols = input_tensor.shape[2]
                input_shape = (cols, rows, 3)
    # If input_shape is None and input_tensor is None using standard shape
    if input_shape is None and input_tensor is None:
        if backend.image_data_format() == "channels_last":
            input_shape = (None, None, 3)
        else:
            input_shape = (3, None, None)

    if backend.image_data_format() == "channels_last":
        row_axis, col_axis = (0, 1)
    else:
        row_axis, col_axis = (1, 2)
    rows = input_shape[row_axis]
    cols = input_shape[col_axis]
    if rows and cols and (rows < 32 or cols < 32):
        raise ValueError(
            "Input size must be at least 32x32; Received `input_shape="
            f"{input_shape}`"
        )
    if weights == "imagenet":
        # Pretrained weights only exist for specific alpha values.
        if (
            not minimalistic
            and alpha not in [0.75, 1.0]
            or minimalistic
            and alpha != 1.0
        ):
            raise ValueError(
                "If imagenet weights are being loaded, "
                "alpha can be one of `0.75`, `1.0` for non minimalistic "
                "or `1.0` for minimalistic only."
            )

        if rows != cols or rows != 224:
            warnings.warn(
                "`input_shape` is undefined or non-square, "
                "or `rows` is not 224. "
                "Weights for input shape (224, 224) will be "
                "loaded as the default.",
                stacklevel=2,
            )

    if input_tensor is None:
        img_input = layers.Input(shape=input_shape)
    else:
        if not backend.is_keras_tensor(input_tensor):
            img_input = layers.Input(tensor=input_tensor, shape=input_shape)
        else:
            img_input = input_tensor

    channel_axis = 1 if backend.image_data_format() == "channels_first" else -1

    # Minimalistic variant drops the costlier pieces: 5x5 depthwise
    # kernels, hard-swish, and squeeze-and-excite.
    if minimalistic:
        kernel = 3
        activation = relu
        se_ratio = None
    else:
        kernel = 5
        activation = hard_swish
        se_ratio = 0.25

    x = img_input
    if include_preprocessing:
        # Rescale [0, 255] pixel values to [-1, 1] inside the model.
        x = layers.Rescaling(scale=1.0 / 127.5, offset=-1.0)(x)


    # Stem: strided 3x3 convolution plus batch normalization.
    x = layers.Conv2D(16,kernel_size=3,strides=(2, 2),padding="same",use_bias=False,name="conv",)(x)
    x = layers.BatchNormalization(axis=channel_axis, epsilon=1e-3, momentum=0.999, name="conv_bn")(x)
    x = activation(x)

    # Variant-specific inverted-residual body.
    x = stack_fn(x, kernel, activation, se_ratio)

    last_conv_ch = _depth(x.shape[channel_axis] * 6)

    # alpha (width multiplier) only widens the head when > 1.0;
    # alpha <= 1.0 leaves `last_point_ch` unchanged.
    if alpha > 1.0:
        last_point_ch = _depth(last_point_ch * alpha)

    x = layers.Conv2D(last_conv_ch,kernel_size=1,padding="same",use_bias=False,name="conv_1",)(x)
    x = layers.BatchNormalization(axis=channel_axis, epsilon=1e-3, momentum=0.999, name="conv_1_bn")(x)
    x = activation(x)

    # Head: GAP followed by 1x1 convs acting as the classifier.
    if include_top:
        x = layers.GlobalAveragePooling2D(keepdims=True)(x)
        x = layers.Conv2D(last_point_ch,kernel_size=1,padding="same",use_bias=True,name="conv_2", )(x)
        x = activation(x)

        if dropout_rate > 0:
            x = layers.Dropout(dropout_rate)(x)

        x = layers.Conv2D(classes, kernel_size=1, padding="same", name="logits")(x)
        x = layers.Flatten()(x)
        imagenet_utils.validate_activation(classifier_activation, weights)
        x = layers.Activation(activation=classifier_activation, name="predictions")(x)

    else:
        if pooling == "avg":
            x = layers.GlobalAveragePooling2D(name="avg_pool")(x)
        elif pooling == "max":
            x = layers.GlobalMaxPooling2D(name="max_pool")(x)

    if input_tensor is not None:
        inputs = operation_utils.get_source_inputs(input_tensor)
    else:
        inputs = img_input


    # Build the functional model.
    model = Functional(inputs, x, name=name)


    # Load pretrained weights (hash-verified download) or a local file.
    if weights == "imagenet":
        model_name = "{}{}_224_{}_float".format(model_type, "_minimalistic" if minimalistic else "", str(alpha))
        if include_top:
            file_name = f"weights_mobilenet_v3_{model_name}.h5"
            file_hash = WEIGHTS_HASHES[model_name][0]
        else:
            file_name = f"weights_mobilenet_v3_{model_name}_no_top_v2.h5"
            file_hash = WEIGHTS_HASHES[model_name][1]

        weights_path = file_utils.get_file(file_name,BASE_WEIGHT_PATH + file_name,cache_subdir="models",file_hash=file_hash,)
        model.load_weights(weights_path)
    elif weights is not None:
        model.load_weights(weights)



    return model


# Small MobileNetV3 variant: defines its inverted-residual stack and
# delegates model assembly to the MobileNetV3 core builder.
@keras_export("keras.applications.MobileNetV3Small")
def MobileNetV3Small(
    input_shape=None,
    alpha=1.0,
    minimalistic=False,
    include_top=True,
    weights="imagenet",
    input_tensor=None,
    classes=1000,
    pooling=None,
    dropout_rate=0.2,
    classifier_activation="softmax",
    include_preprocessing=True,
    name="MobileNetV3Small",
):
    def stack_fn(x, kernel, activation, se_ratio):
        def depth(d):
            return _depth(d * alpha)

        # Per block: (expansion, out_channels, kernel, stride, se, act, id)
        specs = (
            (1, depth(16), 3, 2, se_ratio, relu, 0),
            (72.0 / 16, depth(24), 3, 2, None, relu, 1),
            (88.0 / 24, depth(24), 3, 1, None, relu, 2),
            (4, depth(40), kernel, 2, se_ratio, activation, 3),
            (6, depth(40), kernel, 1, se_ratio, activation, 4),
            (6, depth(40), kernel, 1, se_ratio, activation, 5),
            (3, depth(48), kernel, 1, se_ratio, activation, 6),
            (3, depth(48), kernel, 1, se_ratio, activation, 7),
            (6, depth(96), kernel, 2, se_ratio, activation, 8),
            (6, depth(96), kernel, 1, se_ratio, activation, 9),
            (6, depth(96), kernel, 1, se_ratio, activation, 10),
        )
        for expansion, out_ch, k, stride, se, act, idx in specs:
            x = _inverted_res_block(x, expansion, out_ch, k, stride, se, act, idx)

        return x

    return MobileNetV3(
        stack_fn,
        last_point_ch=1024,
        input_shape=input_shape,
        alpha=alpha,
        model_type="small",
        minimalistic=minimalistic,
        include_top=include_top,
        weights=weights,
        input_tensor=input_tensor,
        classes=classes,
        pooling=pooling,
        dropout_rate=dropout_rate,
        classifier_activation=classifier_activation,
        include_preprocessing=include_preprocessing,
        name=name,
    )


# Large MobileNetV3 variant: defines its inverted-residual stack and
# delegates model assembly to the MobileNetV3 core builder.
@keras_export("keras.applications.MobileNetV3Large")
def MobileNetV3Large(
    input_shape=None,
    alpha=1.0,
    minimalistic=False,
    include_top=True,
    weights="imagenet",
    input_tensor=None,
    classes=1000,
    pooling=None,
    dropout_rate=0.2,
    classifier_activation="softmax",
    include_preprocessing=True,
    name="MobileNetV3Large",
):
    def stack_fn(x, kernel, activation, se_ratio):
        def depth(d):
            return _depth(d * alpha)

        # Per block: (expansion, out_channels, kernel, stride, se, act, id)
        specs = (
            (1, depth(16), 3, 1, None, relu, 0),
            (4, depth(24), 3, 2, None, relu, 1),
            (3, depth(24), 3, 1, None, relu, 2),
            (3, depth(40), kernel, 2, se_ratio, relu, 3),
            (3, depth(40), kernel, 1, se_ratio, relu, 4),
            (3, depth(40), kernel, 1, se_ratio, relu, 5),
            (6, depth(80), 3, 2, None, activation, 6),
            (2.5, depth(80), 3, 1, None, activation, 7),
            (2.3, depth(80), 3, 1, None, activation, 8),
            (2.3, depth(80), 3, 1, None, activation, 9),
            (6, depth(112), 3, 1, se_ratio, activation, 10),
            (6, depth(112), 3, 1, se_ratio, activation, 11),
            (6, depth(160), kernel, 2, se_ratio, activation, 12),
            (6, depth(160), kernel, 1, se_ratio, activation, 13),
            (6, depth(160), kernel, 1, se_ratio, activation, 14),
        )
        for expansion, out_ch, k, stride, se, act, idx in specs:
            x = _inverted_res_block(x, expansion, out_ch, k, stride, se, act, idx)

        return x

    return MobileNetV3(
        stack_fn,
        last_point_ch=1280,
        input_shape=input_shape,
        alpha=alpha,
        model_type="large",
        minimalistic=minimalistic,
        include_top=include_top,
        weights=weights,
        input_tensor=input_tensor,
        classes=classes,
        pooling=pooling,
        dropout_rate=dropout_rate,
        classifier_activation=classifier_activation,
        include_preprocessing=include_preprocessing,
        name=name,
    )


# Fill in the shared docstring template for each public wrapper.
MobileNetV3Small.__doc__ = BASE_DOCSTRING.format(name="MobileNetV3Small")
MobileNetV3Large.__doc__ = BASE_DOCSTRING.format(name="MobileNetV3Large")


def relu(x):
    """Apply an unbounded ReLU activation as a Keras layer."""
    activation_layer = layers.ReLU()
    return activation_layer(x)


def hard_sigmoid(x):
    """Piecewise-linear sigmoid approximation: relu6(x + 3) / 6."""
    shifted = x + 3.0
    clipped = layers.ReLU(6.0)(shifted)
    return clipped * (1.0 / 6.0)

# h-swish: the activation MobileNetV3 uses in place of ReLU6.
def hard_swish(x):
    """Apply the hard-swish activation via Keras' built-in op."""
    activation_layer = layers.Activation("hard_swish")
    return activation_layer(x)


# α値によってチャンネルを削減する関数。すべての層に0.5、0.75を掛けて一律に減らす。
def _depth(v, divisor=8, min_value=None):
    if min_value is None:
        min_value = divisor
    new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
    # Make sure that round down does not go down by more than 10%.
    if new_v < 0.9 * v:
        new_v += divisor
    return new_v


# Squeeze-and-Excitation block: learns a per-channel importance gate.
def _se_block(inputs, filters, se_ratio, prefix):
    """Reweight `inputs` channel-wise with learned gates in [0, 1]."""
    # Squeeze: global average pool to a 1x1 per-channel descriptor.
    descriptor = layers.GlobalAveragePooling2D(
        keepdims=True, name=f"{prefix}squeeze_excite_avg_pool"
    )(inputs)

    # Excite, step 1: bottleneck 1x1 conv (width set by se_ratio) + ReLU.
    descriptor = layers.Conv2D(
        _depth(filters * se_ratio),
        kernel_size=1,
        padding="same",
        name=f"{prefix}squeeze_excite_conv",
    )(descriptor)
    descriptor = layers.ReLU(name=f"{prefix}squeeze_excite_relu")(descriptor)

    # Excite, step 2: 1x1 conv back to `filters`, hard-sigmoid -> [0, 1].
    descriptor = layers.Conv2D(
        filters,
        kernel_size=1,
        padding="same",
        name=f"{prefix}squeeze_excite_conv_1",
    )(descriptor)
    gates = hard_sigmoid(descriptor)

    # Scale: multiply each input channel by its learned gate.
    return layers.Multiply(name=f"{prefix}squeeze_excite_mul")([inputs, gates])


# Inverted residual block (introduced in MobileNetV2):
# 1x1 pointwise expand -> depthwise spatial conv -> 1x1 linear project.
def _inverted_res_block(x, expansion, filters, kernel_size, stride, se_ratio, activation, block_id):
    """One MobileNetV3 inverted-residual block.

    Args:
        x: input tensor.
        expansion: channel expansion factor for the 1x1 expand conv.
        filters: output channel count after the projection conv.
        kernel_size: depthwise kernel size.
        stride: depthwise stride (2 downsamples).
        se_ratio: squeeze-and-excite ratio, or falsy to skip SE.
        activation: callable applied after the expand and depthwise stages.
        block_id: index used in layer names; block 0 skips the expand conv.

    Returns:
        Output tensor of the block.
    """
    channel_axis = 1 if backend.image_data_format() == "channels_first" else -1
    shortcut = x
    prefix = "expanded_conv_"
    infilters = x.shape[channel_axis]

    # Expand channels with a 1x1 conv (skipped entirely for block 0).
    if block_id:
        prefix = f"expanded_conv_{block_id}_"
        x = layers.Conv2D(_depth(infilters * expansion),kernel_size=1,padding="same",use_bias=False,name=f"{prefix}expand",)(x)
        x = layers.BatchNormalization(axis=channel_axis,epsilon=1e-3,momentum=0.999,name=f"{prefix}expand_bn",)(x)

        x = activation(x)

    if stride == 2:
        # Explicit pad so the "valid" strided depthwise conv lines up.
        x = layers.ZeroPadding2D(padding=imagenet_utils.correct_pad(x, kernel_size),name=f"{prefix}depthwise_pad",)(x)

    # Depthwise conv: spatial-only filtering, one filter per channel.
    x = layers.DepthwiseConv2D(kernel_size,strides=stride,padding="same" if stride == 1 else "valid",use_bias=False,name=f"{prefix}depthwise",)(x)
    x = layers.BatchNormalization(axis=channel_axis,epsilon=1e-3,momentum=0.999,name=f"{prefix}depthwise_bn",)(x)
    x = activation(x)

    # Optional SE block: gates the expanded channels with learned
    # importances in [0, 1].
    if se_ratio:
        x = _se_block(x, _depth(infilters * expansion), se_ratio, prefix)

    # Linear bottleneck: 1x1 projection back down with batch norm and no
    # activation afterwards (keeps the output linear).
    x = layers.Conv2D(filters,kernel_size=1,padding="same",use_bias=False,name=f"{prefix}project",)(x)
    x = layers.BatchNormalization(axis=channel_axis,epsilon=1e-3,momentum=0.999,name=f"{prefix}project_bn",)(x)

    # Residual connection only when resolution and channels are unchanged.
    if stride == 1 and infilters == filters:
        x = layers.Add(name=f"{prefix}add")([shortcut, x])

    return x


@keras_export("keras.applications.mobilenet_v3.preprocess_input")
def preprocess_input(x, data_format=None):
    """Identity pass-through: MobileNetV3 models rescale their own inputs
    when built with `include_preprocessing=True`, so nothing to do here."""
    return x


@keras_export("keras.applications.mobilenet_v3.decode_predictions")
def decode_predictions(preds, top=5):
    """Delegate to the shared ImageNet prediction decoder."""
    decoded = imagenet_utils.decode_predictions(preds, top=top)
    return decoded


# Reuse the canonical decode_predictions documentation.
decode_predictions.__doc__ = imagenet_utils.decode_predictions.__doc__

DenseNet

DenseNetは残差接続ではなく、結合によって入力値をそのまま後続に引き渡す。残差接続と違って、情報をロスなく引き継ぐことができる。だが、メモリ消費量が多い。これがDense Blockである。

この値の結合時、各層でどれぐらいのチャンネルを生成するかが問題になる。成長率(k)で調整を行う。チャンネルを増やしすぎれば幅は太くなり表現力は増すが、計算コストは一気に増える。

結合を繰り返すことでチャンネル数はどんどん増えていくため、適度に削減をしなければならない。これがTransition Layerである。1x1畳み込みと2x2平均プーリングを使う。

DenseNetはこのDenseBlockとTransitionLayerの2つで作られている。

https://github.com/keras-team/keras/blob/master/keras/src/applications/densenet.py

from keras.src import backend
from keras.src import layers
from keras.src.api_export import keras_export
from keras.src.applications import imagenet_utils
from keras.src.models import Functional
from keras.src.ops import operation_utils
from keras.src.utils import file_utils

# Download locations for the pretrained DenseNet weight files, with and
# without the classification head ("notop").
BASE_WEIGHTS_PATH = ("https://storage.googleapis.com/tensorflow/keras-applications/densenet/")
DENSENET121_WEIGHT_PATH = (f"{BASE_WEIGHTS_PATH}densenet121_weights_tf_dim_ordering_tf_kernels.h5")
DENSENET121_WEIGHT_PATH_NO_TOP = (f"{BASE_WEIGHTS_PATH}"
    "densenet121_weights_tf_dim_ordering_tf_kernels_notop.h5")
DENSENET169_WEIGHT_PATH = (f"{BASE_WEIGHTS_PATH}densenet169_weights_tf_dim_ordering_tf_kernels.h5")
DENSENET169_WEIGHT_PATH_NO_TOP = (f"{BASE_WEIGHTS_PATH}"
    "densenet169_weights_tf_dim_ordering_tf_kernels_notop.h5")
DENSENET201_WEIGHT_PATH = (f"{BASE_WEIGHTS_PATH}densenet201_weights_tf_dim_ordering_tf_kernels.h5")
DENSENET201_WEIGHT_PATH_NO_TOP = (f"{BASE_WEIGHTS_PATH}"
    "densenet201_weights_tf_dim_ordering_tf_kernels_notop.h5")


# Dense block: each conv block concatenates its output onto the running
# feature map, so later layers see all earlier outputs.
def dense_block(x, blocks, name):
    """Apply `blocks` densely-connected conv blocks (growth rate 32)."""
    for idx in range(1, blocks + 1):
        x = conv_block(x, 32, name=f"{name}_block{idx}")
    return x


# Transition layer: compresses channels (1x1 conv, factor `reduction`)
# and halves the spatial size (2x2 average pooling) between dense blocks.
def transition_block(x, reduction, name):
    """BN + ReLU, then channel compression and 2x average pooling."""
    if backend.image_data_format() == "channels_last":
        bn_axis = 3
    else:
        bn_axis = 1

    out = layers.BatchNormalization(
        axis=bn_axis, epsilon=1.001e-5, name=f"{name}_bn"
    )(x)
    out = layers.Activation("relu", name=f"{name}_relu")(out)

    # 1x1 conv shrinks channels; average pooling shrinks the feature map.
    out = layers.Conv2D(
        int(out.shape[bn_axis] * reduction),
        1,
        use_bias=False,
        name=f"{name}_conv",
    )(out)
    return layers.AveragePooling2D(2, strides=2, name=f"{name}_pool")(out)


def conv_block(x, growth_rate, name):
    """DenseNet bottleneck block.

    BN-ReLU-Conv1x1 (4 * growth_rate) then BN-ReLU-Conv3x3 (growth_rate),
    with the result concatenated onto the input along the channel axis.
    """
    bn_axis = 3 if backend.image_data_format() == "channels_last" else 1

    out = layers.BatchNormalization(
        axis=bn_axis, epsilon=1.001e-5, name=f"{name}_0_bn"
    )(x)
    out = layers.Activation("relu", name=f"{name}_0_relu")(out)
    # 1x1 bottleneck expands to 4 * growth_rate channels.
    out = layers.Conv2D(
        4 * growth_rate, 1, use_bias=False, name=f"{name}_1_conv"
    )(out)
    out = layers.BatchNormalization(
        axis=bn_axis, epsilon=1.001e-5, name=f"{name}_1_bn"
    )(out)
    out = layers.Activation("relu", name=f"{name}_1_relu")(out)
    # 3x3 conv produces the block's growth_rate new feature channels.
    out = layers.Conv2D(
        growth_rate, 3, padding="same", use_bias=False, name=f"{name}_2_conv"
    )(out)
    # Dense connectivity: concatenate the input with the new features.
    return layers.Concatenate(axis=bn_axis, name=f"{name}_concat")([x, out])


def DenseNet(
    blocks,
    include_top=True,
    weights="imagenet",
    input_tensor=None,
    input_shape=None,
    pooling=None,
    classes=1000,
    classifier_activation="softmax",
    name="densenet",
):
    """Instantiates the DenseNet architecture.

    Args:
        blocks: four ints, number of conv blocks in each dense block.
        include_top: whether to include the classifier head.
        weights: `None`, `"imagenet"`, or a path to a weights file.
        input_tensor: optional Keras tensor to use as the model input.
        input_shape: optional shape tuple.
        pooling: optional global pooling (`"avg"`/`"max"`) when
            `include_top=False`.
        classes: number of output classes (1000 required for ImageNet top).
        classifier_activation: activation of the final Dense layer.
        name: model name.

    Returns:
        A `Functional` model instance.

    Raises:
        ValueError: on `channels_first` data format, invalid `weights`,
            a bad `weights`/`classes` combination, or an unknown `blocks`
            layout when ImageNet weights are requested.
    """
    # The reference weights were trained with channels_last only.
    if backend.image_data_format() == "channels_first":
        raise ValueError(
            "DenseNet does not support the `channels_first` image data "
            "format. Switch to `channels_last` by editing your local "
            "config file at ~/.keras/keras.json"
        )
    if not (weights in {"imagenet", None} or file_utils.exists(weights)):
        raise ValueError(
            "The `weights` argument should be either "
            "`None` (random initialization), `imagenet` "
            "(pre-training on ImageNet), "
            "or the path to the weights file to be loaded."
        )

    if weights == "imagenet" and include_top and classes != 1000:
        raise ValueError(
            'If using `weights` as `"imagenet"` with `include_top`'
            " as true, `classes` should be 1000"
        )

    # Determine proper input shape (224 default, 32 minimum).
    input_shape = imagenet_utils.obtain_input_shape(
        input_shape,
        default_size=224,
        min_size=32,
        data_format=backend.image_data_format(),
        require_flatten=include_top,
        weights=weights,
    )

    if input_tensor is None:
        img_input = layers.Input(shape=input_shape)
    else:
        if not backend.is_keras_tensor(input_tensor):
            img_input = layers.Input(tensor=input_tensor, shape=input_shape)
        else:
            img_input = input_tensor

    bn_axis = 3 if backend.image_data_format() == "channels_last" else 1

    # Stem: 7x7/2 conv, BN, ReLU, then 3x3/2 max pooling.
    x = layers.ZeroPadding2D(padding=((3, 3), (3, 3)))(img_input)
    x = layers.Conv2D(64, 7, strides=2, use_bias=False, name="conv1_conv")(x)
    x = layers.BatchNormalization(axis=bn_axis, epsilon=1.001e-5, name="conv1_bn")(x)
    x = layers.Activation("relu", name="conv1_relu")(x)
    x = layers.ZeroPadding2D(padding=((1, 1), (1, 1)))(x)
    x = layers.MaxPooling2D(3, strides=2, name="pool1")(x)

    # Alternate dense blocks (feature concatenation) with transition
    # layers (channel + spatial reduction).
    x = dense_block(x, blocks[0], name="conv2")
    x = transition_block(x, 0.5, name="pool2")
    x = dense_block(x, blocks[1], name="conv3")
    x = transition_block(x, 0.5, name="pool3")
    x = dense_block(x, blocks[2], name="conv4")
    x = transition_block(x, 0.5, name="pool4")
    x = dense_block(x, blocks[3], name="conv5")

    x = layers.BatchNormalization(axis=bn_axis, epsilon=1.001e-5, name="bn")(x)
    x = layers.Activation("relu", name="relu")(x)

    if include_top:
        x = layers.GlobalAveragePooling2D(name="avg_pool")(x)

        imagenet_utils.validate_activation(classifier_activation, weights)
        x = layers.Dense(
            classes, activation=classifier_activation, name="predictions"
        )(x)
    else:
        if pooling == "avg":
            x = layers.GlobalAveragePooling2D(name="avg_pool")(x)
        elif pooling == "max":
            x = layers.GlobalMaxPooling2D(name="max_pool")(x)

    # Account for any layers sitting behind `input_tensor`.
    if input_tensor is not None:
        inputs = operation_utils.get_source_inputs(input_tensor)
    else:
        inputs = img_input

    model = Functional(inputs, x, name=name)

    # Weight loading: table-driven instead of the upstream copy-pasted
    # if/elif chains. Each entry maps the block layout to
    # (architecture prefix, top URL, top hash, no-top URL, no-top hash).
    if weights == "imagenet":
        variant_info = {
            (6, 12, 24, 16): (
                "densenet121",
                DENSENET121_WEIGHT_PATH,
                "9d60b8095a5708f2dcce2bca79d332c7",
                DENSENET121_WEIGHT_PATH_NO_TOP,
                "30ee3e1110167f948a6b9946edeeb738",
            ),
            (6, 12, 32, 32): (
                "densenet169",
                DENSENET169_WEIGHT_PATH,
                "d699b8f76981ab1b30698df4c175e90b",
                DENSENET169_WEIGHT_PATH_NO_TOP,
                "b8c4d4c20dd625c148057b9ff1c1176b",
            ),
            (6, 12, 48, 32): (
                "densenet201",
                DENSENET201_WEIGHT_PATH,
                "1ceb130c1ea1b78c3bf6114dbdfd8807",
                DENSENET201_WEIGHT_PATH_NO_TOP,
                "c13680b51ded0fb44dff2d8f86ac8bb1",
            ),
        }.get(tuple(blocks))
        if variant_info is None:
            raise ValueError("weights_path undefined")
        arch, top_url, top_hash, notop_url, notop_hash = variant_info
        if include_top:
            weights_path = file_utils.get_file(
                f"{arch}_weights_tf_dim_ordering_tf_kernels.h5",
                top_url,
                cache_subdir="models",
                file_hash=top_hash,
            )
        else:
            weights_path = file_utils.get_file(
                f"{arch}_weights_tf_dim_ordering_tf_kernels_notop.h5",
                notop_url,
                cache_subdir="models",
                file_hash=notop_hash,
            )
        model.load_weights(weights_path)
    elif weights is not None:
        model.load_weights(weights)

    return model


@keras_export(
    [
        "keras.applications.densenet.DenseNet121",
        "keras.applications.DenseNet121",
    ]
)
def DenseNet121(
    include_top=True,
    weights="imagenet",
    input_tensor=None,
    input_shape=None,
    pooling=None,
    classes=1000,
    classifier_activation="softmax",
    name="densenet121",
):
    """Instantiates the Densenet121 architecture."""
    # Thin wrapper: fix the dense-block layout to [6, 12, 24, 16].
    return DenseNet(
        blocks=[6, 12, 24, 16],
        include_top=include_top,
        weights=weights,
        input_tensor=input_tensor,
        input_shape=input_shape,
        pooling=pooling,
        classes=classes,
        classifier_activation=classifier_activation,
        name=name,
    )


@keras_export(
    [
        "keras.applications.densenet.DenseNet169",
        "keras.applications.DenseNet169",
    ]
)
def DenseNet169(
    include_top=True,
    weights="imagenet",
    input_tensor=None,
    input_shape=None,
    pooling=None,
    classes=1000,
    classifier_activation="softmax",
    name="densenet169",
):
    """Instantiates the Densenet169 architecture."""
    # Thin wrapper: fix the dense-block layout to [6, 12, 32, 32].
    return DenseNet(
        blocks=[6, 12, 32, 32],
        include_top=include_top,
        weights=weights,
        input_tensor=input_tensor,
        input_shape=input_shape,
        pooling=pooling,
        classes=classes,
        classifier_activation=classifier_activation,
        name=name,
    )


@keras_export(
    [
        "keras.applications.densenet.DenseNet201",
        "keras.applications.DenseNet201",
    ]
)
def DenseNet201(
    include_top=True,
    weights="imagenet",
    input_tensor=None,
    input_shape=None,
    pooling=None,
    classes=1000,
    classifier_activation="softmax",
    name="densenet201",
):
    """Instantiates the Densenet201 architecture."""
    # Thin wrapper: fix the dense-block layout to [6, 12, 48, 32].
    return DenseNet(
        blocks=[6, 12, 48, 32],
        include_top=include_top,
        weights=weights,
        input_tensor=input_tensor,
        input_shape=input_shape,
        pooling=pooling,
        classes=classes,
        classifier_activation=classifier_activation,
        name=name,
    )


@keras_export("keras.applications.densenet.preprocess_input")
def preprocess_input(x, data_format=None):
    """Preprocess inputs for DenseNet via `"torch"`-mode normalization."""
    return imagenet_utils.preprocess_input(x, data_format=data_format, mode="torch")


@keras_export("keras.applications.densenet.decode_predictions")
def decode_predictions(preds, top=5):
    """Decode ImageNet prediction vectors into the `top` best classes."""
    return imagenet_utils.decode_predictions(preds, top=top)


# Attach the shared preprocessing docstring. `mode="torch"` scales pixels
# to [0, 1] and normalizes each channel with the ImageNet mean/std.
preprocess_input.__doc__ = imagenet_utils.PREPROCESS_INPUT_DOC.format(
    mode="",
    ret=imagenet_utils.PREPROCESS_INPUT_RET_DOC_TORCH,
    error=imagenet_utils.PREPROCESS_INPUT_ERROR_DOC,
)
decode_predictions.__doc__ = imagenet_utils.decode_predictions.__doc__

# Shared tail appended to each variant's docstring ("省略" = abbreviated in
# this excerpt; upstream this holds the full usage documentation).
DOC = "省略"

# Extend docstrings only when present (they are stripped e.g. under -OO).
if DenseNet121.__doc__ is not None:
    setattr(DenseNet121, "__doc__", DenseNet121.__doc__ + DOC)
if DenseNet169.__doc__ is not None:
    setattr(DenseNet169, "__doc__", DenseNet169.__doc__ + DOC)
if DenseNet201.__doc__ is not None:
    setattr(DenseNet201, "__doc__", DenseNet201.__doc__ + DOC)

EfficientNet

EfficientNetは幅と深さと解像度を考慮してスケールする。

下記コードの末端には、B0~B7まで、Width, Depth, Resolution の3つをそれぞれ指定し、最適なものを用意している。

https://github.com/keras-team/keras/blob/master/keras/src/applications/efficientnet.py

この無印のEfficientNetはMobileNetV2/V3系のリニアボトルネック・インバーテッド残差構造(MBConv)にSE(Squeeze-and-Excitation)を組み合わせたものをそのまま利用している。

つまり、EfficientNetとMobileNetV3の大まかな違いは、スケールしているか否かの違いぐらいでそれ以外はほぼ同じようなもの。

import copy
import math

from keras.src import backend
from keras.src import layers
from keras.src.api_export import keras_export
from keras.src.applications import imagenet_utils
from keras.src.models import Functional
from keras.src.ops import operation_utils
from keras.src.utils import file_utils

# Base URL for the pre-trained EfficientNet weights.
BASE_WEIGHTS_PATH = "https://storage.googleapis.com/keras-applications/"

# Per-variant file hashes: (with-top hash, no-top hash), keyed by the
# `weights_name` ("b0".."b7") passed into `EfficientNet`.
WEIGHTS_HASHES = {
    "b0": (
        "902e53a9f72be733fc0bcb005b3ebbac",
        "50bc09e76180e00e4465e1a485ddc09d",
    ),
    "b1": (
        "1d254153d4ab51201f1646940f018540",
        "74c4e6b3e1f6a1eea24c589628592432",
    ),
    "b2": (
        "b15cce36ff4dcbd00b6dd88e7857a6ad",
        "111f8e2ac8aa800a7a99e3239f7bfb39",
    ),
    "b3": (
        "ffd1fdc53d0ce67064dc6a9c7960ede0",
        "af6d107764bb5b1abb91932881670226",
    ),
    "b4": (
        "18c95ad55216b8f92d7e70b3a046e2fc",
        "ebc24e6d6c33eaebbd558eafbeedf1ba",
    ),
    "b5": (
        "ace28f2a6363774853a83a0b21b9421a",
        "38879255a25d3c92d5e44e04ae6cec6f",
    ),
    "b6": (
        "165f6e37dce68623721b423839de8be5",
        "9ecce42647a20130c1f39a5d4cb75743",
    ),
    "b7": (
        "8c03f828fec3ef71311cd463b6759d99",
        "cbcfe4450ddf6f3ad90b1b398090fe4a",
    ),
}

# Baseline (B0) stage configuration, before width/depth scaling: one dict
# per stage, consumed as keyword arguments by `block`.
DEFAULT_BLOCKS_ARGS = [
{ "kernel_size": 3,"repeats": 1,"filters_in": 32,"filters_out": 16,"expand_ratio": 1,"id_skip": True,"strides": 1,"se_ratio": 0.25, },
{ "kernel_size": 3,"repeats": 2,"filters_in": 16,"filters_out": 24,"expand_ratio": 6,"id_skip": True,"strides": 2,"se_ratio": 0.25,},
{ "kernel_size": 5,"repeats": 2,"filters_in": 24,"filters_out": 40,"expand_ratio": 6,"id_skip": True,"strides": 2,"se_ratio": 0.25,},
{ "kernel_size": 3,"repeats": 3,"filters_in": 40,"filters_out": 80,"expand_ratio": 6,"id_skip": True,"strides": 2,"se_ratio": 0.25,},
{ "kernel_size": 5,"repeats": 3,"filters_in": 80,"filters_out": 112,"expand_ratio": 6,"id_skip": True,"strides": 1,"se_ratio": 0.25,},
{ "kernel_size": 5,"repeats": 4,"filters_in": 112,"filters_out": 192,"expand_ratio": 6,"id_skip": True,"strides": 2,"se_ratio": 0.25,},
{ "kernel_size": 3,"repeats": 1,"filters_in": 192,"filters_out": 320,"expand_ratio": 6,"id_skip": True,"strides": 1,"se_ratio": 0.25,},
]

# Initializer config for conv kernels (He-style fan-out scaling).
CONV_KERNEL_INITIALIZER = {
    "class_name": "VarianceScaling",
    "config": {
        "scale": 2.0,
        "mode": "fan_out",
        "distribution": "truncated_normal",
    },
}

# Initializer config for the classifier Dense kernel.
DENSE_KERNEL_INITIALIZER = {
    "class_name": "VarianceScaling",
    "config": {
        "scale": 1.0 / 3.0,
        "mode": "fan_out",
        "distribution": "uniform",
    },
}

# Docstring template, abbreviated ("省略") in this article excerpt.
BASE_DOCSTRING = "省略"
# Per-channel ImageNet stddev, used to rescale inputs in the stem.
IMAGENET_STDDEV_RGB = [0.229, 0.224, 0.225]


# Core EfficientNet builder, shared by the B0-B7 wrappers below.
def EfficientNet(
    width_coefficient,
    depth_coefficient,
    default_size,
    dropout_rate=0.2,
    drop_connect_rate=0.2,
    depth_divisor=8,
    activation="swish",
    blocks_args="default",
    name="efficientnet",
    include_top=True,
    weights="imagenet",
    input_tensor=None,
    input_shape=None,
    pooling=None,
    classes=1000,
    classifier_activation="softmax",
    weights_name=None,
):
    """Builds an EfficientNet model scaled by width/depth coefficients.

    `width_coefficient` scales channel counts, `depth_coefficient` scales
    the number of block repeats, and `default_size` is the expected input
    resolution. The remaining arguments mirror the other application
    factories (weight selection, optional top, pooling, classes).
    """
    # Resolve the default per-stage configuration.
    if blocks_args == "default":
        blocks_args = DEFAULT_BLOCKS_ARGS

    # Validate `weights` / `classes` combinations.
    if not (weights in {"imagenet", None} or file_utils.exists(weights)):
        raise ValueError(
            "The `weights` argument should be either "
            "`None` (random initialization), `imagenet` "
            "(pre-training on ImageNet), "
            "or the path to the weights file to be loaded."
        )

    if weights == "imagenet" and include_top and classes != 1000:
        raise ValueError(
            'If using `weights="imagenet"` with `include_top`'
            " as true, `classes` should be 1000"
        )

    # Determine a proper input shape and input tensor.
    input_shape = imagenet_utils.obtain_input_shape(
        input_shape,
        default_size=default_size,
        min_size=32,
        data_format=backend.image_data_format(),
        require_flatten=include_top,
        weights=weights,
    )

    if input_tensor is None:
        img_input = layers.Input(shape=input_shape)
    else:
        if not backend.is_keras_tensor(input_tensor):
            img_input = layers.Input(tensor=input_tensor, shape=input_shape)
        else:
            img_input = input_tensor

    bn_axis = 3 if backend.image_data_format() == "channels_last" else 1

    def _scaled_filters(filters, divisor=depth_divisor):
        """Apply the width multiplier and round to the divisor grid."""
        filters *= width_coefficient
        rounded = max(divisor, int(filters + divisor / 2) // divisor * divisor)
        # Never round down by more than 10%.
        if rounded < 0.9 * filters:
            rounded += divisor
        return int(rounded)

    def _scaled_repeats(repeats):
        """Apply the depth multiplier, rounding up."""
        return int(math.ceil(depth_coefficient * repeats))

    # Stem: rescale/normalize inputs, then a strided 3x3 conv.
    x = img_input
    x = layers.Rescaling(1.0 / 255.0)(x)
    x = layers.Normalization(axis=bn_axis)(x)

    if weights == "imagenet":
        # Per-channel rescale by 1/sqrt(stddev) for the pretrained stats.
        x = layers.Rescaling(
            [1.0 / math.sqrt(stddev) for stddev in IMAGENET_STDDEV_RGB]
        )(x)

    x = layers.ZeroPadding2D(
        padding=imagenet_utils.correct_pad(x, 3), name="stem_conv_pad"
    )(x)
    x = layers.Conv2D(
        _scaled_filters(32),
        3,
        strides=2,
        padding="valid",
        use_bias=False,
        kernel_initializer=CONV_KERNEL_INITIALIZER,
        name="stem_conv",
    )(x)
    x = layers.BatchNormalization(axis=bn_axis, name="stem_bn")(x)
    x = layers.Activation(activation, name="stem_activation")(x)

    # Body: MBConv blocks, with drop-connect rate ramping up linearly
    # over the total (depth-scaled) block count.
    blocks_args = copy.deepcopy(blocks_args)

    block_index = 0
    total_blocks = float(
        sum(_scaled_repeats(args["repeats"]) for args in blocks_args)
    )
    for stage, args in enumerate(blocks_args):
        if args["repeats"] <= 0:
            raise ValueError(
                f"The number of repeats in `EfficientNet` must be > 0. "
                f"Received: repeats={args['repeats']}"
            )
        # Scale the stage's input/output filters by the width multiplier.
        args["filters_in"] = _scaled_filters(args["filters_in"])
        args["filters_out"] = _scaled_filters(args["filters_out"])

        for repeat in range(_scaled_repeats(args.pop("repeats"))):
            # Only the first repeat carries the stride / filter increase.
            if repeat > 0:
                args["strides"] = 1
                args["filters_in"] = args["filters_out"]
            x = block(
                x,
                activation,
                drop_connect_rate * block_index / total_blocks,
                name=f"block{stage + 1}{chr(repeat + 97)}_",
                **args,
            )
            block_index += 1

    # Head: 1x1 conv up to 1280 (width-scaled) channels.
    x = layers.Conv2D(
        _scaled_filters(1280),
        1,
        padding="same",
        use_bias=False,
        kernel_initializer=CONV_KERNEL_INITIALIZER,
        name="top_conv",
    )(x)
    x = layers.BatchNormalization(axis=bn_axis, name="top_bn")(x)
    x = layers.Activation(activation, name="top_activation")(x)

    if include_top:
        x = layers.GlobalAveragePooling2D(name="avg_pool")(x)
        if dropout_rate > 0:
            x = layers.Dropout(dropout_rate, name="top_dropout")(x)

        imagenet_utils.validate_activation(classifier_activation, weights)
        x = layers.Dense(
            classes,
            activation=classifier_activation,
            kernel_initializer=DENSE_KERNEL_INITIALIZER,
            name="predictions",
        )(x)
    else:
        if pooling == "avg":
            x = layers.GlobalAveragePooling2D(name="avg_pool")(x)
        elif pooling == "max":
            x = layers.GlobalMaxPooling2D(name="max_pool")(x)

    # Account for layers sitting behind `input_tensor`, if any.
    if input_tensor is not None:
        inputs = operation_utils.get_source_inputs(input_tensor)
    else:
        inputs = img_input

    model = Functional(inputs, x, name=name)

    # Load pretrained weights (ImageNet by hash table, or a local path).
    if weights == "imagenet":
        if include_top:
            file_suffix = ".h5"
            file_hash = WEIGHTS_HASHES[weights_name][0]
        else:
            file_suffix = "_notop.h5"
            file_hash = WEIGHTS_HASHES[weights_name][1]
        file_name = name + file_suffix
        weights_path = file_utils.get_file(
            file_name,
            BASE_WEIGHTS_PATH + file_name,
            cache_subdir="models",
            file_hash=file_hash,
        )
        model.load_weights(weights_path)
    elif weights is not None:
        model.load_weights(weights)

    return model


# Corresponds to V2's MBConv / FusedMBConv; essentially the same
# mechanism as MobileNetV3's bottleneck block.
def block(
    inputs,
    activation="swish",
    drop_rate=0.0,
    name="",
    filters_in=32,
    filters_out=16,
    kernel_size=3,
    strides=1,
    expand_ratio=1,
    se_ratio=0.0,
    id_skip=True,
):
    """Inverted residual (MBConv) block: expand, depthwise conv,
    squeeze-and-excitation, project, optional residual add."""
    bn_axis = 3 if backend.image_data_format() == "channels_last" else 1
    expanded_filters = filters_in * expand_ratio

    # Phase 1: 1x1 pointwise expansion of the channel count.
    if expand_ratio != 1:
        x = layers.Conv2D(
            expanded_filters,
            1,
            padding="same",
            use_bias=False,
            kernel_initializer=CONV_KERNEL_INITIALIZER,
            name=f"{name}expand_conv",
        )(inputs)
        x = layers.BatchNormalization(axis=bn_axis, name=f"{name}expand_bn")(x)
        x = layers.Activation(activation, name=f"{name}expand_activation")(x)
    else:
        x = inputs

    # Phase 2: depthwise convolution (spatial mixing only).
    if strides == 2:
        x = layers.ZeroPadding2D(
            padding=imagenet_utils.correct_pad(x, kernel_size),
            name=f"{name}dwconv_pad",
        )(x)
        dw_padding = "valid"
    else:
        dw_padding = "same"
    x = layers.DepthwiseConv2D(
        kernel_size,
        strides=strides,
        padding=dw_padding,
        use_bias=False,
        depthwise_initializer=CONV_KERNEL_INITIALIZER,
        name=f"{name}dwconv",
    )(x)
    x = layers.BatchNormalization(axis=bn_axis, name=f"{name}bn")(x)
    x = layers.Activation(activation, name=f"{name}activation")(x)

    # Phase 3: squeeze-and-excitation — per-channel gating weights.
    if 0 < se_ratio <= 1:
        squeezed_filters = max(1, int(filters_in * se_ratio))
        se = layers.GlobalAveragePooling2D(name=f"{name}se_squeeze")(x)

        if bn_axis == 1:
            se_shape = (expanded_filters, 1, 1)
        else:
            se_shape = (1, 1, expanded_filters)

        se = layers.Reshape(se_shape, name=f"{name}se_reshape")(se)
        se = layers.Conv2D(
            squeezed_filters,
            1,
            padding="same",
            activation=activation,
            kernel_initializer=CONV_KERNEL_INITIALIZER,
            name=f"{name}se_reduce",
        )(se)
        se = layers.Conv2D(
            expanded_filters,
            1,
            padding="same",
            activation="sigmoid",
            kernel_initializer=CONV_KERNEL_INITIALIZER,
            name=f"{name}se_expand",
        )(se)
        x = layers.multiply([x, se], name=f"{name}se_excite")

    # Phase 4: 1x1 pointwise projection back down to `filters_out`.
    x = layers.Conv2D(
        filters_out,
        1,
        padding="same",
        use_bias=False,
        kernel_initializer=CONV_KERNEL_INITIALIZER,
        name=f"{name}project_conv",
    )(x)
    x = layers.BatchNormalization(axis=bn_axis, name=f"{name}project_bn")(x)

    # Residual connection when shapes match; `noise_shape=(None, 1, 1, 1)`
    # drops the whole branch per sample (stochastic depth).
    if id_skip and strides == 1 and filters_in == filters_out:
        if drop_rate > 0:
            x = layers.Dropout(
                drop_rate, noise_shape=(None, 1, 1, 1), name=f"{name}drop"
            )(x)
        x = layers.add([x, inputs], name=f"{name}add")

    return x


# 以下、B0~B7までの定義。

@keras_export(
    [
        "keras.applications.efficientnet.EfficientNetB0",
        "keras.applications.EfficientNetB0",
    ]
)
def EfficientNetB0(
    include_top=True,
    weights="imagenet",
    input_tensor=None,
    input_shape=None,
    pooling=None,
    classes=1000,
    classifier_activation="softmax",
    name="efficientnetb0",
):
    # B0 baseline: width x1.0, depth x1.0, 224px inputs, dropout 0.2.
    return EfficientNet(
        width_coefficient=1.0,
        depth_coefficient=1.0,
        default_size=224,
        dropout_rate=0.2,
        name=name,
        include_top=include_top,
        weights=weights,
        input_tensor=input_tensor,
        input_shape=input_shape,
        pooling=pooling,
        classes=classes,
        classifier_activation=classifier_activation,
        weights_name="b0",
    )


@keras_export(
    [
        "keras.applications.efficientnet.EfficientNetB1",
        "keras.applications.EfficientNetB1",
    ]
)
def EfficientNetB1(
    include_top=True,
    weights="imagenet",
    input_tensor=None,
    input_shape=None,
    pooling=None,
    classes=1000,
    classifier_activation="softmax",
    name="efficientnetb1",
):
    # B1: width x1.0, depth x1.1, 240px inputs, dropout 0.2.
    return EfficientNet(
        width_coefficient=1.0,
        depth_coefficient=1.1,
        default_size=240,
        dropout_rate=0.2,
        name=name,
        include_top=include_top,
        weights=weights,
        input_tensor=input_tensor,
        input_shape=input_shape,
        pooling=pooling,
        classes=classes,
        classifier_activation=classifier_activation,
        weights_name="b1",
    )


@keras_export(
    [
        "keras.applications.efficientnet.EfficientNetB2",
        "keras.applications.EfficientNetB2",
    ]
)
def EfficientNetB2(
    include_top=True,
    weights="imagenet",
    input_tensor=None,
    input_shape=None,
    pooling=None,
    classes=1000,
    classifier_activation="softmax",
    name="efficientnetb2",
):
    # B2: width x1.1, depth x1.2, 260px inputs, dropout 0.3.
    return EfficientNet(
        width_coefficient=1.1,
        depth_coefficient=1.2,
        default_size=260,
        dropout_rate=0.3,
        name=name,
        include_top=include_top,
        weights=weights,
        input_tensor=input_tensor,
        input_shape=input_shape,
        pooling=pooling,
        classes=classes,
        classifier_activation=classifier_activation,
        weights_name="b2",
    )


@keras_export(
    [
        "keras.applications.efficientnet.EfficientNetB3",
        "keras.applications.EfficientNetB3",
    ]
)
def EfficientNetB3(
    include_top=True,
    weights="imagenet",
    input_tensor=None,
    input_shape=None,
    pooling=None,
    classes=1000,
    classifier_activation="softmax",
    name="efficientnetb3",
):
    # B3: width x1.2, depth x1.4, 300px inputs, dropout 0.3.
    return EfficientNet(
        width_coefficient=1.2,
        depth_coefficient=1.4,
        default_size=300,
        dropout_rate=0.3,
        name=name,
        include_top=include_top,
        weights=weights,
        input_tensor=input_tensor,
        input_shape=input_shape,
        pooling=pooling,
        classes=classes,
        classifier_activation=classifier_activation,
        weights_name="b3",
    )


@keras_export(
    [
        "keras.applications.efficientnet.EfficientNetB4",
        "keras.applications.EfficientNetB4",
    ]
)
def EfficientNetB4(
    include_top=True,
    weights="imagenet",
    input_tensor=None,
    input_shape=None,
    pooling=None,
    classes=1000,
    classifier_activation="softmax",
    name="efficientnetb4",
):
    # B4: width x1.4, depth x1.8, 380px inputs, dropout 0.4.
    return EfficientNet(
        width_coefficient=1.4,
        depth_coefficient=1.8,
        default_size=380,
        dropout_rate=0.4,
        name=name,
        include_top=include_top,
        weights=weights,
        input_tensor=input_tensor,
        input_shape=input_shape,
        pooling=pooling,
        classes=classes,
        classifier_activation=classifier_activation,
        weights_name="b4",
    )


@keras_export(
    [
        "keras.applications.efficientnet.EfficientNetB5",
        "keras.applications.EfficientNetB5",
    ]
)
def EfficientNetB5(
    include_top=True,
    weights="imagenet",
    input_tensor=None,
    input_shape=None,
    pooling=None,
    classes=1000,
    classifier_activation="softmax",
    name="efficientnetb5",
):
    # B5: width x1.6, depth x2.2, 456px inputs, dropout 0.4.
    return EfficientNet(
        width_coefficient=1.6,
        depth_coefficient=2.2,
        default_size=456,
        dropout_rate=0.4,
        name=name,
        include_top=include_top,
        weights=weights,
        input_tensor=input_tensor,
        input_shape=input_shape,
        pooling=pooling,
        classes=classes,
        classifier_activation=classifier_activation,
        weights_name="b5",
    )


@keras_export(
    [
        "keras.applications.efficientnet.EfficientNetB6",
        "keras.applications.EfficientNetB6",
    ]
)
def EfficientNetB6(
    include_top=True,
    weights="imagenet",
    input_tensor=None,
    input_shape=None,
    pooling=None,
    classes=1000,
    classifier_activation="softmax",
    name="efficientnetb6",
):
    # B6: width x1.8, depth x2.6, 528px inputs, dropout 0.5.
    return EfficientNet(
        width_coefficient=1.8,
        depth_coefficient=2.6,
        default_size=528,
        dropout_rate=0.5,
        name=name,
        include_top=include_top,
        weights=weights,
        input_tensor=input_tensor,
        input_shape=input_shape,
        pooling=pooling,
        classes=classes,
        classifier_activation=classifier_activation,
        weights_name="b6",
    )


@keras_export(
    [
        "keras.applications.efficientnet.EfficientNetB7",
        "keras.applications.EfficientNetB7",
    ]
)
def EfficientNetB7(
    include_top=True,
    weights="imagenet",
    input_tensor=None,
    input_shape=None,
    pooling=None,
    classes=1000,
    classifier_activation="softmax",
    name="efficientnetb7",
):
    # B7: width x2.0, depth x3.1, 600px inputs, dropout 0.5.
    return EfficientNet(
        width_coefficient=2.0,
        depth_coefficient=3.1,
        default_size=600,
        dropout_rate=0.5,
        name=name,
        include_top=include_top,
        weights=weights,
        input_tensor=input_tensor,
        input_shape=input_shape,
        pooling=pooling,
        classes=classes,
        classifier_activation=classifier_activation,
        weights_name="b7",
    )


# Render the shared base docstring (abbreviated here) for each variant.
EfficientNetB0.__doc__ = BASE_DOCSTRING.format(name="EfficientNetB0")
EfficientNetB1.__doc__ = BASE_DOCSTRING.format(name="EfficientNetB1")
EfficientNetB2.__doc__ = BASE_DOCSTRING.format(name="EfficientNetB2")
EfficientNetB3.__doc__ = BASE_DOCSTRING.format(name="EfficientNetB3")
EfficientNetB4.__doc__ = BASE_DOCSTRING.format(name="EfficientNetB4")
EfficientNetB5.__doc__ = BASE_DOCSTRING.format(name="EfficientNetB5")
EfficientNetB6.__doc__ = BASE_DOCSTRING.format(name="EfficientNetB6")
EfficientNetB7.__doc__ = BASE_DOCSTRING.format(name="EfficientNetB7")

@keras_export("keras.applications.efficientnet.preprocess_input")
def preprocess_input(x, data_format=None):
    """Pass-through: EfficientNet models embed their own Rescaling /
    Normalization layers, so inputs are returned unchanged."""
    return x


@keras_export("keras.applications.efficientnet.decode_predictions")
def decode_predictions(preds, top=5):
    """Decode ImageNet prediction vectors into the `top` best classes."""
    return imagenet_utils.decode_predictions(preds, top=top)


decode_predictions.__doc__ = imagenet_utils.decode_predictions.__doc__

EfficientNetは、スケールしている点を除いてMobileNetV3とほぼ変わらず。

EfficientNetV2ではMBConvとFusedMBConvの2種類のブロックが導入されており、先ほどのblock関数に相当する部分が2つに分かれている。

https://github.com/keras-team/keras/blob/master/keras/src/applications/efficientnet_v2.py

import copy
import math

from keras.src import backend
from keras.src import initializers
from keras.src import layers
from keras.src.api_export import keras_export
from keras.src.applications import imagenet_utils
from keras.src.models import Functional
from keras.src.ops import operation_utils
from keras.src.utils import file_utils

# Base URL for the pre-trained EfficientNetV2 weights.
BASE_WEIGHTS_PATH = "https://storage.googleapis.com/tensorflow/keras-applications/efficientnet_v2/"  # noqa: E501

# Per-variant file hashes: (with-top hash, no-top hash). Keys "b0".."b3"
# and "-s"/"-m"/"-l" match the V2 variant naming.
WEIGHTS_HASHES = {
    "b0": (
        "21ecbf6da12460d5c40bb2f29ceb2188",
        "893217f2bb855e2983157299931e43ff",
    ),
    "b1": (
        "069f0534ff22adf035c89e2d9547a9dc",
        "0e80663031ca32d657f9caa404b6ec37",
    ),
    "b2": (
        "424e49f28180edbde1e94797771950a7",
        "1dfe2e7a5d45b6632553a8961ea609eb",
    ),
    "b3": (
        "1f1fc43bd98a6e4fd8fdfd551e02c7a0",
        "f6abf7b5849ac99a89b50dd3fd532856",
    ),
    "-s": (
        "e1d88a8495beba45748fedd0cecbe016",
        "af0682fb74e8c54910f2d4393339c070",
    ),
    "-m": (
        "a3bf6aa3276309f4fc6a34aa114c95cd",
        "1b8dc055df72dde80d614482840fe342",
    ),
    "-l": (
        "27e6d408b53c7ebc868fefa357689935",
        "b0b66b5c863aef5b46e8608fe1711615",
    ),
}

DEFAULT_BLOCKS_ARGS = {
"efficientnetv2-s": [
{"kernel_size":3,"num_repeat":2,"input_filters":24,"output_filters":24,"expand_ratio":1,"se_ratio":0.0,"strides":1,"conv_type":1,},
{"kernel_size":3,"num_repeat":4,"input_filters":24,"output_filters":48,"expand_ratio":4,"se_ratio":0.0,"strides":2,"conv_type":1,},
{"conv_type":1,"expand_ratio":4,"input_filters":48,"kernel_size":3,"num_repeat":4,"output_filters":64,"se_ratio":0,"strides":2,},
{"conv_type":0,"expand_ratio":4,"input_filters":64,"kernel_size":3,"num_repeat":6,"output_filters":128,"se_ratio":0.25,"strides":2,},
{"conv_type":0,"expand_ratio":6,"input_filters":128,"kernel_size":3,"num_repeat":9,"output_filters":160,"se_ratio":0.25,"strides":1,},
{"conv_type":0,"expand_ratio":6,"input_filters":160,"kernel_size":3,"num_repeat":15,"output_filters":256,"se_ratio":0.25,"strides":2,},],
"efficientnetv2-m":[
{"kernel_size":3,"num_repeat":3,"input_filters":24,"output_filters":24,"expand_ratio":1,"se_ratio":0,"strides":1,"conv_type":1,},
{"kernel_size":3,"num_repeat":5,"input_filters":24,"output_filters":48,"expand_ratio":4,"se_ratio":0,"strides":2,"conv_type":1,},
{"kernel_size":3,"num_repeat":5,"input_filters":48,"output_filters":80,"expand_ratio":4,"se_ratio":0,"strides":2,"conv_type":1,},
{"kernel_size":3,"num_repeat":7,"input_filters":80,"output_filters":160,"expand_ratio":4,"se_ratio":0.25,"strides":2,"conv_type":0,},
{"kernel_size":3,"num_repeat":14,"input_filters":160,"output_filters":176,"expand_ratio":6,"se_ratio":0.25,"strides":1,"conv_type":0,},
{"kernel_size":3,"num_repeat":18,"input_filters":176,"output_filters":304,"expand_ratio":6,"se_ratio":0.25,"strides":2,"conv_type":0,},
{"kernel_size":3,"num_repeat":5,"input_filters":304,"output_filters":512,"expand_ratio":6,"se_ratio":0.25,"strides":1,"conv_type":0,},],
"efficientnetv2-l":[
{"kernel_size":3,"num_repeat":4,"input_filters":32,"output_filters":32,"expand_ratio":1,"se_ratio":0,"strides":1,"conv_type":1,},
{"kernel_size":3,"num_repeat":7,"input_filters":32,"output_filters":64,"expand_ratio":4,"se_ratio":0,"strides":2,"conv_type":1,},
{"kernel_size":3,"num_repeat":7,"input_filters":64,"output_filters":96,"expand_ratio":4,"se_ratio":0,"strides":2,"conv_type":1,},
{"kernel_size":3,"num_repeat":10,"input_filters":96,"output_filters":192,"expand_ratio":4,"se_ratio":0.25,"strides":2,"conv_type":0,},
{"kernel_size":3,"num_repeat":19,"input_filters":192,"output_filters":224,"expand_ratio":6,"se_ratio":0.25,"strides":1,"conv_type":0,},
{"kernel_size":3,"num_repeat":25,"input_filters":224,"output_filters":384,"expand_ratio":6,"se_ratio":0.25,"strides":2,"conv_type":0,},
{"kernel_size":3,"num_repeat":7,"input_filters":384,"output_filters":640,"expand_ratio":6,"se_ratio":0.25,"strides":1,"conv_type":0,},],
"efficientnetv2-b0":[
{"kernel_size":3,"num_repeat":1,"input_filters":32,"output_filters":16,"expand_ratio":1,"se_ratio":0,"strides":1,"conv_type":1,},
{"kernel_size":3,"num_repeat":2,"input_filters":16,"output_filters":32,"expand_ratio":4,"se_ratio":0,"strides":2,"conv_type":1,},
{"kernel_size":3,"num_repeat":2,"input_filters":32,"output_filters":48,"expand_ratio":4,"se_ratio":0,"strides":2,"conv_type":1,},
{"kernel_size":3,"num_repeat":3,"input_filters":48,"output_filters":96,"expand_ratio":4,"se_ratio":0.25,"strides":2,"conv_type":0,},
{"kernel_size":3,"num_repeat":5,"input_filters":96,"output_filters":112,"expand_ratio":6,"se_ratio":0.25,"strides":1,"conv_type":0,},
{"kernel_size":3,"num_repeat":8,"input_filters":112,"output_filters":192,"expand_ratio":6,"se_ratio":0.25,"strides":2,"conv_type":0,},],
"efficientnetv2-b1":[
{"kernel_size":3,"num_repeat":1,"input_filters":32,"output_filters":16,"expand_ratio":1,"se_ratio":0,"strides":1,"conv_type":1,},
{"kernel_size":3,"num_repeat":2,"input_filters":16,"output_filters":32,"expand_ratio":4,"se_ratio":0,"strides":2,"conv_type":1,},
{"kernel_size":3,"num_repeat":2,"input_filters":32,"output_filters":48,"expand_ratio":4,"se_ratio":0,"strides":2,"conv_type":1,},
{"kernel_size":3,"num_repeat":3,"input_filters":48,"output_filters":96,"expand_ratio":4,"se_ratio":0.25,"strides":2,"conv_type":0,},
{"kernel_size":3,"num_repeat":5,"input_filters":96,"output_filters":112,"expand_ratio":6,"se_ratio":0.25,"strides":1,"conv_type":0,},
{"kernel_size":3,"num_repeat":8,"input_filters":112,"output_filters":192,"expand_ratio":6,"se_ratio":0.25,"strides":2,"conv_type":0,},],
"efficientnetv2-b2":[
{"kernel_size":3,"num_repeat":1,"input_filters":32,"output_filters":16,"expand_ratio":1,"se_ratio":0,"strides":1,"conv_type":1,},
{"kernel_size":3,"num_repeat":2,"input_filters":16,"output_filters":32,"expand_ratio":4,"se_ratio":0,"strides":2,"conv_type":1,},
{"kernel_size":3,"num_repeat":2,"input_filters":32,"output_filters":48,"expand_ratio":4,"se_ratio":0,"strides":2,"conv_type":1,},
{"kernel_size":3,"num_repeat":3,"input_filters":48,"output_filters":96,"expand_ratio":4,"se_ratio":0.25,"strides":2,"conv_type":0,},
{"kernel_size":3,"num_repeat":5,"input_filters":96,"output_filters":112,"expand_ratio":6,"se_ratio":0.25,"strides":1,"conv_type":0,},
{"kernel_size":3,"num_repeat":8,"input_filters":112,"output_filters":192,"expand_ratio":6,"se_ratio":0.25,"strides":2,"conv_type":0,},],
"efficientnetv2-b3":[
{"kernel_size":3,"num_repeat":1,"input_filters":32,"output_filters":16,"expand_ratio":1,"se_ratio":0,"strides":1,"conv_type":1,},
{"kernel_size":3,"num_repeat":2,"input_filters":16,"output_filters":32,"expand_ratio":4,"se_ratio":0,"strides":2,"conv_type":1,},
{"kernel_size":3,"num_repeat":2,"input_filters":32,"output_filters":48,"expand_ratio":4,"se_ratio":0,"strides":2,"conv_type":1,},
{"kernel_size":3,"num_repeat":3,"input_filters":48,"output_filters":96,"expand_ratio":4,"se_ratio":0.25,"strides":2,"conv_type":0,},
{"kernel_size":3,"num_repeat":5,"input_filters":96,"output_filters":112,"expand_ratio":6,"se_ratio":0.25,"strides":1,"conv_type":0,},
{"kernel_size":3,"num_repeat":8,"input_filters":112,"output_filters":192,"expand_ratio":6,"se_ratio":0.25,"strides":2,"conv_type":0,},],}

# Initializer config for conv kernels (He-style fan-out scaling).
CONV_KERNEL_INITIALIZER = {
    "class_name": "VarianceScaling",
    "config": {
        "scale": 2.0,
        "mode": "fan_out",
        "distribution": "truncated_normal",
    },
}

# Initializer config for the classifier Dense kernel.
DENSE_KERNEL_INITIALIZER = {
    "class_name": "VarianceScaling",
    "config": {
        "scale": 1.0 / 3.0,
        "mode": "fan_out",
        "distribution": "uniform",
    },
}

# Docstring template, abbreviated ("省略") in this article excerpt.
BASE_DOCSTRING = "省略"

def round_filters(filters, width_coefficient, min_depth, depth_divisor):
    """Scale `filters` by the width multiplier and snap to the divisor grid.

    The scaled count is rounded to the nearest multiple of `depth_divisor`
    and never allowed to fall below `min_depth` (or, if `min_depth` is
    falsy, below `depth_divisor` itself).
    """
    scaled = filters * width_coefficient
    floor = min_depth if min_depth else depth_divisor
    # Round-half-up to the nearest multiple of the divisor.
    snapped = int(scaled + depth_divisor / 2) // depth_divisor * depth_divisor
    return int(max(floor, snapped))


def round_repeats(repeats, depth_coefficient):
    """Scale a stage's repeat count by the depth multiplier, rounding up."""
    scaled = repeats * depth_coefficient
    return int(math.ceil(scaled))



# MBConv layer: 1x1 channel expansion -> depthwise spatial conv ->
# squeeze-and-excitation channel reweighting -> 1x1 projection.
def MBConvBlock(
    input_filters,
    output_filters,
    expand_ratio=1,
    kernel_size=3,
    strides=1,
    se_ratio=0.0,
    bn_momentum=0.9,
    activation="swish",
    survival_probability=0.8,
    name=None,
):
    """MBConv block: Mobile Inverted Residual Bottleneck."""
    channel_axis = 3 if backend.image_data_format() == "channels_last" else 1

    if name is None:
        name = backend.get_uid("block0")

    def apply(inputs):
        """Wire the block onto `inputs` and return its output tensor."""
        expanded = input_filters * expand_ratio

        # Expansion phase: a pointwise conv widens the channel dimension.
        if expand_ratio != 1:
            x = layers.Conv2D(
                filters=expanded,
                kernel_size=1,
                strides=1,
                kernel_initializer=CONV_KERNEL_INITIALIZER,
                padding="same",
                data_format=backend.image_data_format(),
                use_bias=False,
                name=f"{name}expand_conv",
            )(inputs)
            x = layers.BatchNormalization(
                axis=channel_axis,
                momentum=bn_momentum,
                name=f"{name}expand_bn",
            )(x)
            x = layers.Activation(activation, name=f"{name}expand_activation")(x)
        else:
            x = inputs

        # Depthwise conv: spatial filtering only, one filter per channel.
        x = layers.DepthwiseConv2D(
            kernel_size=kernel_size,
            strides=strides,
            depthwise_initializer=CONV_KERNEL_INITIALIZER,
            padding="same",
            data_format=backend.image_data_format(),
            use_bias=False,
            name=f"{name}dwconv2",
        )(x)
        x = layers.BatchNormalization(
            axis=channel_axis, momentum=bn_momentum, name=f"{name}bn"
        )(x)
        x = layers.Activation(activation, name=f"{name}activation")(x)

        # Squeeze-and-excitation: learn per-channel importance weights
        # from globally pooled features, then rescale the feature map.
        if 0 < se_ratio <= 1:
            squeezed = max(1, int(input_filters * se_ratio))
            se = layers.GlobalAveragePooling2D(name=f"{name}se_squeeze")(x)
            if channel_axis == 1:
                se_shape = (expanded, 1, 1)
            else:
                se_shape = (1, 1, expanded)
            se = layers.Reshape(se_shape, name=f"{name}se_reshape")(se)
            se = layers.Conv2D(
                squeezed,
                1,
                padding="same",
                activation=activation,
                kernel_initializer=CONV_KERNEL_INITIALIZER,
                name=f"{name}se_reduce",
            )(se)
            se = layers.Conv2D(
                expanded,
                1,
                padding="same",
                activation="sigmoid",
                kernel_initializer=CONV_KERNEL_INITIALIZER,
                name=f"{name}se_expand",
            )(se)
            x = layers.multiply([x, se], name=f"{name}se_excite")

        # Projection: pointwise conv back down to `output_filters`.
        x = layers.Conv2D(
            filters=output_filters,
            kernel_size=1,
            strides=1,
            kernel_initializer=CONV_KERNEL_INITIALIZER,
            padding="same",
            data_format=backend.image_data_format(),
            use_bias=False,
            name=f"{name}project_conv",
        )(x)
        x = layers.BatchNormalization(
            axis=channel_axis, momentum=bn_momentum, name=f"{name}project_bn"
        )(x)

        # Residual connection (with stochastic-depth dropout) only when the
        # input and output tensors have identical shapes.
        if strides == 1 and input_filters == output_filters:
            if survival_probability:
                # NOTE(review): the value is passed as the Dropout *rate*;
                # upstream Keras does the same despite the parameter name.
                x = layers.Dropout(
                    survival_probability,
                    noise_shape=(None, 1, 1, 1),
                    name=f"{name}drop",
                )(x)
            x = layers.add([x, inputs], name=f"{name}add")

        return x

    return apply


# Fused-MBConv: the 1x1 expand + depthwise pair of MBConv is fused into a
# single full kxk conv, followed by SE and a 1x1 projection.
def FusedMBConvBlock(
    input_filters,
    output_filters,
    expand_ratio=1,
    kernel_size=3,
    strides=1,
    se_ratio=0.0,
    bn_momentum=0.9,
    activation="swish",
    survival_probability=0.8,
    name=None,
):
    """Fuses the proj conv1x1 and depthwise_conv into a conv2d."""
    channel_axis = 3 if backend.image_data_format() == "channels_last" else 1

    if name is None:
        name = backend.get_uid("block0")

    def apply(inputs):
        """Wire the block onto `inputs` and return its output tensor."""
        expanded = input_filters * expand_ratio

        # Expansion phase: one full kxk conv does channel growth and
        # spatial filtering in a single step (hence "fused").
        if expand_ratio != 1:
            x = layers.Conv2D(
                expanded,
                kernel_size=kernel_size,
                strides=strides,
                kernel_initializer=CONV_KERNEL_INITIALIZER,
                data_format=backend.image_data_format(),
                padding="same",
                use_bias=False,
                name=f"{name}expand_conv",
            )(inputs)
            x = layers.BatchNormalization(
                axis=channel_axis, momentum=bn_momentum, name=f"{name}expand_bn"
            )(x)
            x = layers.Activation(
                activation=activation, name=f"{name}expand_activation"
            )(x)
        else:
            x = inputs

        # Squeeze-and-excitation: per-channel importance weights.
        if 0 < se_ratio <= 1:
            squeezed = max(1, int(input_filters * se_ratio))
            se = layers.GlobalAveragePooling2D(name=f"{name}se_squeeze")(x)
            if channel_axis == 1:
                se_shape = (expanded, 1, 1)
            else:
                se_shape = (1, 1, expanded)

            se = layers.Reshape(se_shape, name=f"{name}se_reshape")(se)
            se = layers.Conv2D(
                squeezed,
                1,
                padding="same",
                activation=activation,
                kernel_initializer=CONV_KERNEL_INITIALIZER,
                name=f"{name}se_reduce",
            )(se)
            se = layers.Conv2D(
                expanded,
                1,
                padding="same",
                activation="sigmoid",
                kernel_initializer=CONV_KERNEL_INITIALIZER,
                name=f"{name}se_expand",
            )(se)
            x = layers.multiply([x, se], name=f"{name}se_excite")

        # Projection: when there was no expansion phase, the kxk kernel
        # and the stride are applied here instead of a plain 1x1.
        x = layers.Conv2D(
            output_filters,
            kernel_size=1 if expand_ratio != 1 else kernel_size,
            strides=1 if expand_ratio != 1 else strides,
            kernel_initializer=CONV_KERNEL_INITIALIZER,
            padding="same",
            use_bias=False,
            name=f"{name}project_conv",
        )(x)
        x = layers.BatchNormalization(
            axis=channel_axis, momentum=bn_momentum, name=f"{name}project_bn"
        )(x)

        # Without an expansion phase the activation follows the projection.
        if expand_ratio == 1:
            x = layers.Activation(
                activation=activation, name=f"{name}project_activation"
            )(x)

        # Residual + stochastic-depth dropout when shapes match.
        if strides == 1 and input_filters == output_filters:
            if survival_probability:
                x = layers.Dropout(
                    survival_probability,
                    noise_shape=(None, 1, 1, 1),
                    name=f"{name}drop",
                )(x)
            x = layers.add([x, inputs], name=f"{name}add")
        return x

    return apply


# EfficientNetV2 model builder, shared by every variant (B0-B3, S, M, L).
def EfficientNetV2(
    width_coefficient,
    depth_coefficient,
    default_size,
    dropout_rate=0.2,
    drop_connect_rate=0.2,
    depth_divisor=8,
    min_depth=8,
    bn_momentum=0.9,
    activation="swish",
    blocks_args="default",
    name="efficientnetv2",
    include_top=True,
    weights="imagenet",
    input_tensor=None,
    input_shape=None,
    pooling=None,
    classes=1000,
    classifier_activation="softmax",
    include_preprocessing=True,
    weights_name=None,
):
    """Instantiate an EfficientNetV2 architecture.

    Pipeline: optional preprocessing -> 3x3/2 stem conv -> a sequence of
    (Fused)MBConv stages described by `blocks_args` -> 1x1 top conv ->
    optional pooling/classifier head -> optional weight loading.

    Args:
        width_coefficient: channel multiplier applied via `round_filters`.
        depth_coefficient: per-stage repeat multiplier via `round_repeats`.
        default_size: default input resolution for this variant.
        dropout_rate: dropout rate before the classifier Dense layer.
        drop_connect_rate: maximum stochastic-depth rate; ramped linearly
            from 0 over the blocks.
        blocks_args: list of per-stage dicts, or "default" to look the
            stage configuration up in DEFAULT_BLOCKS_ARGS by `name`.
        weights: one of None, "imagenet", or a path to a weights file.
        weights_name: key into WEIGHTS_HASHES for the pretrained files.

    Returns:
        A Keras `Functional` model.

    Raises:
        ValueError: on an invalid `weights` argument, on
            `classes != 1000` with ImageNet weights and `include_top`,
            or on a non-positive `num_repeat` in `blocks_args`.
    """

    if blocks_args == "default":
        blocks_args = DEFAULT_BLOCKS_ARGS[name]

    # Validate the `weights` argument before doing any work.
    if not (weights in {"imagenet", None} or file_utils.exists(weights)):
        raise ValueError(
            "The `weights` argument should be either "
            "`None` (random initialization), `imagenet` "
            "(pre-training on ImageNet), "
            "or the path to the weights file to be loaded."
            f"Received: weights={weights}"
        )

    # The pretrained ImageNet classifier head has exactly 1000 classes.
    if weights == "imagenet" and include_top and classes != 1000:
        raise ValueError(
            'If using `weights="imagenet"` with `include_top`'
            " as true, `classes` should be 1000"
        )

    # Determine proper input shape
    input_shape = imagenet_utils.obtain_input_shape(
        input_shape,
        default_size=default_size,
        min_size=32,
        data_format=backend.image_data_format(),
        require_flatten=include_top,
        weights=weights,
    )

    # Accept either a fresh Input layer or a caller-supplied tensor.
    if input_tensor is None:
        img_input = layers.Input(shape=input_shape)
    else:
        if not backend.is_keras_tensor(input_tensor):
            img_input = layers.Input(tensor=input_tensor, shape=input_shape)
        else:
            img_input = input_tensor

    # Channel axis: 3 for channels_last, 1 for channels_first.
    bn_axis = 3 if backend.image_data_format() == "channels_last" else 1

    x = img_input

    if include_preprocessing:
        # Apply original V1 preprocessing for Bx variants
        # if number of channels allows it
        num_channels = input_shape[bn_axis - 1]
        if name.split("-")[-1].startswith("b") and num_channels == 3:
            # Bx variants: ImageNet mean/std normalization after 0-1 scaling.
            x = layers.Rescaling(scale=1.0 / 255)(x)
            mean = [0.485, 0.456, 0.406]
            variance = [0.229**2, 0.224**2, 0.225**2]
            x = layers.Normalization(
                mean=mean,
                variance=variance,
                axis=bn_axis,
            )(x)
        else:
            # S/M/L variants: simple rescale to the [-1, 1] range.
            x = layers.Rescaling(scale=1.0 / 128.0, offset=-1)(x)

    # Build stem
    stem_filters = round_filters(
        filters=blocks_args[0]["input_filters"],
        width_coefficient=width_coefficient,
        min_depth=min_depth,
        depth_divisor=depth_divisor,
    )

    x = layers.Conv2D(filters=stem_filters,kernel_size=3,strides=2,kernel_initializer=CONV_KERNEL_INITIALIZER,padding="same",use_bias=False,name="stem_conv", )(x)
    x = layers.BatchNormalization(axis=bn_axis,momentum=bn_momentum,name="stem_bn", )(x)
    x = layers.Activation(activation, name="stem_activation")(x)

    # Build blocks
    # Deep-copy because the per-stage dicts are mutated (pop/overwrite) below.
    blocks_args = copy.deepcopy(blocks_args)
    b = 0
    # Total block count (before depth scaling) used to ramp the
    # stochastic-depth rate linearly from 0 to drop_connect_rate.
    blocks = float(sum(args["num_repeat"] for args in blocks_args))

    for i, args in enumerate(blocks_args):
        if args["num_repeat"] <= 0:
            raise ValueError(
                f"The number of repeats in `EfficientNetV2` must be > 0. "
                f"Received: num_repeat={args['num_repeat']}"
            )

        # Update block input and output filters based on depth multiplier.
        args["input_filters"] = round_filters(
            filters=args["input_filters"],
            width_coefficient=width_coefficient,
            min_depth=min_depth,
            depth_divisor=depth_divisor,
        )
        args["output_filters"] = round_filters(
            filters=args["output_filters"],
            width_coefficient=width_coefficient,
            min_depth=min_depth,
            depth_divisor=depth_divisor,
        )

        # Determine which conv type to use:
        block = {0: MBConvBlock, 1: FusedMBConvBlock}[args.pop("conv_type")]
        repeats = round_repeats(
            repeats=args.pop("num_repeat"), depth_coefficient=depth_coefficient
        )
        for j in range(repeats):
            # The first block needs to take care of stride and filter size
            # increase.
            if j > 0:
                args["strides"] = 1
                args["input_filters"] = args["output_filters"]

            x = block(
                activation=activation,
                bn_momentum=bn_momentum,
                survival_probability=drop_connect_rate * b / blocks,
                name=f"block{i + 1}{chr(j + 97)}_",  # block1a_, block1b_, ...
                **args,
            )(x)
            b += 1

    # Build top
    top_filters = round_filters(
        filters=1280,
        width_coefficient=width_coefficient,
        min_depth=min_depth,
        depth_divisor=depth_divisor,
    )
    x = layers.Conv2D(
        filters=top_filters,
        kernel_size=1,
        strides=1,
        kernel_initializer=CONV_KERNEL_INITIALIZER,
        padding="same",
        data_format=backend.image_data_format(),
        use_bias=False,
        name="top_conv",
    )(x)
    x = layers.BatchNormalization(
        axis=bn_axis,
        momentum=bn_momentum,
        name="top_bn",
    )(x)
    x = layers.Activation(activation=activation, name="top_activation")(x)

    if include_top:
        # Classification head: global pool -> dropout -> Dense softmax.
        x = layers.GlobalAveragePooling2D(name="avg_pool")(x)
        if dropout_rate > 0:
            x = layers.Dropout(dropout_rate, name="top_dropout")(x)
        imagenet_utils.validate_activation(classifier_activation, weights)
        x = layers.Dense(
            classes,
            activation=classifier_activation,
            kernel_initializer=DENSE_KERNEL_INITIALIZER,
            bias_initializer=initializers.Constant(0.0),
            name="predictions",
        )(x)
    else:
        # Headless model: optional global pooling per the `pooling` arg.
        if pooling == "avg":
            x = layers.GlobalAveragePooling2D(name="avg_pool")(x)
        elif pooling == "max":
            x = layers.GlobalMaxPooling2D(name="max_pool")(x)

    # Ensure that the model takes into account
    # any potential predecessors of `input_tensor`.
    if input_tensor is not None:
        inputs = operation_utils.get_source_inputs(input_tensor)
    else:
        inputs = img_input

    # Create model.
    model = Functional(inputs, x, name=name)

    # Load weights.
    if weights == "imagenet":
        # Download (with hash verification) the pretrained file matching
        # `weights_name`, with/without the classifier head.
        if include_top:
            file_suffix = ".h5"
            file_hash = WEIGHTS_HASHES[weights_name][0]
        else:
            file_suffix = "_notop.h5"
            file_hash = WEIGHTS_HASHES[weights_name][1]
        file_name = name + file_suffix
        weights_path = file_utils.get_file(
            file_name,
            BASE_WEIGHTS_PATH + file_name,
            cache_subdir="models",
            file_hash=file_hash,
        )
        model.load_weights(weights_path)
    elif weights is not None:
        model.load_weights(weights)

    return model


@keras_export(
    [
        "keras.applications.efficientnet_v2.EfficientNetV2B0",
        "keras.applications.EfficientNetV2B0",
    ]
)
def EfficientNetV2B0(
    include_top=True,
    weights="imagenet",
    input_tensor=None,
    input_shape=None,
    pooling=None,
    classes=1000,
    classifier_activation="softmax",
    include_preprocessing=True,
    name="efficientnetv2-b0",
):
    """EfficientNetV2-B0: width 1.0, depth 1.0, default input 224px."""
    return EfficientNetV2(
        width_coefficient=1.0,
        depth_coefficient=1.0,
        default_size=224,
        weights_name="b0",
        name=name,
        weights=weights,
        include_top=include_top,
        input_tensor=input_tensor,
        input_shape=input_shape,
        pooling=pooling,
        classes=classes,
        classifier_activation=classifier_activation,
        include_preprocessing=include_preprocessing,
    )


@keras_export(
    [
        "keras.applications.efficientnet_v2.EfficientNetV2B1",
        "keras.applications.EfficientNetV2B1",
    ]
)
def EfficientNetV2B1(
    include_top=True,
    weights="imagenet",
    input_tensor=None,
    input_shape=None,
    pooling=None,
    classes=1000,
    classifier_activation="softmax",
    include_preprocessing=True,
    name="efficientnetv2-b1",
):
    """EfficientNetV2-B1: width 1.0, depth 1.1, default input 240px."""
    return EfficientNetV2(
        width_coefficient=1.0,
        depth_coefficient=1.1,
        default_size=240,
        weights_name="b1",
        name=name,
        weights=weights,
        include_top=include_top,
        input_tensor=input_tensor,
        input_shape=input_shape,
        pooling=pooling,
        classes=classes,
        classifier_activation=classifier_activation,
        include_preprocessing=include_preprocessing,
    )


@keras_export(
    [
        "keras.applications.efficientnet_v2.EfficientNetV2B2",
        "keras.applications.EfficientNetV2B2",
    ]
)
def EfficientNetV2B2(
    include_top=True,
    weights="imagenet",
    input_tensor=None,
    input_shape=None,
    pooling=None,
    classes=1000,
    classifier_activation="softmax",
    include_preprocessing=True,
    name="efficientnetv2-b2",
):
    """EfficientNetV2-B2: width 1.1, depth 1.2, default input 260px."""
    return EfficientNetV2(
        width_coefficient=1.1,
        depth_coefficient=1.2,
        default_size=260,
        weights_name="b2",
        name=name,
        weights=weights,
        include_top=include_top,
        input_tensor=input_tensor,
        input_shape=input_shape,
        pooling=pooling,
        classes=classes,
        classifier_activation=classifier_activation,
        include_preprocessing=include_preprocessing,
    )


@keras_export(
    [
        "keras.applications.efficientnet_v2.EfficientNetV2B3",
        "keras.applications.EfficientNetV2B3",
    ]
)
def EfficientNetV2B3(
    include_top=True,
    weights="imagenet",
    input_tensor=None,
    input_shape=None,
    pooling=None,
    classes=1000,
    classifier_activation="softmax",
    include_preprocessing=True,
    name="efficientnetv2-b3",
):
    """EfficientNetV2-B3: width 1.2, depth 1.4, default input 300px."""
    return EfficientNetV2(
        width_coefficient=1.2,
        depth_coefficient=1.4,
        default_size=300,
        weights_name="b3",
        name=name,
        weights=weights,
        include_top=include_top,
        input_tensor=input_tensor,
        input_shape=input_shape,
        pooling=pooling,
        classes=classes,
        classifier_activation=classifier_activation,
        include_preprocessing=include_preprocessing,
    )


@keras_export(
    [
        "keras.applications.efficientnet_v2.EfficientNetV2S",
        "keras.applications.EfficientNetV2S",
    ]
)
def EfficientNetV2S(
    include_top=True,
    weights="imagenet",
    input_tensor=None,
    input_shape=None,
    pooling=None,
    classes=1000,
    classifier_activation="softmax",
    include_preprocessing=True,
    name="efficientnetv2-s",
):
    """EfficientNetV2-S: coefficients 1.0/1.0 (its own block args), 384px."""
    return EfficientNetV2(
        width_coefficient=1.0,
        depth_coefficient=1.0,
        default_size=384,
        weights_name="-s",
        name=name,
        weights=weights,
        include_top=include_top,
        input_tensor=input_tensor,
        input_shape=input_shape,
        pooling=pooling,
        classes=classes,
        classifier_activation=classifier_activation,
        include_preprocessing=include_preprocessing,
    )


@keras_export(
    [
        "keras.applications.efficientnet_v2.EfficientNetV2M",
        "keras.applications.EfficientNetV2M",
    ]
)
def EfficientNetV2M(
    include_top=True,
    weights="imagenet",
    input_tensor=None,
    input_shape=None,
    pooling=None,
    classes=1000,
    classifier_activation="softmax",
    include_preprocessing=True,
    name="efficientnetv2-m",
):
    """EfficientNetV2-M: coefficients 1.0/1.0 (its own block args), 480px."""
    return EfficientNetV2(
        width_coefficient=1.0,
        depth_coefficient=1.0,
        default_size=480,
        weights_name="-m",
        name=name,
        weights=weights,
        include_top=include_top,
        input_tensor=input_tensor,
        input_shape=input_shape,
        pooling=pooling,
        classes=classes,
        classifier_activation=classifier_activation,
        include_preprocessing=include_preprocessing,
    )


@keras_export(
    [
        "keras.applications.efficientnet_v2.EfficientNetV2L",
        "keras.applications.EfficientNetV2L",
    ]
)
def EfficientNetV2L(
    include_top=True,
    weights="imagenet",
    input_tensor=None,
    input_shape=None,
    pooling=None,
    classes=1000,
    classifier_activation="softmax",
    include_preprocessing=True,
    name="efficientnetv2-l",
):
    """EfficientNetV2-L: coefficients 1.0/1.0 (its own block args), 480px."""
    return EfficientNetV2(
        width_coefficient=1.0,
        depth_coefficient=1.0,
        default_size=480,
        weights_name="-l",
        name=name,
        weights=weights,
        include_top=include_top,
        input_tensor=input_tensor,
        input_shape=input_shape,
        pooling=pooling,
        classes=classes,
        classifier_activation=classifier_activation,
        include_preprocessing=include_preprocessing,
    )

# Fill in each variant's docstring from the shared template.
EfficientNetV2B0.__doc__ = BASE_DOCSTRING.format(name="EfficientNetV2B0")
EfficientNetV2B1.__doc__ = BASE_DOCSTRING.format(name="EfficientNetV2B1")
EfficientNetV2B2.__doc__ = BASE_DOCSTRING.format(name="EfficientNetV2B2")
EfficientNetV2B3.__doc__ = BASE_DOCSTRING.format(name="EfficientNetV2B3")
EfficientNetV2S.__doc__ = BASE_DOCSTRING.format(name="EfficientNetV2S")
EfficientNetV2M.__doc__ = BASE_DOCSTRING.format(name="EfficientNetV2M")
EfficientNetV2L.__doc__ = BASE_DOCSTRING.format(name="EfficientNetV2L")

@keras_export("keras.applications.efficientnet_v2.preprocess_input")
def preprocess_input(x, data_format=None):
    """Return `x` unchanged.

    EfficientNetV2 embeds its own preprocessing inside the model (the
    Rescaling/Normalization layers added when `include_preprocessing=True`),
    so no external preprocessing is needed; this exists for API parity with
    the other `keras.applications` modules.
    """
    return x

@keras_export("keras.applications.efficientnet_v2.decode_predictions")
def decode_predictions(preds, top=5):
    # Thin wrapper delegating to the shared ImageNet prediction decoder.
    return imagenet_utils.decode_predictions(preds, top=top)

# Reuse the shared decoder's docstring for this wrapper.
decode_predictions.__doc__ = imagenet_utils.decode_predictions.__doc__
  • MBConv : 重要とされる特徴を見つける工程。
  • FusedMBConv : 見つけた重要な特徴を一気に計算する工程。

この点はPytorchのEfficientNetV2と変わらず。

異なる点は、PyTorch版はSiLU関数を、Keras版はSwish関数を使用している点。ただし、SiLU関数とSwish関数は数学的に等価である点に注意(GoogleはこれをSwishと呼んでいる)。

数式: f(x)=x⋅σ(x) (σ はシグモイド関数)
スポンサーリンク