From dedf8deb025ac9efdad5e9baf9165ef63d6829ff Mon Sep 17 00:00:00 2001 From: Gustaf Rydholm Date: Mon, 5 Apr 2021 23:24:20 +0200 Subject: Pre-commit fixes, optimizer loading fix --- text_recognizer/networks/image_transformer.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) (limited to 'text_recognizer/networks/image_transformer.py') diff --git a/text_recognizer/networks/image_transformer.py b/text_recognizer/networks/image_transformer.py index aa024e0..85a84d2 100644 --- a/text_recognizer/networks/image_transformer.py +++ b/text_recognizer/networks/image_transformer.py @@ -1,9 +1,9 @@ """A Transformer with a cnn backbone. The network encodes a image with a convolutional backbone to a latent representation, -i.e. feature maps. A 2d positional encoding is applied to the feature maps for +i.e. feature maps. A 2d positional encoding is applied to the feature maps for spatial information. The resulting feature are then set to a transformer decoder -together with the target tokens. +together with the target tokens. TODO: Local attention for transformer.j @@ -107,9 +107,7 @@ class ImageTransformer(nn.Module): encoder_class = getattr(network_module, encoder.type) return encoder_class(**encoder.args) - def _configure_mapping( - self, mapping: str - ) -> Tuple[List[str], Dict[str, int]]: + def _configure_mapping(self, mapping: str) -> Tuple[List[str], Dict[str, int]]: """Configures mapping.""" if mapping == "emnist": mapping, inverse_mapping, _ = emnist_mapping() @@ -125,7 +123,7 @@ class ImageTransformer(nn.Module): Tensor: Image features. Shapes: - - image: :math: `(B, C, H, W)` + - image: :math: `(B, C, H, W)` - latent: :math: `(B, T, C)` """ -- cgit v1.2.3-70-g09d2