diff options
| author | Gustaf Rydholm <gustaf.rydholm@gmail.com> | 2023-09-03 01:14:16 +0200 |
|---|---|---|
| committer | Gustaf Rydholm <gustaf.rydholm@gmail.com> | 2023-09-03 01:14:16 +0200 |
| commit | d020059f2f71fe7c25765dde9d535195c09ece01 (patch) | |
| tree | fbef1ff2b5d4475e6d3117575c61bde77802d07d /text_recognizer/network/vit.py | |
| parent | c66ca7420560e9ab0e8893f6c7d6a496f128c79a (diff) | |
Update imports
Diffstat (limited to 'text_recognizer/network/vit.py')
| -rw-r--r-- | text_recognizer/network/vit.py | 11 |
1 file changed, 5 insertions, 6 deletions
```diff
diff --git a/text_recognizer/network/vit.py b/text_recognizer/network/vit.py
index b6203d7..1fbf3fc 100644
--- a/text_recognizer/network/vit.py
+++ b/text_recognizer/network/vit.py
@@ -4,10 +4,10 @@ from typing import Type
 from einops.layers.torch import Rearrange
 from torch import Tensor, nn
 
-from text_recognizer.network.transformer.embedding.token import TokenEmbedding
-from text_recognizer.network.transformer.embedding.sincos import sincos_2d
-from text_recognizer.network.transformer.decoder import Decoder
-from text_recognizer.network.transformer.encoder import Encoder
+from .transformer.embedding.token import TokenEmbedding
+from .transformer.embedding.sincos import sincos_2d
+from .transformer.decoder import Decoder
+from .transformer.encoder import Encoder
 
 
 class VisionTransformer(nn.Module):
@@ -59,11 +59,10 @@ class VisionTransformer(nn.Module):
 
     def decode(self, text: Tensor, img_features: Tensor) -> Tensor:
         text = text.long()
-        # TODO: add mask to decoder
         mask = text != self.pad_index
         tokens = self.token_embedding(text)
         tokens = tokens + self.pos_embedding(tokens)
-        output = self.decoder(tokens, context=img_features)
+        output = self.decoder(tokens, context=img_features, mask=mask)
         return self.to_logits(output)
 
     def forward(
```