diff --git a/lee_transformers/layers/adaptive_computation_time.py b/lee_transformers/layers/adaptive_computation_time.py
index 8209635dea40778f9efacd965a3d673636398fdd..d0b5ab640ba55c0df5ac749ed64068c214173c9b 100644
--- a/lee_transformers/layers/adaptive_computation_time.py
+++ b/lee_transformers/layers/adaptive_computation_time.py
@@ -9,7 +9,7 @@ from typing import Any, cast, Optional, Tuple
 import torch
 import torch as th
 
-from ..typing import ActivationFn
+from ..typing_utils import ActivationFn
 
 
 class AdaptiveComputationTime(th.nn.Module):
diff --git a/lee_transformers/layers/glu_layers.py b/lee_transformers/layers/glu_layers.py
index 46e2c1e460ac8dc28a7793c1cb1d377472aeebe3..d6ee6dca743a716e3960a8b1ee963f93fd7a014c 100644
--- a/lee_transformers/layers/glu_layers.py
+++ b/lee_transformers/layers/glu_layers.py
@@ -3,7 +3,7 @@ from typing import Optional
 import torch
 import torch as th
 
-from ..typing import ActivationFn
+from ..typing_utils import ActivationFn
 
 
 class GLULayer(th.nn.Module):
diff --git a/lee_transformers/layers/resampling.py b/lee_transformers/layers/resampling.py
index 85f6dda90402a4c5332fadcb70ff98ad31b78954..7565d04218932883a068341f4e5bbac3855017d3 100644
--- a/lee_transformers/layers/resampling.py
+++ b/lee_transformers/layers/resampling.py
@@ -6,7 +6,7 @@ import torch as th
 
 from .glu_layers import GLULayer
 from .rms_norm import RMSNorm
-from ..typing import ActivationFn
+from ..typing_utils import ActivationFn
 
 
 def shift_right(
diff --git a/lee_transformers/models/hierarchical_transformer.py b/lee_transformers/models/hierarchical_transformer.py
index 06d382763c7e1eede242c469c0008c09d0a9218e..b1b45c314b7ee34769eac679c7a9d57a2e106cde 100644
--- a/lee_transformers/models/hierarchical_transformer.py
+++ b/lee_transformers/models/hierarchical_transformer.py
@@ -32,7 +32,7 @@ from ..layers import (
     RMSNorm,
     shift_right,
 )
-from ..typing import ActivationFn
+from ..typing_utils import ActivationFn
 
 OptionalMaskList = Union[List[torch.Tensor], List[None], bool, None]
 
diff --git a/lee_transformers/models/lpe_transformer.py b/lee_transformers/models/lpe_transformer.py
index bbf0b4c09367293862ad3e7f4a1c73f7ee7fac17..d59ae98d046357b442b772ff544ae15dcb00d8e4 100644
--- a/lee_transformers/models/lpe_transformer.py
+++ b/lee_transformers/models/lpe_transformer.py
@@ -7,7 +7,7 @@ import torch as th
 
 from . import common
 from ..layers import GLULayer, RMSNorm
-from ..typing import ActivationFn, Decoder, DecoderLayer
+from ..typing_utils import ActivationFn, Decoder, DecoderLayer
 
 
 class LPETransformer(th.nn.Module):
diff --git a/lee_transformers/models/rpe_transformer.py b/lee_transformers/models/rpe_transformer.py
index ac90f565999beae4b51ea6cc8443b78b31c1e763..84bf2c9f55c928741105132e9ecde11b0d891ef5 100644
--- a/lee_transformers/models/rpe_transformer.py
+++ b/lee_transformers/models/rpe_transformer.py
@@ -7,7 +7,7 @@ import torch as th
 
 from . import common
 from ..layers import GLULayer, RMSNorm, RPEMultiheadAttention
-from ..typing import ActivationFn, Decoder, DecoderLayer
+from ..typing_utils import ActivationFn, Decoder, DecoderLayer
 
 
 # Encoder self-attention block (private method of
diff --git a/lee_transformers/models/transformer.py b/lee_transformers/models/transformer.py
index 9053b0b188be30805e46155b6f055bb8fff726cd..f7651f15d69eabcc9e43135e05331fabb85e1a0e 100644
--- a/lee_transformers/models/transformer.py
+++ b/lee_transformers/models/transformer.py
@@ -7,7 +7,7 @@ import torch as th
 
 from . import common
 from ..layers import GLULayer, RMSNorm
-from ..typing import ActivationFn, Decoder, DecoderLayer
+from ..typing_utils import ActivationFn, Decoder, DecoderLayer
 
 
 class Transformer(th.nn.Module):
diff --git a/lee_transformers/typing.py b/lee_transformers/typing_utils.py
similarity index 100%
rename from lee_transformers/typing.py
rename to lee_transformers/typing_utils.py