diff --git a/lee_transformers/layers/adaptive_computation_time.py b/lee_transformers/layers/adaptive_computation_time.py
index 8209635dea40778f9efacd965a3d673636398fdd..d0b5ab640ba55c0df5ac749ed64068c214173c9b 100644
--- a/lee_transformers/layers/adaptive_computation_time.py
+++ b/lee_transformers/layers/adaptive_computation_time.py
@@ -9,7 +9,7 @@ from typing import Any, cast, Optional, Tuple
 import torch
 import torch as th
 
-from ..typing import ActivationFn
+from ..typing_utils import ActivationFn
 
 
 class AdaptiveComputationTime(th.nn.Module):
diff --git a/lee_transformers/layers/glu_layers.py b/lee_transformers/layers/glu_layers.py
index 46e2c1e460ac8dc28a7793c1cb1d377472aeebe3..d6ee6dca743a716e3960a8b1ee963f93fd7a014c 100644
--- a/lee_transformers/layers/glu_layers.py
+++ b/lee_transformers/layers/glu_layers.py
@@ -3,7 +3,7 @@ from typing import Optional
 import torch
 import torch as th
 
-from ..typing import ActivationFn
+from ..typing_utils import ActivationFn
 
 
 class GLULayer(th.nn.Module):
diff --git a/lee_transformers/layers/resampling.py b/lee_transformers/layers/resampling.py
index 85f6dda90402a4c5332fadcb70ff98ad31b78954..7565d04218932883a068341f4e5bbac3855017d3 100644
--- a/lee_transformers/layers/resampling.py
+++ b/lee_transformers/layers/resampling.py
@@ -6,7 +6,7 @@ import torch as th
 from .glu_layers import GLULayer
 from .rms_norm import RMSNorm
 
-from ..typing import ActivationFn
+from ..typing_utils import ActivationFn
 
 
 def shift_right(
diff --git a/lee_transformers/models/lpe_transformer.py b/lee_transformers/models/lpe_transformer.py
index 692f405f9cb392fb3522627adc529b6b046e32cc..ffa2047c315b6ff03d2f9f3ae2b3bd6eb2d49f02 100644
--- a/lee_transformers/models/lpe_transformer.py
+++ b/lee_transformers/models/lpe_transformer.py
@@ -7,7 +7,7 @@ import torch as th
 from . import common
 from ..layers import GLULayer, RMSNorm
 
-from ..typing import ActivationFn, Decoder, DecoderLayer
+from ..typing_utils import ActivationFn, Decoder, DecoderLayer
 
 
 class LPETransformer(th.nn.Module):
diff --git a/lee_transformers/models/rpe_transformer.py b/lee_transformers/models/rpe_transformer.py
index 31bfce1ed9cfbf3831167f1018e5812035554663..3fc4e0b8fa8f2bfb15c891d3623fbf078458c20c 100644
--- a/lee_transformers/models/rpe_transformer.py
+++ b/lee_transformers/models/rpe_transformer.py
@@ -7,7 +7,7 @@ import torch as th
 from . import common
 from ..layers import GLULayer, RMSNorm, RPEMultiheadAttention
 
-from ..typing import ActivationFn, Decoder, DecoderLayer
+from ..typing_utils import ActivationFn, Decoder, DecoderLayer
 
 
 # Encoder self-attention block (private method of
diff --git a/lee_transformers/models/transformer.py b/lee_transformers/models/transformer.py
index d270ae3721342ffc7da1b048f46a65866169bff2..151da336a7073eb04e5a65e47d8145a903350d47 100644
--- a/lee_transformers/models/transformer.py
+++ b/lee_transformers/models/transformer.py
@@ -7,7 +7,7 @@ import torch as th
 from . import common
 from ..layers import GLULayer, RMSNorm
 
-from ..typing import ActivationFn, Decoder, DecoderLayer
+from ..typing_utils import ActivationFn, Decoder, DecoderLayer
 
 
 class Transformer(th.nn.Module):
diff --git a/lee_transformers/typing.py b/lee_transformers/typing_utils.py
similarity index 100%
rename from lee_transformers/typing.py
rename to lee_transformers/typing_utils.py
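
The module's contents are untouched (similarity index 100%); only the filename changes, along with every `from ..typing import ...` site. A plausible motivation, not stated in the diff itself, is that a file named `typing.py` can shadow the standard-library `typing` module whenever its directory ends up at the front of `sys.path`, e.g. when a script inside the package is run directly or under some test runners and doc tools. The sketch below reproduces that hazard in isolation; the file names `repro.py` and the empty local `typing.py` are hypothetical and not part of this repository:

```python
# repro.py -- minimal, self-contained sketch of the stdlib-shadowing hazard
# (assumption: this is what motivated renaming typing.py to typing_utils.py).
#
# Setup: create an empty file called typing.py next to this script, then run
# `python repro.py`. Python puts the script's directory first on sys.path,
# so the local file wins the import.

import typing  # resolves to ./typing.py, NOT the standard library

print(typing.__file__)  # .../typing.py in the current directory

# Any consumer of stdlib typing now breaks, because the shadow module
# does not define the expected names:
try:
    from typing import Optional  # noqa: F401
except ImportError as exc:
    print(f"shadowed: {exc}")  # "cannot import name 'Optional' from 'typing'"
```

Inside the package the old imports were relative (`from ..typing import ...`) and therefore unambiguous; the rename plausibly protects direct script execution and external tooling, and `typing_utils` stays importable under any path configuration.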