Source code for qumphy.models.itransformer

"""
File: qumphy/models/itransformer.py
Project: 22HLT01 QUMPHY
Contact: oskar.pfeffer@ptb.de
Gitlab: https://gitlab.com/qumphy
Description: Code implementation from https://github.com/thuml/Time-Series-Library.
"""

import torch
import torch.nn as nn
import torch.nn.functional as F
from qumphy.models.utils.transformer_encdec import Encoder, EncoderLayer
from qumphy.models.utils.selfattention_family import FullAttention, AttentionLayer
from qumphy.models.utils.embed import DataEmbedding_inverted


class iTransformer(nn.Module):
    """Inverted Transformer model for time-series tasks.

    Paper link: https://arxiv.org/abs/2310.06625

    The model embeds the input with ``DataEmbedding_inverted``, runs it
    through a stack of full-attention encoder layers and applies a
    task-specific linear head selected by ``task_name``.
    """

    def __init__(
        self,
        seq_len,
        num_class,
        pred_len=0,
        d_model=64,
        embed="fixed",
        freq="s",
        dropout=0.1,
        n_heads=8,
        e_layers=2,
        d_ff=2048,
        factor=1,
        activation="gelu",
        task_name="classification",
        enc_in=1,
    ):
        """Initialize the iTransformer model.

        Parameters
        ----------
        seq_len : int
            Length of the input sequence.
        num_class : int
            Number of output classes for classification.
        pred_len : int
            Length of the prediction sequence for forecasting tasks.
        d_model : int
            Dimension of the transformer model embeddings.
        embed : str
            Type of embedding used for the input data.
        freq : str
            Frequency string used by the embedding layer.
        dropout : float
            Dropout probability.
        n_heads : int
            Number of attention heads.
        e_layers : int
            Number of encoder layers.
        d_ff : int
            Dimension of the feed-forward network in each encoder layer.
        factor : int
            Attention factor used by the attention mechanism.
        activation : str
            Activation function used in the encoder layers.
        task_name : str
            Name of the task. Supported values are "long_term_forecast",
            "short_term_forecast", "imputation", "anomaly_detection", and
            "classification". For any other value no output head is
            created and ``forward`` returns ``None``.
        enc_in : int
            Number of input variables or channels used for classification.
        """
        super().__init__()
        self.task_name = task_name
        self.seq_len = seq_len
        self.pred_len = pred_len

        # Embedding
        self.enc_embedding = DataEmbedding_inverted(
            seq_len, d_model, embed, freq, dropout
        )

        # Encoder
        self.encoder = Encoder(
            [
                EncoderLayer(
                    AttentionLayer(
                        FullAttention(
                            False,
                            factor,
                            attention_dropout=dropout,
                            output_attention=False,
                        ),
                        d_model,
                        n_heads,
                    ),
                    d_model,
                    d_ff,
                    dropout=dropout,
                    activation=activation,
                )
                for _ in range(e_layers)
            ],
            norm_layer=torch.nn.LayerNorm(d_model),
        )

        # Decoder: one linear head per task.
        if task_name in ["long_term_forecast", "short_term_forecast"]:
            self.projection = nn.Linear(d_model, pred_len, bias=True)
        if task_name == "imputation":
            self.projection = nn.Linear(d_model, seq_len, bias=True)
        if task_name == "anomaly_detection":
            self.projection = nn.Linear(d_model, seq_len, bias=True)
        if task_name == "classification":
            self.act = F.gelu
            self.dropout = nn.Dropout(dropout)
            # The classifier consumes the flattened encoder output.
            self.projection = nn.Linear(d_model * enc_in, num_class)

    def _normalize_encode_project(self, x_enc, x_mark_enc):
        """Normalize the input, encode it, project it and undo the normalization.

        Shared pipeline of ``forecast``, ``imputation`` and
        ``anomaly_detection``.

        Parameters
        ----------
        x_enc : torch.Tensor
            Encoder input tensor of shape (batch_size, seq_len, num_features).
        x_mark_enc : torch.Tensor or None
            Encoder time feature tensor passed on to the embedding layer.

        Returns
        -------
        torch.Tensor
            De-normalized projection output of shape
            (batch_size, projection_length, num_features).
        """
        # Per-series normalization along the time axis (dim 1).
        means = x_enc.mean(1, keepdim=True).detach()
        x_enc = x_enc - means
        stdev = torch.sqrt(
            torch.var(x_enc, dim=1, keepdim=True, unbiased=False) + 1e-5
        )
        # Out-of-place division on purpose: ``torch.var`` keeps ``x_enc`` for
        # its backward pass, so an in-place ``/=`` would make ``backward()``
        # fail whenever the input requires gradients.
        x_enc = x_enc / stdev

        _, _, num_variates = x_enc.shape

        # Embedding
        enc_out = self.enc_embedding(x_enc, x_mark_enc)
        enc_out, _ = self.encoder(enc_out, attn_mask=None)

        # Keep only the first ``num_variates`` entries of the last axis.
        # NOTE(review): presumably the embedding appends extra tokens when
        # time features are given -- confirm against DataEmbedding_inverted.
        dec_out = self.projection(enc_out).permute(0, 2, 1)[:, :, :num_variates]

        # De-Normalization from Non-stationary Transformer. ``stdev`` and
        # ``means`` have a singleton time axis and broadcast over it.
        dec_out = dec_out * stdev
        dec_out = dec_out + means
        return dec_out

    def forecast(self, x_enc, x_mark_enc, x_dec, x_mark_dec):
        """Run forecasting with the iTransformer model.

        Parameters
        ----------
        x_enc : torch.Tensor
            Encoder input tensor of shape (batch_size, seq_len, num_features).
        x_mark_enc : torch.Tensor
            Encoder time feature tensor.
        x_dec : torch.Tensor
            Decoder input tensor. This parameter is kept for compatibility.
        x_mark_dec : torch.Tensor
            Decoder time feature tensor. This parameter is kept for compatibility.

        Returns
        -------
        torch.Tensor
            Forecast output tensor of shape (batch_size, pred_len, num_features).
        """
        return self._normalize_encode_project(x_enc, x_mark_enc)

    def imputation(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask):
        """Run imputation with the iTransformer model.

        Parameters
        ----------
        x_enc : torch.Tensor
            Encoder input tensor of shape (batch_size, seq_len, num_features).
        x_mark_enc : torch.Tensor
            Encoder time feature tensor.
        x_dec : torch.Tensor
            Decoder input tensor. This parameter is kept for compatibility.
        x_mark_dec : torch.Tensor
            Decoder time feature tensor. This parameter is kept for compatibility.
        mask : torch.Tensor
            Mask tensor indicating missing or observed values. It is accepted
            for interface compatibility and not used in the computation.

        Returns
        -------
        torch.Tensor
            Imputed output tensor of shape (batch_size, seq_len, num_features).
        """
        return self._normalize_encode_project(x_enc, x_mark_enc)

    def anomaly_detection(self, x_enc):
        """Run anomaly detection with the iTransformer model.

        Parameters
        ----------
        x_enc : torch.Tensor
            Encoder input tensor of shape (batch_size, seq_len, num_features).

        Returns
        -------
        torch.Tensor
            Reconstructed output tensor of shape (batch_size, seq_len, num_features).
        """
        # No time features are used for this task.
        return self._normalize_encode_project(x_enc, None)

    def classification(self, x_enc):
        """Run classification with the iTransformer model.

        Parameters
        ----------
        x_enc : torch.Tensor
            Input tensor of shape (batch_size, channels, seq_len).

        Returns
        -------
        torch.Tensor
            Classification logits of shape (batch_size, num_class).
        """
        # Swap the last two axes before handing the input to the embedding.
        x_enc = x_enc.permute(0, 2, 1)
        enc_out = self.enc_embedding(x_enc, None)
        enc_out, _ = self.encoder(enc_out, attn_mask=None)

        # The output transformer encoder/decoder embeddings don't include
        # non-linearity, so apply one before the linear classifier.
        output = self.act(enc_out)
        output = self.dropout(output)
        # Flatten everything except the batch axis.
        output = output.reshape(output.shape[0], -1)
        output = self.projection(output)
        return output

    def forward(self, x_enc, x_mark_enc=None, x_dec=None, x_mark_dec=None, mask=None):
        """Run a forward pass for the selected task.

        Parameters
        ----------
        x_enc : torch.Tensor
            Encoder input tensor.
        x_mark_enc : torch.Tensor
            Encoder time feature tensor.
        x_dec : torch.Tensor
            Decoder input tensor.
        x_mark_dec : torch.Tensor
            Decoder time feature tensor.
        mask : torch.Tensor
            Mask tensor used for imputation.

        Returns
        -------
        torch.Tensor or None
            Output tensor for the selected task. Returns None if the task
            name is not supported.
        """
        if self.task_name in ("long_term_forecast", "short_term_forecast"):
            dec_out = self.forecast(x_enc, x_mark_enc, x_dec, x_mark_dec)
            return dec_out[:, -self.pred_len :, :]  # [B, L, D]
        if self.task_name == "imputation":
            dec_out = self.imputation(x_enc, x_mark_enc, x_dec, x_mark_dec, mask)
            return dec_out  # [B, L, D]
        if self.task_name == "anomaly_detection":
            dec_out = self.anomaly_detection(x_enc)
            return dec_out  # [B, L, D]
        if self.task_name == "classification":
            dec_out = self.classification(x_enc)
            return dec_out  # [B, N]
        return None