from __future__ import unicode_literals, print_function, division
import torch
import torch.nn as nn
import numpy as np
from math import sqrt
from torch.nn import Parameter
from torch.nn.utils import weight_norm
from utils.masking import TriangularCausalMask,ProbMask
class Chomp1d(nn.Module):
def __init__(self, chomp_size):
super(Chomp1d, self).__init__()
self.chomp_size = chomp_size
def forward(self, x):
"""
其实这就是一个裁剪的模块,裁剪多出来的padding
"""
return x[:, :, :-self.chomp_size].contiguous()
class Causal_Conv(nn.Module):
def __init__(self, n_inputs, n_outputs, kernel_size, stride=1, dilation=1, padding=None, dropout=0.2):
"""
相当于一个Residual block
:param n_inputs: int, 输入通道数
:param n_outputs: int, 输出通道数
:param kernel_size: int, 卷积核尺寸
:param stride: int, 步长,一般为1
:param dilation: int, 膨胀系数
:param padding: int, 填充系数
:param dropout: float, dropout比率
"""
super(Causal_Conv, self).__init__()
padding = padding or (kernel_size-1) * dilation
self.conv = weight_norm(nn.Conv1d(n_inputs, n_outputs, kernel_size,
stride=stride, padding=padding, dilation=dilation))
# 经过conv1,输出的size其实是(Batch, input_channel, seq_len + padding)
self.chomp = Chomp1d(padding) # 裁剪掉多出来的padding部分,维持输出时间步为seq_len
self.relu = nn.ReLU()
self.dropout = nn.Dropout(dropout)
self.net = nn.Sequential(self.conv, self.chomp, self.relu, self.dropout)
# self.downsample = nn.Conv1d(n_inputs, n_outputs, 1) if n_inputs != n_outputs else None
# self.relu = nn.ReLU()
self.init_weights()
def init_weights(self):
"""
参数初始化
:return:
"""
self.conv.weight.data.normal_(0, 0.01)
def forward(self, x):
"""
:param x: size of (Batch, input_channel, seq_len)
:return:
"""
out = self.net(x)
# res = x if self.downsample is None else self.downsample(x)
return out
class FullAttention(nn.Module):
def __init__(self, mask_flag=True, factor=5, scale=None, attention_dropout=0.1, output_attention=False):
super(FullAttention, self).__init__()
self.scale = scale
self.mask_flag = mask_flag
self.output_attention = output_attention
self.dropout = nn.Dropout(attention_dropout)
def forward(self, queries, keys, values, attn_mask):
"""
:param queries:张量,维度是(B,L,H,E),其中B是Batch_size,L是序列长度,H:header数,E:特征维度
:param keys:张量,维度是(B,S,H,D),其中B是Batch_size,S是序列长度,H:header数,D:特征维度
:param values:张量,维度是(B,S,H,D),其中B是Batch_size,S是序列长度,H:header数,D:特征维度
:param attn_mask:掩码
:return:content:返回加权得到的values
A:注意力分布
"""
B, L, H, E = queries.shape
_, S, _, D = values.shape
scale = self.scale or 1. / sqrt(E)
scores = torch.einsum("blhe,bshe->bhls", queries, keys) # scores:(b,h,l,s)
if self.mask_flag:
if attn_mask is None:
attn_mask = TriangularCausalMask(B, L, device=queries.device)
scores.masked_fill_(attn_mask.mask, -np.inf)
A = self.dropout(torch.softmax(scale * scores, dim=-1)) # A:(b,h,l,s)
V = torch.einsum("bhls,bshd->blhd", A, values) # V:(b,l,h,d)
if self.output_attention:
return (V.contiguous(), A)
else:
return (V.contiguous(), None)
class AttentionLayer(nn.Module):
def __init__(self, attention, d_model, n_heads,kernel_size=3, d_keys=None,
d_values=None):
super(AttentionLayer, self).__init__()
d_keys = d_keys or (d_model//n_heads)
d_values = d_values or (d_model//n_heads)
self.inner_attention = attention
# self.query_projection = nn.Linear(d_model, d_keys * n_heads)
self.query_projection = Causal_Conv(d_model,d_keys * n_heads,kernel_size)
# self.key_projection = nn.Linear(d_model, d_keys * n_heads)
# self.key_projection = Causal_Conv(d_model,d_keys,kernel_size)
self.key_projection = self.query_projection
# self.value_projection = nn.Linear(d_model, d_keys * n_heads)
self.value_projection = nn.Conv1d(d_model, d_values * n_heads,1)
self.out_projection = nn.Linear(d_values * n_heads, d_model)
self.n_heads = n_heads
def forward(self, queries, keys, values, attn_mask):
B, L, _ = queries.shape
_, S, _ = keys.shape
H = self.n_heads
queries = self.query_projection(queries).view(B, L, H, -1) # 张量:(B,L,H,d_keys)
keys = self.key_projection(keys).view(B, S, H, -1) # 张量:(B,S,H,d_keys)
values = self.value_projection(values).view(B, S, H, -1) # 张量:(B,S,H,d_keys)
out, attn = self.inner_attention(
queries,
keys,
values,
attn_mask
) # out:(b,l,h,d),attn:(b,h,l,s)
out = out.view(B, L, -1) # out:(b,l,h*d)
return self.out_projection(out), attn #(b,l,d_model),attn:(b,h,l,s)
class ProbAttention(nn.Module):
def __init__(self, mask_flag=True, factor=5, scale=None, attention_dropout=0.1, output_attention=False):
super(ProbAttention, self).__init__()
self.factor = factor
self.scale = scale
self.mask_flag = mask_flag
self.output_attention = output_attention
self.dropout = nn.Dropout(attention_dropout)
def _prob_QK(self, Q, K, sample_k, n_top): # n_top: c*ln(L_q)
# Q [B, H, L, D]
B, H, L_K, E = K.shape
_, _, L_Q, _ = Q.shape
# calculate the sampled Q_K
K_expand = K.unsqueeze(-3).expand(B, H, L_Q, L_K, E)
index_sample = torch.randint(L_K, (L_Q, sample_k)) # real U = U_part(factor*ln(L_k))*L_q
K_sample = K_expand[:, :, torch.arange(L_Q).unsqueeze(1), index_sample, :]
Q_K_sample = torch.matmul(Q.unsqueeze(-2), K_sample.transpose(-2, -1)).squeeze()
# find the Top_k query with sparisty measurement
M = Q_K_sample.max(-1)[0] - torch.div(Q_K_sample.sum(-1), L_K)
M_top = M.topk(n_top, sorted=False)[1]
# use the reduced Q to calculate Q_K
Q_reduce = Q[torch.arange(B)[:, None, None],
torch.arange(H)[None, :, None],
M_top, :] # factor*ln(L_q)
Q_K = torch.matmul(Q_reduce, K.transpose(-2, -1)) # factor*ln(L_q)*L_k
return Q_K, M_top
def _get_initial_context(self, V, L_Q):
B, H, L_V, D = V.shape
if not self.mask_flag:
# V_sum = V.sum(dim=-2)
V_sum = V.mean(dim=-2)
contex = V_sum.unsqueeze(-2).expand(B, H, L_Q, V_sum.shape[-1]).clone()
else: # use mask
assert (L_Q == L_V) # requires that L_Q == L_V, i.e. for self-attention only
contex = V.cumsum(dim=-2)
return contex
def _update_context(self, context_in, V, scores, index, L_Q, attn_mask):
B, H, L_V, D = V.shape
if self.mask_flag:
attn_mask = ProbMask(B, H, L_Q, index, scores, device=V.device)
scores.masked_fill_(attn_mask.mask, -np.inf)
attn = torch.softmax(scores, dim=-1) # nn.Softmax(dim=-1)(scores)
context_in[torch.arange(B)[:, None, None],
torch.arange(H)[None, :, None],
index, :] = torch.matmul(attn, V)
if
没有合适的资源?快使用搜索试试~ 我知道了~
基于Transformer模型的时间序列预测python源码(高分项目).zip

共12个文件
py:11个
ipynb:1个

1.该资源内容由用户上传,如若侵权请联系客服进行举报
2.虚拟产品一经售出概不退款(资源遇到问题,请及时私信上传者)
2.虚拟产品一经售出概不退款(资源遇到问题,请及时私信上传者)
版权申诉

温馨提示
基于Transformer模型的时间序列预测python源码(高分项目).zip本资源中的源码都是经过本地编译过可运行的,资源项目的难度比较适中,内容都是经过助教老师审定过的能够满足学习、使用需求,如果有需要的话可以放心下载使用。 基于Transformer模型的时间序列预测python源码(高分项目).zip本资源中的源码都是经过本地编译过可运行的,资源项目的难度比较适中,内容都是经过助教老师审定过的能够满足学习、使用需求,如果有需要的话可以放心下载使用。 基于Transformer模型的时间序列预测python源码(高分项目).zip本资源中的源码都是经过本地编译过可运行的,资源项目的难度比较适中,内容都是经过助教老师审定过的能够满足学习、使用需求,如果有需要的话可以放心下载使用。基于Transformer模型的时间序列预测python源码(高分项目).zip本资源中的源码都是经过本地编译过可运行的,资源项目的难度比较适中,内容都是经过助教老师审定过的能够满足学习、使用需求,如果有需要的话可以放心下载使用。基于Transformer模型的时间序列预测python源码(高分项目
资源推荐
资源详情
资源评论























收起资源包目录

















共 12 条
- 1

盈梓的博客
- 粉丝: 1w+
上传资源 快速赚钱
我的内容管理 展开
我的资源 快来上传第一个资源
我的收益
登录查看自己的收益我的积分 登录查看自己的积分
我的C币 登录后查看C币余额
我的收藏
我的下载
下载帮助


最新资源
- modelcontextprotocol_kotlin-sdk-Kotlin资源
- nexfly-AI人工智能资源
- cpp-tbox-硬件开发资源
- 射频功率放大器的研究与设计.pdf
- 基于 PyQt5 的轮廓获取、人脸识别与图像分类系统
- MatterMap-移动应用开发资源
- GOSP-单片机开发资源
- 电子工程基于SAR ADC的抗混叠巴特沃斯低通滤波器设计:滤波器电路及参数计算方法
- 全双工485通信系统原理示意图
- EasyDL 定制化图像识别之图片分类功能
- wnmp PHP集成环境-PHP资源
- layui-JavaScript资源
- ERD-ONLINE-SQL资源
- WeBlog-毕业设计资源
- ACM算法竞赛-ACM资源
- Mathematical Modeling-美赛资源
资源上传下载、课程学习等过程中有任何疑问或建议,欢迎提出宝贵意见哦~我们会及时处理!
点击此处反馈



安全验证
文档复制为VIP权益,开通VIP直接复制

- 1
- 2
前往页