Implementing Attention Mechanisms in PyTorch

Isabel · Updated 2024-09-21

The notebook below follows the d2l (Dive into Deep Learning) course code: it first lists the files in the dataset directory, then implements a masked softmax, and finally builds two attention scoring functions, dot product attention and multilayer perceptron (MLP) attention.

```python
import math
import os

import torch
import torch.nn as nn


def file_name_walk(file_dir):
    for root, dirs, files in os.walk(file_dir):
        # print("root", root)  # current directory path
        print("dirs", dirs)    # all subdirectories under the current path
        print("files", files)  # all non-directory files under the current path

file_name_walk("/home/kesci/input/fraeng6506")
```

Softmax masking

Attention weights are produced by a softmax over the scores, but padded positions must not receive any weight. `SequenceMask` overwrites every position at or beyond a sequence's valid length with a large negative value, and `masked_softmax` applies the softmax afterwards, so the masked positions end up with (near-)zero probability.

```python
def SequenceMask(X, X_len, value=-1e6):
    maxlen = X.size(1)
    # positions at or beyond each row's valid length are masked out
    mask = torch.arange(maxlen, dtype=torch.float)[None, :] >= X_len[:, None]
    X[mask] = value
    return X


def masked_softmax(X, valid_length):
    # X: 3-D tensor, valid_length: 1-D or 2-D tensor
    softmax = nn.Softmax(dim=-1)
    if valid_length is None:
        return softmax(X)
    else:
        shape = X.shape
        if valid_length.dim() == 1:
            try:
                valid_length = torch.FloatTensor(
                    valid_length.numpy().repeat(shape[1], axis=0))        # e.g. [2, 2, 3, 3]
            except:
                valid_length = torch.FloatTensor(
                    valid_length.cpu().numpy().repeat(shape[1], axis=0))  # e.g. [2, 2, 3, 3]
        else:
            valid_length = valid_length.reshape((-1,))
        # fill masked elements with a large negative value, whose exp is close to 0
        X = SequenceMask(X.reshape((-1, shape[-1])), valid_length)
        return softmax(X).reshape(shape)


masked_softmax(torch.rand((2, 2, 4), dtype=torch.float), torch.FloatTensor([2, 3]))
```

`torch.bmm` performs a batched matrix multiplication, which is used below to combine the attention weights with the values:

```python
torch.bmm(torch.ones((2, 1, 3), dtype=torch.float),
          torch.ones((2, 3, 2), dtype=torch.float))
```

Dot product attention

Dot product attention scores a query against each key with their inner product, scaled by the square root of the query/key dimension d.

```python
# Save to the d2l package.
class DotProductAttention(nn.Module):
    def __init__(self, dropout, **kwargs):
        super(DotProductAttention, self).__init__(**kwargs)
        self.dropout = nn.Dropout(dropout)

    # query: (batch_size, #queries, d)
    # key: (batch_size, #kv_pairs, d)
    # value: (batch_size, #kv_pairs, dim_v)
    # valid_length: either (batch_size,) or (batch_size, xx)
    def forward(self, query, key, value, valid_length=None):
        d = query.shape[-1]
        # swap the last two dimensions of key so the batched matmul
        # yields scores of shape (batch_size, #queries, #kv_pairs)
        scores = torch.bmm(query, key.transpose(1, 2)) / math.sqrt(d)
        attention_weights = self.dropout(masked_softmax(scores, valid_length))
        print("attention_weight\n", attention_weights)
        return torch.bmm(attention_weights, value)
```

Test

```python
atten = DotProductAttention(dropout=0)

keys = torch.ones((2, 10, 2), dtype=torch.float)
values = torch.arange(40, dtype=torch.float).view(1, 10, 4).repeat(2, 1, 1)
atten(torch.ones((2, 1, 2), dtype=torch.float), keys, values,
      torch.FloatTensor([2, 6]))
```

Multilayer perceptron (MLP) attention

MLP attention projects the query and the key into a common `units`-dimensional space with two linear layers, adds the projections with broadcasting, and turns each resulting feature vector into a scalar score with a third linear layer `v`.

```python
# Save to the d2l package.
class MLPAttention(nn.Module):
    def __init__(self, units, ipt_dim, dropout, **kwargs):
        super(MLPAttention, self).__init__(**kwargs)
        # nn.Linear acts on the last dimension, so the 3-D shapes of
        # query and key are preserved by the projections.
        self.W_k = nn.Linear(ipt_dim, units, bias=False)
        self.W_q = nn.Linear(ipt_dim, units, bias=False)
        self.v = nn.Linear(units, 1, bias=False)
        self.dropout = nn.Dropout(dropout)

    def forward(self, query, key, value, valid_length):
        query, key = self.W_q(query), self.W_k(key)
        # expand query to (batch_size, #queries, 1, units) and key to
        # (batch_size, 1, #kv_pairs, units), then add them with broadcasting
        features = query.unsqueeze(2) + key.unsqueeze(1)
        scores = self.v(features).squeeze(-1)
        attention_weights = self.dropout(masked_softmax(scores, valid_length))
        return torch.bmm(attention_weights, value)
```

Test

```python
atten = MLPAttention(ipt_dim=2, units=8, dropout=0)
atten(torch.ones((2, 1, 2), dtype=torch.float), keys, values,
      torch.FloatTensor([2, 6]))
```
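As a quick check on what the masking does, the same computation as `DotProductAttention` can be reproduced with plain tensor operations. The helper below, `check_dot_attention`, is only an illustrative sketch and not part of the original notebook:

```python
import math
import torch

def check_dot_attention(query, key, value, valid_length):
    # scaled scores: (batch_size, #queries, #kv_pairs)
    d = query.shape[-1]
    scores = torch.bmm(query, key.transpose(1, 2)) / math.sqrt(d)
    # same rule as SequenceMask: positions at or beyond valid_length are dropped
    maxlen = scores.shape[-1]
    mask = torch.arange(maxlen, dtype=torch.float)[None, None, :] >= valid_length[:, None, None]
    scores = scores.masked_fill(mask, -1e6)
    weights = torch.softmax(scores, dim=-1)
    return torch.bmm(weights, value)

keys = torch.ones((2, 10, 2), dtype=torch.float)
values = torch.arange(40, dtype=torch.float).view(1, 10, 4).repeat(2, 1, 1)
out = check_dot_attention(torch.ones((2, 1, 2), dtype=torch.float),
                          keys, values, torch.FloatTensor([2, 6]))
print(out)
```

Because all keys are identical here, the weights are uniform over the unmasked positions, so the first output row is the average of the first two value rows and the second is the average of the first six.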
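These scoring modules are typically used inside a sequence-to-sequence decoder, where the decoder's current hidden state is the query and the encoder outputs serve as both keys and values. The sketch below wires `MLPAttention` into one such decoder step; the sizes (`hidden_size`, `embed_dim`) and the use of a GRU are illustrative assumptions, not part of the original post.

```python
import torch
import torch.nn as nn

# Hypothetical sizes, for illustration only.
batch_size, src_len, hidden_size = 2, 10, 8

# Encoder outputs play the role of both keys and values; the decoder's
# current hidden state is the query.
enc_outputs = torch.randn(batch_size, src_len, hidden_size)
dec_hidden = torch.randn(batch_size, 1, hidden_size)
enc_valid_len = torch.FloatTensor([7, 10])  # true source lengths

attention = MLPAttention(ipt_dim=hidden_size, units=8, dropout=0)

# context: (batch_size, 1, hidden_size), a weighted sum of encoder outputs
# restricted to the first enc_valid_len positions of each sequence
context = attention(dec_hidden, enc_outputs, enc_outputs, enc_valid_len)

# A decoder step would typically concatenate this context with the current
# input embedding before feeding it to the RNN cell.
embed_dim = 4                               # illustrative embedding size
dec_input = torch.randn(batch_size, 1, embed_dim)
rnn = nn.GRU(embed_dim + hidden_size, hidden_size, batch_first=True)
out, state = rnn(torch.cat([dec_input, context], dim=-1))
print(out.shape)  # torch.Size([2, 1, 8])
```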
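For completeness, PyTorch also ships masked scaled dot product attention as the built-in `nn.MultiheadAttention`. It is not numerically identical to `DotProductAttention` above, because it adds learned input and output projections (and, with several heads, splits the embedding), but its `key_padding_mask` plays the same role as `valid_length`. This is a minimal sketch with all sizes chosen only for illustration:

```python
import torch
import torch.nn as nn

batch_size, src_len, d = 2, 10, 2
query = torch.ones(batch_size, 1, d)
keys = torch.ones(batch_size, src_len, d)
valid_len = torch.tensor([2, 6])

# True marks the padded key positions to ignore,
# i.e. everything at or beyond the valid length.
key_padding_mask = torch.arange(src_len)[None, :] >= valid_len[:, None]

mha = nn.MultiheadAttention(embed_dim=d, num_heads=1, batch_first=True)
out, weights = mha(query, keys, keys, key_padding_mask=key_padding_mask)
print(weights)  # zero weight on the masked positions
```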
Author: JasonYuJX



Tags: pytorch, attention mechanism
