I'm trying to reproduce a piece of Python code in MATLAB.
It encodes a DNA sequence as the integers (1 2 3 4), extracts a feature matrix,
and then trains a neural network on it.
The very first step uses an embedding layer to turn the integers into vectors,
but I don't understand where those vectors actually come from.
Are they derived from relationships among the values in my input array, or is each number matched against some dictionary? (I suspect the former, but I don't know how the vectors would be computed.)
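To make the question concrete, here is a minimal sketch of the kind of experiment I have been running (the A=1, C=2, G=3, T=4 mapping and the idea that index 0 is reserved for padding are my own assumptions, not something stated in the original code):

    import torch

    # Same kind of layer as in the model below, with a small dim for readability.
    emb = torch.nn.Embedding(num_embeddings=5, embedding_dim=4)

    seq = torch.tensor([1, 2, 3, 4, 2, 1])  # a DNA fragment encoded as integers
    vecs = emb(seq)                          # shape (6, 4)

    # Each output row seems to be exactly a row of emb.weight picked by index:
    print(torch.equal(vecs, emb.weight[seq]))  # prints True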
import torch
import torch.nn as nn

# DIM_embedding and DIM are hyperparameters set earlier in my script;
# the values here are just examples so the snippet runs on its own.
DIM_embedding = 8
DIM = 16

class Prediction(nn.Module):
    def __init__(self):
        super(Prediction, self).__init__()
        # 5 embedding rows: the codes 1-4 plus index 0 (presumably padding)
        self.Embedding = nn.Embedding(num_embeddings=5, embedding_dim=DIM_embedding)
        self.dropout = nn.Dropout(p=0.2)
        self.conv1 = nn.Conv1d(in_channels=DIM_embedding, out_channels=DIM, kernel_size=5, padding=2)
        self.BatchNorm1d = nn.BatchNorm1d(DIM)
        self.relu1 = nn.ReLU()
        self.MaxPool1d = nn.MaxPool1d(kernel_size=3, stride=2)
        self.conv2 = nn.Conv1d(in_channels=DIM, out_channels=DIM, kernel_size=5, padding=2)
        # batch_first=True so the LSTM sees (batch, seq, feature), matching forward()
        self.LSTM = nn.LSTM(input_size=DIM, hidden_size=DIM, num_layers=1, batch_first=True)
        self.linear = nn.Linear(60 * DIM, 1)

    def forward(self, x):
        x = self.Embedding(x)                 # (batch_size, seq_len, embedding_dim)
        x = self.dropout(x)
        x = x.permute(0, 2, 1)                # (batch_size, embedding_dim, seq_len)
        x = self.conv1(x)                     # (batch_size, DIM, seq_len)
        x = self.dropout(x)
        x = self.relu1(x)
        x = self.MaxPool1d(x)                 # (batch_size, DIM, 30)
        x = x.permute(0, 2, 1)                # (batch_size, 30, DIM)
        x1, (hn, cn) = self.LSTM(x)           # (batch_size, 30, DIM)
        x = x.permute(0, 2, 1)                # (batch_size, DIM, 30)
        x = self.conv2(x)                     # (batch_size, DIM, 30)
        x = self.dropout(x)
        x = self.relu1(x)
        x = self.BatchNorm1d(x)               # (batch_size, DIM, 30)
        x = x.permute(0, 2, 1)                # (batch_size, 30, DIM)
        x2, (hn, cn) = self.LSTM(x)           # (batch_size, 30, DIM)
        x = torch.cat((x1, x2), 1)            # (batch_size, 60, DIM)
        x = self.dropout(x)
        x = torch.reshape(x, (-1, 60 * DIM))  # (batch_size, 60 * DIM)
        x = self.linear(x)                    # (batch_size, 1)
        x = x.squeeze(-1)                     # (batch_size,)
        return x
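For completeness, this is roughly how I exercise the model. The sequence length 61 is my own choice so that the pooled length matches the 30 in the comments: with kernel_size=3 and stride=2, (61 - 3) // 2 + 1 = 30.

    model = Prediction()
    batch = torch.randint(low=0, high=5, size=(8, 61))  # 8 sequences of 61 integer codes
    out = model(batch)
    print(out.shape)  # torch.Size([8]), one score per sequence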
What I really want to know is the underlying principle of nn.Embedding, i.e. the relationship between each integer and the vector it is mapped to.
Also, is there a MATLAB function that provides a similar num2vec-style capability?
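My current guess (please correct me if I am wrong) is that the lookup is mathematically the same as multiplying a one-hot vector by the weight matrix, which is what I would have to reproduce in MATLAB; plain row indexing such as W(seq + 1, :) should then do the job there, given 1-based indexing, and wordEmbeddingLayer from the Text Analytics Toolbox looks like it might be a built-in counterpart. A quick check of that guess in PyTorch:

    import torch
    import torch.nn.functional as F

    emb = torch.nn.Embedding(num_embeddings=5, embedding_dim=4)
    seq = torch.tensor([1, 2, 3, 4])

    one_hot = F.one_hot(seq, num_classes=5).float()  # (4, 5)
    by_matmul = one_hot @ emb.weight                 # each one-hot row selects one weight row
    by_lookup = emb(seq)

    print(torch.allclose(by_matmul, by_lookup))  # prints True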
Many thanks!