现在有一个217个股票节点的网络,1290条有权重的边。数据集就是一个稀疏矩阵,从源节点到目标节点,和连边权重。现在用图卷积神经网络GCN对这个股票网络进行社团划分,按照每个股票节点之间的联系密切程度进行划分。

import torch
from torch_geometric.data import Data
from torch_geometric.nn import GCNConv
from sklearn.cluster import KMeans
import pandas as pd
import numpy as np
import networkx as nx
import openpyxl # 用于写入Excel
from sklearn.cluster import SpectralClustering
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch_geometric.utils import to_networkx
import matplotlib as plt
edge_data = pd.read_excel("C:/Users/wanzi/Desktop/测试数据.xlsx") #获取数据
edge_index = torch.tensor(edge_data[['source', 'target']].values, dtype=torch.long).t().contiguous()
edge_attr = torch.tensor(edge_data['weight'].values, dtype=torch.float)
matrix = pd.read_excel("C:/Users/aaa/Desktop/节点特征.xlsx") #将节点的真实社团标签作为特征向量
matrix = np.array(matrix["modularity_class"])
vectors = matrix.T.reshape(-1, 1)
# 假设每个节点的特征是它的索引(可根据实际情况更改)
features = torch.tensor(vectors,dtype=torch.float)
# 构建图数据对象
data = Data(x=features, edge_index=edge_index, edge_attr=edge_attr)
# 第三步: 定义和训练图卷积神经网络模型
# 定义GCN模型
class GCN(torch.nn.Module):
def __init__(self, input_dim, hidden_dim, output_dim):
super(GCN, self).__init__()
self.conv1 = GCNConv(input_dim, hidden_dim)
self.conv2 = GCNConv(hidden_dim, hidden_dim)
self.conv3 = GCNConv(hidden_dim,hidden_dim)
self.conv4 = GCNConv(hidden_dim,hidden_dim)
self.conv5 = GCNConv(hidden_dim,output_dim)
def forward(self, x, edge_index, edge_attr):
x = torch.relu(self.conv1(x, edge_index, edge_attr))
x = self.conv2(x, edge_index, edge_attr)
x = torch.relu(x)
x = self.conv3(x,edge_index,edge_attr)
x = torch.relu(x)
x = self.conv4(x,edge_index,edge_attr)
x = torch.relu(x)
x = self.conv5(x,edge_index,edge_attr)
return x
# 初始化模型和优化器
model = GCN(input_dim=1, hidden_dim=64, output_dim=12) # 输出维度是嵌入维度
optimizer = torch.optim.Adam(model.parameters(), lr=0.001,weight_decay=0.0005,amsgrad=True)
# 训练模型 (这里使用一个简单的无监督学习方式)
def train():
model.train()
optimizer.zero_grad()
embeddings = model(data.x, data.edge_index, data.edge_attr)
# 这里我们使用边权重的平方作为损失函数的一部分,以鼓励模型保持高权重的边连接的节点接近
loss = -torch.mean(data.edge_attr * torch.sum((embeddings[data.edge_index[0]] - embeddings[data.edge_index[1]]) ** 2, dim=1))
loss.backward()
optimizer.step()
return embeddings, loss.item()
# 训练模型
for epoch in range(1000):
embeddings, loss = train()
if epoch % 10 == 0:
print(f'Epoch {epoch} Loss: {loss}')
# 第四步: 使用网络嵌入结果进行社团划分
embeddings = embeddings.detach().numpy()
kmeans = KMeans(n_clusters=12) # 假设我们想要划分成5个社团
cluster_labels = kmeans.fit_predict(embeddings)
# 打印社团划分结果
print(cluster_labels)
请问上面的代码哪有不对吗,得到的社团划分结果很差,NMI也就0.3左右。