m0_55343311 2023-08-08 11:48 采纳率: 42.9%
浏览 5

关于#深度学习#的问题:exit("Error: There is only a cluster detected. The resolution:" + str((语言-python)

深度学习自编码器修改损失函数的问题

class ClusteringLayer(Layer):
    """
    Clustering layer converts input sample (feature) to soft label, i.e. a vector that represents the probability of the
    sample belonging to each cluster. The probability is calculated with student's t-distribution.

    # Example
    ```
        model.add(ClusteringLayer(n_clusters=10))
    ```
    # Arguments
        n_clusters: number of clusters.
        weights: list of Numpy array with shape `(n_clusters, n_features)` witch represents the initial cluster centers.
        alpha: parameter in Student's t-distribution. Default to 1.0.
    # Input shape
        2D tensor with shape: `(n_samples, n_features)`.
    # Output shape
        2D tensor with shape: `(n_samples, n_clusters)`.
    """

    def __init__(self, n_clusters, weights=None, alpha=1.0, **kwargs):
        if 'input_shape' not in kwargs and 'input_dim' in kwargs:
            kwargs['input_shape'] = (kwargs.pop('input_dim'),)
        super(ClusteringLayer, self).__init__(**kwargs)
        self.n_clusters = n_clusters
        self.alpha = alpha
        self.initial_weights = weights
        self.input_spec = InputSpec(ndim=2)

    def build(self, input_shape):
        assert len(input_shape) == 2
        input_dim = input_shape[1]
        self.input_spec = InputSpec(dtype=K.floatx(), shape=(None, input_dim))
        self.clusters = self.add_weight(shape=(self.n_clusters, input_dim), initializer='glorot_uniform', name='clusters') #(self.n_clusters, input_dim)
        if self.initial_weights is not None:
            self.set_weights(self.initial_weights)
            del self.initial_weights
        self.built = True

    def call(self, inputs, **kwargs):

        q = 1.0 / (1.0 + (K.sum(K.square(K.expand_dims(inputs, axis=1) - self.clusters), axis=2) / self.alpha))
        q **= (self.alpha + 1.0) / 2.0
        q = K.transpose(K.transpose(q) / K.sum(q, axis=1))
        return q

    def compute_output_shape(self, input_shape):
        assert input_shape and len(input_shape) == 2
        return input_shape[0], self.n_clusters

    def get_config(self):
        config = {'n_clusters': self.n_clusters}
        base_config = super(ClusteringLayer, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))

class Desc(object):  #DescModel
    def __init__(self,
                 dims,
                 x, 
                 alpha=1.0,
                 tol=0.005,
                 init='glorot_uniform',  # initialization method
                 louvain_resolution=1.0,  # resolution for louvain
                 n_neighbors=10,  # the
                 pretrain_epochs=300,  # epoch for autoencoder
                 epochs_fit=4,  # epochs for each update,int or float
                 batch_size=256,  # batch_size for autoencoder
                 random_seed=201809,
                 activation='relu',
                 actincenter="tanh",  # activation for the last layer in encoder, and first layer in the decoder
                 drop_rate_SAE=0.2,
                 is_stacked=True,
                 use_earlyStop=True,
                 use_ae_weights=False,
                 save_encoder_weights=False,
                 save_encoder_step=5,
                 save_dir="result_tmp",
                 kernel_clustering="t",
                 n_clusters=10
                 ):

        super(Desc, self).__init__()  #Model

        self.dims = dims
        self.x = x  # feature n*p, n:number of cells, p: number of genes
        self.alpha = alpha
        self.tol = tol
        self.init = init
        self.input_dim = dims[0]  # for clustering layer
        self.n_stacks = len(self.dims) - 1
        self.is_stacked = is_stacked
        self.resolution = louvain_resolution
        self.n_neighbors = n_neighbors
        self.pretrain_epochs = pretrain_epochs
        self.epochs_fit = epochs_fit
        self.batch_size = batch_size
        self.random_seed = random_seed
        self.activation = activation
        self.actincenter = actincenter
        self.drop_rate_SAE = drop_rate_SAE
        self.is_stacked = is_stacked
        self.use_earlyStop = use_earlyStop
        self.use_ae_weights = use_ae_weights
        self.save_encoder_weights = save_encoder_weights
        self.save_encoder_step = save_encoder_step
        self.save_dir = save_dir
        self.kernel_clustering = kernel_clustering
        self.n_clusters = n_clusters
   
        random.seed(random_seed)
        np.random.seed(random_seed)
        tf.random.set_seed(random_seed)
      
        self.autoencoder = SAE.autoencoder(self)#SAE1.SAE.make_autoencoders(self)    SAE.make_stack(self, ith=self.n_stacks-1)
        hidden = self.autoencoder.get_layer(name='encoder_%d' % (self.n_stacks - 1)).output  
        self.encoder = Model(inputs=self.autoencoder.input, outputs=hidden)
clustering_layer = ClusteringLayer(self.n_clusters, name='clustering')(hidden)# self.encoder.output DESC模型没有output self.kernel_clustering
        self.model = Model(inputs=self.autoencoder.input, outputs=[clustering_layer, self.autoencoder.input])
    def pretrain(self):
        sae = SAE(dims=self.dims,
                  act=self.activation,
                  drop_rate=self.drop_rate_SAE,
                  batch_size=self.batch_size,
                  random_seed=self.random_seed,
                  actincenter=self.actincenter,
                  init=self.init,
                  use_earlyStop=self.use_earlyStop,
                  save_dir=self.save_dir
                  )
        # begin pretraining
        t0 = get_time()
        if self.use_ae_weights:
            print("Checking whether %s  exists in the directory" % str(os.path.join(self.save_dir, 'ae_weights.h5')))
            if not os.path.isfile(self.save_dir + "/ae_weights.h5"):
                print("The file ae_weights.h5 is not exits")
                if self.is_stacked:
                    sae.fit(self.x, epochs=self.pretrain_epochs)
                else:
                    sae.fit2(self.x, epochs=self.pretrain_epochs)
                self.autoencoder = sae.autoencoders
                self.encoder = sae.encoder
            else:
                sae.autoencoders.load_weights(os.path.join(self.save_dir, "ae_weights.h5"))  #
                self.autoencoder = sae.autoencoders
                self.encoder = sae.encoder
        else:
            print("use_ae_weights=False, the program will rerun autoencoder")
            if self.is_stacked:
                sae.fit(self.x, epochs=self.pretrain_epochs)
            else:
                sae.fit2(self.x, epochs=self.pretrain_epochs)
            self.autoencoder = sae.autoencoders
            self.encoder = sae.encoder

        print('Pretraining time is', get_time() - t0)
        # save ae results into disk
        if not os.path.isfile(os.path.join(self.save_dir, "ae_weights.h5")):
            self.autoencoder.save_weights(os.path.join(self.save_dir, 'ae_weights.h5'))
            self.encoder.save_weights(os.path.join(self.save_dir, 'encoder_weights.h5'))
            print('Pretrained weights are saved to %s /ae_weights.h5' % self.save_dir)
        # save autoencoder model
        self.autoencoder.save(os.path.join(self.save_dir, "autoencoder_model.h5"))
        # initialize cluster centers using louvain if n_clusters is not exist
        features = self.extract_features(self.x)
        features = np.asarray(features)
        # if isinstance(n_clusters,int):
        if False:
            # saved for other initizlization methods in futher
            # print("...number of clusters have been specified, Initializing Cluster centroid  using K-Means")
            """
             kmeans = KMeans(n_clusters=n_clusters, n_init=20)
            Y_pred_init = kmeans.fit_predict(features)
            self.init_pred= np.copy(Y_pred_init)
            self.n_clusters=n_clusters
            cluster_centers=kmeans.cluster_centers_
            self.init_centroid=[cluster_centers]
            """
        else:
            print("...number of clusters is unknown, Initialize cluster centroid using louvain method")
            # can be replaced by other clustering methods
            # using louvain methods in scanpy
            adata0 = sc.AnnData(features)
            if adata0.shape[0] > 200000:
                np.random.seed(adata0.shape[0])  # set  seed
                adata0 = adata0[np.random.choice(adata0.shape[0], 200000, replace=False)]
            sc.pp.neighbors(adata0, n_neighbors=self.n_neighbors, use_rep="X")
            sc.tl.louvain(adata0, resolution=self.resolution)
            Y_pred_init = adata0.obs['louvain']
            self.init_pred = np.asarray(Y_pred_init, dtype=int)
            if np.unique(self.init_pred).shape[0] <= 1:
                # avoid only a cluster
                # print(np.unique(self.init_pred))
                exit("Error: There is only a cluster detected. The resolution:" + str(
                    self.resolution) + "is too small, please choose a larger resolution!!")
            features = pd.DataFrame(adata0.X, index=np.arange(0, adata0.shape[0]))
            Group = pd.Series(self.init_pred, index=np.arange(0, adata0.shape[0]), name="Group")
            Mergefeature = pd.concat([features, Group], axis=1)
            cluster_centers = np.asarray(Mergefeature.groupby("Group").mean())
            self.n_clusters = cluster_centers.shape[0]
            self.init_centroid = [cluster_centers]
        # create desc clustering layer
        if self.kernel_clustering == "gaussian":
            clustering_layer = ClusteringLayerGaussian(self.n_clusters, weights=self.init_centroid, name='clustering')(
                self.encoder.output)
        else:
            clustering_layer = ClusteringLayer(self.n_clusters, weights=self.init_centroid, name='clustering')(
                self.encoder.output)
        self.model = Model(inputs=self.encoder.input,
                           outputs=clustering_layer) 
    def load_weights(self, weights):  # load weights of DEC model
        self.model.load_weights(weights)

    def extract_features(self, x):
        #encoder = Model(self.encoder.input, self.encoder.get_layer('encoder_%d' % (self.n_stacks - 1)).output)#IDEC多
        return self.encoder.predict(x) #self.encoder.predict(x)

    def predict(self, x):  # predict cluster labels using the output of clustering layer
        q = self.model.predict(x, verbose=0)#q=
        return q.argmax(1)

    @staticmethod
    def target_distribution(q):
        weight = q ** 2 / q.sum(0)
        return (weight.T / weight.sum(1)).T


    def compile(self, loss={'clustering': 'kld', 'decoder_0': 'mse'}, optimizer='sgd',loss_weights=[1., 0.2]):#def compile(self, optimizer='sgd', loss='kld'):'clustering': 'kld'lambda y_pred:y_pred]
        self.model.compile(optimizer=optimizer, loss=loss, loss_weights=loss_weights)

    def fit_on_batch(self, maxiter=1e4, update_interval=200, save_encoder_step=4):
        save_dir = self.save_dir
        # step1 initial weights by louvain,or Kmeans
        self.model.get_layer(name='clustering').set_weights(self.init_centroid)
        # Step 2: deep clustering
        y_pred_last = np.copy(self.init_pred)
        index_array = np.arange(self.x.shape[0])
        index = 0
        for ite in range(int(maxiter)):
            if self.save_encoder_weights and ite % (save_encoder_step * update_interval) == 0:
                self.encoder.save_weights(os.path.join(self.save_dir,
                                                       'encoder_weights_resolution_' + str(self.resolution) + "_" + str(
                                                           ite) + '.h5'))
                print('Fine tuning encoder weights are saved to %s/encoder_weights.h5' % self.save_dir)
            if ite % update_interval == 0:
                q = self.model.predict(self.x, verbose=0)
                p = self.target_distribution(q)
                y_pred = q.argmax(1)
                delta_label = np.sum(y_pred != y_pred_last).astype(np.float32) / y_pred.shape[0]
                y_pred_last = np.copy(y_pred)
                print("The value of delta_label of current", str(ite + 1), "th iteration is", delta_label, ">= tol",
                      self.tol)
                if ite > 0 and delta_label < self.tol:
                    print('delta_label ', delta_label, '< tol ', self.tol)
                    print('Reached tolerance threshold. Stop training.')
                    break
            idx = index_array[index * self.batch_size: min((index + 1) * self.batch_size, self.x.shape[0])]
            loss = self.model.train_on_batch(x=self.x[idx], y=p[idx])
            index = index + 1 if (index + 1) * self.batch_size <= self.x.shape[0] else 0
        # save encoder model
        self.encoder.save(os.path.join(self.save_dir, "encoder_model.h5"))
        # load model
        # encoder=load_model("encoder.h5")
        #
        y0 = pd.Series(y_pred, dtype='category')
        y0.cat.categories = range(0, len(y0.cat.categories))
        print("The final prediction cluster is:")
        x = y0.value_counts()
        print(x.sort_index(ascending=True))
        Embedded_z = self.extract_features(self.x)
        q = self.model.predict(self.x, verbose=0)
        return Embedded_z,q

自从把损失函数 self.model = Model(inputs=self.autoencoder.input, outputs=[clustering_layer, self.autoencoder.input])从一个输出改成两个,但是后面fit程序就会报错,比如AttributeError: 'Desc' object has no attribute 'init_centroid'然后加上特征又会报错AttributeError: 'Desc' object has no attribute 'encoder' 请问大家后面的程序fit需要怎么改动才能符合两个损失函数

  • 写回答

2条回答 默认 最新

  • 梦幻精灵_cq 2023-08-08 12:02
    关注
    • 舍弃报错方法的引用,就不会报错。因为方法引用的对象,已不是代码预期的对象。

    • 修正:检读代码,查找为什么不能得到方法引用对象“本来的样子”?


    评论

报告相同问题?

问题事件

  • 创建了问题 8月8日

悬赏问题

  • ¥15 mySQL5.7.34安装遇到的问题
  • ¥15 结构功能耦合指标计算
  • ¥20 visual studio中c语言用ODBC链接SQL SERVER
  • ¥50 AI大模型精调(百度千帆、飞浆)
  • ¥15 非科班怎么跑代码?如何导数据和调参
  • ¥15 福州市的全人群死因监测点死亡原因报表
  • ¥15 Altair EDEM中生成一个颗粒,并且各个方向没有初始速度
  • ¥15 系统2008r2 装机配置推荐一下
  • ¥15 悬赏Python-playwright部署在centos7上
  • ¥15 psoc creator软件有没有人能远程安装啊