From a65f9e1dbc023b4194d661d9e665a62b03b69566 Mon Sep 17 00:00:00 2001 From: Hance_Wu_M1 <32484940+wzh4464@users.noreply.github.com> Date: Sun, 4 Aug 2024 19:22:24 +0800 Subject: [PATCH] yan's advice --- main.tex | 2 +- sections/conclusion.tex | 2 +- sections/experiment.tex | 2 +- sections/introduction.tex | 6 +- sections/method.tex | 5 +- sections/related_work.tex | 2 +- updated.bib | 211 ++++++++++++++------------------------ 7 files changed, 85 insertions(+), 145 deletions(-) diff --git a/main.tex b/main.tex index 2f12a8b..24194ea 100644 --- a/main.tex +++ b/main.tex @@ -8,7 +8,7 @@ % Created Date: Thursday, July 11th 2024 % Author: Zihan % ----- - % Last Modified: Sunday, 4th August 2024 11:02:16 am + % Last Modified: Sunday, 4th August 2024 7:41:27 pm % Modified By: the developer formerly known as Zihan at % ----- % HISTORY: diff --git a/sections/conclusion.tex b/sections/conclusion.tex index 5eb254a..43bc675 100644 --- a/sections/conclusion.tex +++ b/sections/conclusion.tex @@ -13,4 +13,4 @@ \section{Conclusion} \label{sec:conclude} -This paper introduces a novel, scalable co-clustering method for large matrices, addressing the computational challenges of high-dimensional data analysis. Our method first partitions large matrices into smaller, parallel-processed submatrices, significantly reducing processing time. Next, a hierarchical co-cluster merging algorithm integrates the submatrix results, ensuring accurate and consistent final co-clustering. Extensive evaluations demonstrate that our method outperforms existing solutions in handling large-scale datasets, proving its effectiveness, efficiency, and scalability. This work sets a new benchmark for future research in scalable data analysis technologies. +This paper introduces a novel, scalable co-clustering method for large matrices, addressing the computational challenges of high-dimensional data analysis. 
Our method first partitions large matrices into smaller, parallel-processed submatrices, significantly reducing processing time. Next, a hierarchical co-cluster merging algorithm integrates the submatrix results, ensuring accurate and consistent final co-clustering. Extensive evaluations demonstrate that our method outperforms existing solutions in handling large-scale datasets, proving its effectiveness, efficiency, and scalability. diff --git a/sections/experiment.tex b/sections/experiment.tex index 9dd6fef..cab1c41 100644 --- a/sections/experiment.tex +++ b/sections/experiment.tex @@ -13,7 +13,7 @@ \section{Experimental Evaluation} \label{sec:experiment} -\subsection{Experimental Setup} +\subsection{Experiment Setup} \textbf{Datasets.} The experiments were conducted using three distinct datasets to demonstrate the versatility and robustness of our method: diff --git a/sections/introduction.tex b/sections/introduction.tex index b544b0f..fa00385 100644 --- a/sections/introduction.tex +++ b/sections/introduction.tex @@ -12,7 +12,7 @@ %%% \section{Introduction} -Artificial Intelligence is a rapidly advancing technology facilitating complex data analysis, pattern recognition, and decision-making processes. Clustering, a fundamental unsupervised learning technique, groups data points based on shared features, aiding in interpreting complex data structures. However, traditional clustering algorithms \cite{zhang2023AdaptiveGraphConvolution, wu2023EffectiveClusteringStructured} tend to treat all features of data uniformly and solely cluster either rows (samples) or columns (features), as shown in Figure \ref{fig:cluster}. They oversimplified interpretations and overlooked critical context-specific relationships within the data, especially when dealing with large, high-dimensional datasets \cite{chen2023FastFlexibleBipartite, zhao2023MultiviewCoclusteringMultisimilarity, kumar2023CoclusteringBasedMethods}. 
+Artificial Intelligence is a rapidly advancing technology facilitating complex data analysis, pattern recognition, and decision-making processes. Clustering, a fundamental unsupervised learning technique, groups data points based on shared features, aiding in interpreting complex data structures. However, traditional clustering algorithms \cite{zhang2023AdaptiveGraphConvolution, wu2023EffectiveClusteringStructured} treat all features of data uniformly and solely cluster either rows (samples) or columns (features), as shown in Figure \ref{fig:cluster}. They oversimplify interpretations and overlook critical context-specific relationships within the data, especially when dealing with large, high-dimensional datasets \cite{chen2023FastFlexibleBipartite, zhao2023MultiviewCoclusteringMultisimilarity, kumar2023CoclusteringBasedMethods}. \textit{Co-clustering} \cite{kluger2003SpectralBiclusteringMicroarray, yan2017CoclusteringMultidimensionalBig} is a technique that groups rows (samples) and columns (features) simultaneously, as shown in Figure \ref{fig:cocluster}. It can reveal complex correlations between two different data types and is transformative in scenarios where the relationships between rows and columns are as important as the individual entities themselves. For example, in bioinformatics, co-clustering could identify gene-related patterns leading to biological insights by concurrently analyzing genes and conditions \cite{higham2007SpectralClusteringIts, kluger2003SpectralBiclusteringMicroarray, zhao2012BiclusteringAnalysisPattern}. In recommendation systems, co-clustering can simultaneously discover more fine-grained relationships between users and projects \cite{dhillon2007WeightedGraphCuts, chen2023ParallelNonNegativeMatrix}. Co-clustering extends traditional clustering methods, enhancing accuracy in pattern detection and broadening the scope of analyses. 
@@ -39,7 +39,7 @@ \section{Introduction} \begin{itemize} \item{\textbf{High Computational Complexity.}} Co-clustering analyzes relationships both within and across the rows and columns of a dataset simultaneously. This dual-focus analysis requires evaluating a vast number of potential relationships, particularly as the dimensions of the data increase. The complexity can grow exponentially with the size of the data because the algorithm must process every possible combination of rows and columns to identify meaningful clusters \cite{hansen2011NonparametricCoclusteringLarge}. \item{\textbf{Significant Communication Overhead.}} Even when methods such as data partitioning are used to handle large-scale data, each partition may independently analyze a subset of the data. However, to optimize the clustering results globally, these partitions need to exchange intermediate results frequently. This requirement is inherent to iterative optimization techniques used in co-clustering, where each iteration aims to refine the clusters based on new data insights, necessitating continuous updates across the network. Such extensive communication can become a bottleneck, significantly slowing down the overall processing speed. - \item{\textbf{Dependency on Sparse Matrices.}} Many traditional co-clustering algorithms are designed to perform best with sparse matrices \cite{pan2008CRDFastCoclusteringa}. However, in many real-world applications, data matrices are often dense, meaning most elements are non-zero. Such scenarios present a significant challenge for standard co-clustering algorithms, as they must handle a larger volume of data without the computational shortcuts available with sparse matrices. + \item{\textbf{Dependency on Sparse Matrices.}} Several traditional co-clustering algorithms are designed to perform best with sparse matrices \cite{pan2008CRDFastCoclusteringa}. 
However, in many real-world applications, data matrices are often dense, meaning most elements are non-zero. Such scenarios present a significant challenge for standard co-clustering algorithms, as they must handle a larger volume of data without the computational shortcuts available with sparse matrices. \end{itemize} To address the inherent challenges associated with existing co-clustering methods, we propose a novel and scalable Adaptive Hierarchical Partitioning and Merging for Scalable Co-Clustering (\textbf{AHPM}) framework designed for large-scale datasets. First, we propose a large matrix partitioning algorithm that divides the original data matrix into smaller submatrices. This partitioning facilitates parallel processing of co-clustering tasks across submatrices, significantly reducing both processing time and computational and storage demands for each processing unit. We also design a probabilistic model to determine the optimal number and configuration of these submatrices to ensure comprehensive data coverage. @@ -51,7 +51,7 @@ \section{Introduction} We propose a novel matrix partitioning algorithm that enables parallel co-clustering by dividing a large matrix into optimally configured submatrices. This design is supported by a probabilistic model that calculates the optimal number and order of submatrices, balancing computational efficiency with the detection of relevant co-clusters. \item \textbf{Hierarchical Co-cluster Merging Algorithm:} We design a hierarchical co-cluster merging algorithm that combines co-clusters from submatrices, ensuring the completion of the co-clustering process within a pre-fixed number of iterations. This algorithm significantly enhances the robustness and reliability of the co-clustering process, effectively addressing model uncertainty. 
- \item \textbf{Experimental valuation:} + \item \textbf{Experimental Evaluation:} We evaluate the effectiveness and efficiency of our method across a wide range of scenarios with large, complex data. Experimental results show an approximate 83\% decrease for dense matrices and up to 30\% for sparse matrices. \end{enumerate} diff --git a/sections/method.tex b/sections/method.tex index 17a6db2..09b9009 100644 --- a/sections/method.tex +++ b/sections/method.tex @@ -7,7 +7,7 @@ \section{Mathematical Formulation and Problem Statement}\label{sec:formula} \subsection{Mathematical Formulation of Co-clustering} -Co-clustering groups rows and columns of a data matrix $\mathbf{A} \in \mathbb{R}^{M \times N}$, where $M$ is the number of features and $N$ is the number of samples. Each element $a_{ij}$ represents the relationship between the $i$-th feature and the $j$-th sample. The goal is to partition $\mathbf{A}$ into $k$ row clusters and $d$ column clusters, creating $k \times d$ homogeneous submatrices $\mathbf{A}_{I, J}$. +Co-clustering groups rows and columns of a data matrix $\mathbf{A} \in \mathbb{R}^{M \times N}$, where $M$ is the number of features and $N$ is the number of samples. Each element $a_{ij}$ represents the $i$-th feature of the $j$-th sample. The goal is to partition $\mathbf{A}$ into $k$ row clusters and $d$ column clusters, creating $k \times d$ homogeneous submatrices $\mathbf{A}_{I, J}$. When optimally reordered, $\mathbf{A}$ forms a block-diagonal structure where each block is a co-cluster with high internal similarity. Row and column labels are \( u \in \{1,\dots,k\}^M \) and \( v \in \{1,\dots,d\}^N \). Indicator matrices \( R \in \mathbb{R}^{M \times k} \) and \( C \in \mathbb{R}^{N \times d} \) assign rows and columns to clusters, ensuring unique assignments. @@ -24,7 +24,6 @@ \subsection{Problem Statement} $a_{ij}$ & Element at the $i$-th row and $j$-th column of matrix $\mathbf{A}$. 
\\ $I, J$ & Indices of rows and columns selected for co-clustering. \\ $\mathbf{A}_{I, J}$ & Submatrix containing the rows indexed by $I$ and columns by $J$. \\ - $k, d$ & Number of row clusters and column clusters, respectively. \\ $R, C$ & Indicator matrices for row and column cluster assignments. \\ $\phi_i, \psi_j$ & Block sizes in rows and columns, respectively. \\ $s_i^{(k)}, t_j^{(k)}$ & Minimum row and column sizes of co-cluster $C_k$ in block $B_{(i,j)}$. \\ @@ -58,7 +57,7 @@ \subsection{Overview} \subsection{Large Matrix Partitioning} % Description of the matrix partitioning process and criteria for partitioning. -The primary challenge in co-clustering large matrices is the risk of losing meaningful co-cluster relationships when the matrix is partitioned into smaller submatrices. To address this, we introduce an optimal partitioning algorithm underpinned by a probabilistic model. This model is meticulously designed to navigate the complexities of partitioning, ensuring that the integrity of co-clusters is maintained even as the matrix is divided. The objective of this algorithm is twofold: to determine the optimal partitioning strategy that minimizes the risk of fragmenting significant co-clusters and to define the appropriate number of repartitioning iterations needed to achieve a desired success rate of co-cluster identification. +The primary challenge in co-clustering large matrices is the risk of losing co-clusters when the matrix is partitioned into smaller submatrices. To address this, we introduce an optimal partitioning algorithm underpinned by a probabilistic model. This model is meticulously designed to navigate the complexities of partitioning, ensuring that the integrity of co-clusters is maintained even as the matrix is divided. 
The objective of this algorithm is twofold: to determine the optimal partitioning strategy that minimizes the risk of fragmenting significant co-clusters and to define the appropriate number of repartitioning iterations needed to achieve a desired success rate of co-cluster identification. \subsubsection{Partitioning and Repartitioning Strategy based on the Probabilistic Model} Our probabilistic model serves as the cornerstone of the partitioning algorithm. It evaluates potential partitioning schemes based on their ability to preserve meaningful co-cluster structures within smaller submatrices. The model operates under the premise that each atom-co-cluster (the smallest identifiable co-cluster within a submatrix) can be identified with a probability $p$. This probabilistic model allows us to estimate the likelihood of successfully identifying all relevant co-clusters across the partitioned submatrices. diff --git a/sections/related_work.tex b/sections/related_work.tex index ecf2a99..fc30ad8 100644 --- a/sections/related_work.tex +++ b/sections/related_work.tex @@ -14,7 +14,7 @@ \section{Related work} \label{sec:related_work} \subsection{Co-clustering Methods} -Co-clustering methods, broadly categorized into graph-based and matrix factorization-based approaches, have limitations in handling large datasets. Graph-based methods like Flexible Bipartite Graph Co-clustering (FBGPC) \cite{chen2023FastFlexibleBipartitea} directly apply flexible bipartite graph models. Matrix factorization-based methods, such as Non-negative Matrix Tri-Factorization (NMTF) \cite{long2005CoclusteringBlockValue}, decompose data to cluster samples and features separately. Deep Co-Clustering (DeepCC) \cite{dongkuanxu2019DeepCoClustering}, which integrates deep autoencoders with Gaussian Mixture Models, also faces efficiency challenges with diverse data types and large datasets. 
+Co-clustering methods, broadly categorized into graph-based and matrix factorization-based approaches, have limitations in handling large datasets. Graph-based methods like Flexible Bipartite Graph Co-clustering (FBGPC) \cite{chen2023FastFlexibleBipartite} directly apply flexible bipartite graph models. Matrix factorization-based methods, such as Non-negative Matrix Tri-Factorization (NMTF) \cite{long2005CoclusteringBlockValue}, decompose data to cluster samples and features separately. Deep Co-Clustering (DeepCC) \cite{dongkuanxu2019DeepCoClustering}, which integrates deep autoencoders with Gaussian Mixture Models, also faces efficiency challenges with diverse data types and large datasets. \subsection{Parallelizing Co-clustering} Parallel methods are crucial for big data processing. The CoClusterD framework \cite{cheng2015CoClusterDDistributedFramework} uses Alternating Minimization Co-clustering (AMCC) in a distributed environment but struggles with guaranteed convergence. Chen \textit{et al.} \cite{chen2023ParallelNonNegativeMatrix} introduced a parallel non-negative matrix tri-factorization method to accelerate computations but still faces difficulties with very large datasets. diff --git a/updated.bib b/updated.bib index dad04e6..e8cafd6 100644 --- a/updated.bib +++ b/updated.bib @@ -1,10 +1,8 @@ @incollection{bouchareb2019ModelBasedCoclustering, abstract = {Co-clustering is a data mining technique used to extract the underlying block structureBouchareb, Aichetou\&\#160;between the rows and columns of a data matrix. 
Many approaches have been studied and have shown their capacity to extractBoull\&\#233;, Marc such structures...}, author = {Bouchareb, Aichetou and Boullé, Marc and Clérot, Fabrice and Rossi, Fabrice}, - publisher = {Springer, Cham}, - url = {https://doi.org/10.1007/978-3-030-18129-1_1}, booktitle = {Advances in {{Knowledge Discovery}} and {{Management}}}, - date = {2019}, + year = {2019}, doi = {10.1007/978-3-030-18129-1_1}, file = {D:\zihan\Zotero\storage\B6BTC8D7\Bouchareb 等 - 2019 - Model based co-clustering of mixed numerical and binary data.pdf}, isbn = {978-3-030-18129-1}, @@ -12,55 +10,36 @@ @incollection{bouchareb2019ModelBasedCoclustering langid = {english}, pages = {3--22}, title = {Model Based Co-Clustering of Mixed Numerical and Binary Data}, - urldate = {2024-02-09} + urlyear = {2024} } @article{chen2023FastFlexibleBipartite, abstract = {Co-clustering methods make use of the correlation between samples and attributes to explore the co-occurrence structure in data. These methods have played a significant role in gene expression analysis, image segmentation, and document clustering. In bipartite graph partition-based co-clustering methods, the relationship between samples and attributes is described by constructing a diagonal symmetric bipartite graph matrix, which is clustered by the philosophy of spectral clustering. However, this not only has high time complexity but also the same number of row and column clusters. In fact, the number of categories of rows and columns often changes in the real world. To address these problems, this paper proposes a novel fast flexible bipartite graph model for the co-clustering method (FBGPC) that directly uses the original matrix to construct the bipartite graph. Then, it uses the inflation operation to partition the bipartite graph in order to learn the co-occurrence structure of the original data matrix based on the inherent relationship between bipartite graph partitioning and co-clustering. 
Finally, hierarchical clustering is used to obtain the clustering results according to the set relationship of the co-occurrence structure. Extensive empirical results show the effectiveness of our proposed model and verify the faster performance, generality, and flexibility of our model.}, author = {Chen, Wei and Wang, Hongjun and Long, Zhiguo and Li, Tianrui}, - url = {https://doi.org/10.1109/TKDE.2022.3194275}, - date = {2023-07}, + year = {2023}, doi = {10.1109/TKDE.2022.3194275}, eventtitle = {{{IEEE Transactions}} on {{Knowledge}} and {{Data Engineering}}}, file = {D\:\\zihan\\Zotero\\storage\\IQIEQB6I\\Chen 等 - 2023 - Fast flexible bipartite graph model for co-clustering.pdf;D\:\\zihan\\Zotero\\storage\\KED2JCCS\\9842309.html}, issn = {1558-2191}, - journaltitle = {IEEE Transactions on Knowledge and Data Engineering}, + journal = {IEEE Transactions on Knowledge and Data Engineering}, keywords = {Bipartite graph,bipartite graph partition,Clustering algorithms,Clustering methods,Co-clustering,Computational modeling,Data models,faster performance,flexibility,Partitioning algorithms,Time complexity}, langid = {english}, number = {7}, pages = {6930--6940}, shortjournal = {IEEE Trans. Knowl. Data Eng.}, title = {Fast Flexible Bipartite Graph Model for Co-Clustering}, - urldate = {2024-02-09}, - volume = {35} -} - -@article{chen2023FastFlexibleBipartitea, - abstract = {Co-clustering methods make use of the correlation between samples and attributes to explore the co-occurrence structure in data. These methods have played a significant role in gene expression analysis, image segmentation, and document clustering. In bipartite graph partition-based co-clustering methods, the relationship between samples and attributes is described by constructing a diagonal symmetric bipartite graph matrix, which is clustered by the philosophy of spectral clustering. However, this not only has high time complexity but also the same number of row and column clusters. 
In fact, the number of categories of rows and columns often changes in the real world. To address these problems, this paper proposes a novel fast flexible bipartite graph model for the co-clustering method (FBGPC) that directly uses the original matrix to construct the bipartite graph. Then, it uses the inflation operation to partition the bipartite graph in order to learn the co-occurrence structure of the original data matrix based on the inherent relationship between bipartite graph partitioning and co-clustering. Finally, hierarchical clustering is used to obtain the clustering results according to the set relationship of the co-occurrence structure. Extensive empirical results show the effectiveness of our proposed model and verify the faster performance, generality, and flexibility of our model.}, - author = {Chen, Wei and Wang, Hongjun and Long, Zhiguo and Li, Tianrui}, - url = {https://doi.org/10.1109/TKDE.2022.3194275}, - date = {2023-07}, - doi = {10.1109/TKDE.2022.3194275}, - eventtitle = {{{IEEE Transactions}} on {{Knowledge}} and {{Data Engineering}}}, - file = {/Volumes/Mac_Ext/Zotero/storage/KSAR6TWQ/Chen 等 - 2023 - Fast Flexible Bipartite Graph Model for Co-Clustering.pdf;/Volumes/Mac_Ext/Zotero/storage/SJBTPX4P/9842309.html}, - issn = {1558-2191}, - journaltitle = {IEEE Transactions on Knowledge and Data Engineering}, - keywords = {Bipartite graph,bipartite graph partition,Clustering algorithms,Clustering methods,Co-clustering,Computational modeling,Data models,faster performance,flexibility,Partitioning algorithms,Time complexity}, - number = {7}, - pages = {6930--6940}, - title = {Fast {{Flexible Bipartite Graph Model}} for {{Co-Clustering}}}, - urldate = {2024-03-27}, + urlyear = {2024}, volume = {35} } @article{chen2023ParallelNonNegativeMatrix, abstract = {As a novel paradigm for data mining and dimensionality reduction, Non-negative Matrix Tri-Factorization (NMTF) has attracted much attention due to its notable performance and elegant 
mathematical derivation, and it has been applied to a plethora of real-world applications, such as text data co-clustering. However, the existing NMTF-based methods usually involve intensive matrix multiplications, which exhibits a major limitation of high computational complexity. With the explosion at both the size and the feature dimension of texts, there is a growing need to develop a parallel and scalable NMTF-based algorithm for text data co-clustering. To this end, we first show in this paper how to theoretically derive the original optimization problem of NMTF by introducing the Lagrangian multipliers. Then, we propose to solve the Lagrange dual objective function in parallel through an efficient distributed implementation. Extensive experiments on five benchmark corpora validate the effectiveness, efficiency, and scalability of our distributed parallel update algorithm for an NMTF-based text data co-clustering method.}, author = {Chen, Yufu and Lei, Zhiqi and Rao, Yanghui and Xie, Haoran and Wang, Fu Lee and Yin, Jian and Li, Qing}, - date = {2023-05}, + year = {2023}, doi = {10.1109/TKDE.2022.3145489}, eventtitle = {{{IEEE Transactions}} on {{Knowledge}} and {{Data Engineering}}}, issn = {1558-2191}, - journaltitle = {IEEE Transactions on Knowledge and Data Engineering}, + journal = {IEEE Transactions on Knowledge and Data Engineering}, number = {5}, pages = {5132--5146}, shortjournal = {IEEE Trans. Knowl. Data Eng.}, @@ -69,84 +48,68 @@ @article{chen2023ParallelNonNegativeMatrix } @article{cheng2015CoClusterDDistributedFramework, - abstract = {Co-clustering has emerged to be a powerful data mining tool for two-dimensional co-occurrence and dyadic data. However, co-clustering algorithms often require significant computational resources and have been dismissed as impractical for large data sets. 
Existing studies have provided strong empirical evidence that expectation-maximization (EM) algorithms (e.g., k-means algorithm) with sequential updates can significantly reduce the computational cost without degrading the resulting solution. Motivated by this observation, we introduce sequential updates for alternate minimization co-clustering (AMCC) algorithms which are variants of EM algorithms, and also show that AMCC algorithms with sequential updates converge. We then propose two approaches to parallelize AMCC algorithms with sequential updates in a distributed environment. Both approaches are proved to maintain the convergence properties of AMCC algorithms. Based on these two approaches, we present a new distributed framework, Co-ClusterD, which supports efficient implementations of AMCC algorithms with sequential updates. We design and implement Co-ClusterD, and show its efficiency through two AMCC algorithms: fast nonnegative matrix tri-factorization (FNMTF) and information theoretic co-clustering (ITCC). We evaluate our framework on both a local cluster of machines and the Amazon EC2 cloud. 
Empirical results show that AMCC algorithms implemented in Co-ClusterD can achieve a much faster convergence and often obtain better results than their traditional concurrent counterparts.}, - author = {Cheng, Xiang and Su, Sen and Gao, Lixin and Yin, Jiangtao}, - url = {https://doi.org/10.1109/TKDE.2015.2451634}, - date = {2015-12}, - doi = {10.1109/TKDE.2015.2451634}, - eventtitle = {{{IEEE Transactions}} on {{Knowledge}} and {{Data Engineering}}}, - file = {/Volumes/Mac_Ext/Zotero/storage/INTX2GR8/Cheng 等 - 2015 - Co-ClusterD A distributed framework for data co-clustering with sequential updates.pdf;/Volumes/Mac_Ext/Zotero/storage/WXFGE5JN/7145441.html}, - issn = {1558-2191}, - journaltitle = {IEEE Transactions on Knowledge and Data Engineering}, - keywords = {Algorithm design and analysis,Approximation algorithms,cloud computing,Cloud Computing,Clustering algorithms,Co-Clustering,concurrent updates,Concurrent Updates,Convergence,distributed framework,Distributed Framework,Linear programming,Minimization,Prototypes,sequential updates,Sequential Updates}, - number = {12}, - pages = {3231--3244}, - shorttitle = {Co-{{ClusterD}}}, - title = {Co-{{ClusterD}}: {{A}} Distributed Framework for Data Co-Clustering with Sequential Updates}, - urldate = {2024-03-27}, - volume = {27} + abstract = {Co-clustering has emerged to be a powerful data mining tool for two-dimensional co-occurrence and dyadic data. However, co-clustering algorithms often require significant computational resources and have been dismissed as impractical for large data sets. Existing studies have provided strong empirical evidence that expectation-maximization (EM) algorithms (e.g., k-means algorithm) with sequential updates can significantly reduce the computational cost without degrading the resulting solution. 
Motivated by this observation, we introduce sequential updates for alternate minimization co-clustering (AMCC) algorithms which are variants of EM algorithms, and also show that AMCC algorithms with sequential updates converge. We then propose two approaches to parallelize AMCC algorithms with sequential updates in a distributed environment. Both approaches are proved to maintain the convergence properties of AMCC algorithms. Based on these two approaches, we present a new distributed framework, Co-ClusterD, which supports efficient implementations of AMCC algorithms with sequential updates. We design and implement Co-ClusterD, and show its efficiency through two AMCC algorithms: fast nonnegative matrix tri-factorization (FNMTF) and information theoretic co-clustering (ITCC). We evaluate our framework on both a local cluster of machines and the Amazon EC2 cloud. Empirical results show that AMCC algorithms implemented in Co-ClusterD can achieve a much faster convergence and often obtain better results than their traditional concurrent counterparts.}, + author = {Cheng, Xiang and Su, Sen and Gao, Lixin and Yin, Jiangtao}, + year = {2015}, + doi = {10.1109/TKDE.2015.2451634}, + eventtitle = {{{IEEE Transactions}} on {{Knowledge}} and {{Data Engineering}}}, + file = {/Volumes/Mac_Ext/Zotero/storage/INTX2GR8/Cheng 等 - 2015 - Co-ClusterD A distributed framework for data co-clustering with sequential updates.pdf;/Volumes/Mac_Ext/Zotero/storage/WXFGE5JN/7145441.html}, + issn = {1558-2191}, + journal = {IEEE Transactions on Knowledge and Data Engineering}, + keywords = {Algorithm design and analysis,Approximation algorithms,cloud computing,Cloud Computing,Clustering algorithms,Co-Clustering,concurrent updates,Concurrent Updates,Convergence,distributed framework,Distributed Framework,Linear programming,Minimization,Prototypes,sequential updates,Sequential Updates}, + number = {12}, + pages = {3231--3244}, + shorttitle = {Co-{{ClusterD}}}, + title = {Co-{{ClusterD}}: {{A}} 
Distributed Framework for Data Co-Clustering with Sequential Updates}, + urlyear = {2024}, + volume = {27} } @article{cheng2000BiclusteringExpressionData, abstract = {An efficient node-deletion algorithm is introduced to find submatrices in expression data that have low mean squared residue scores and it is shown to perform well in finding co-regulation patterns in yeast and human. An efficient node-deletion algorithm is introduced to find submatrices in expression data that have low mean squared residue scores and it is shown to perform well in finding co-regulation patterns in yeast and human. This introduces \"biclustering\", or simultaneous clustering of both genes and conditions, to knowledge discovery from expression data. This approach overcomes some problems associated with traditional clustering methods, by allowing automatic discovery of similarity based on a subset of attributes, simultaneous clustering of genes and conditions, and overlapped grouping that provides a better representation for genes with multiple functions or regulated by many factors.}, author = {Cheng, Yizong and Church, G.}, url = {https://www.cs.princeton.edu/courses/archive/fall03/cs597F/Articles/biclustering_of_expression_data.pdf}, - date = {2000}, - journaltitle = {Proceedings. International Conference on Intelligent Systems for Molecular Biology}, + year = {2000}, + journal = {Proceedings. International Conference on Intelligent Systems for Molecular Biology}, langid = {english}, shortjournal = {Proceedings. 
International Conference on Intelligent Systems for Molecular Biology}, title = {Biclustering of {{Expression Data}}}, - urldate = {2023-04-18} + urlyear = {2023} } @inproceedings{dhillon2001CoclusteringDocumentsWords, - abstract = {Bothdoumentlusteringandwordlusteringarewellstudiedproblems.Mostexistingalgorithmslusterdouments andwordsseparatelybutnotsimultaneously.Inthispaper wepresentthenovelideaofmodelingthedoumentolletionasabipartitegraphbetweendoumentsandwords,usingwhihthesimultaneouslusteringproblemanbeposed asabipartitegraphpartitioningproblem.Tosolvethepartitioningproblem,weuseanewspetralo-lusteringalgorithmthatusestheseondleftandrightsingularvetorsof anappropriatelysaledword-doumentmatrixtoyieldgood bipartitionings.Thespetralalgorithmenjoyssomeoptimalityproperties;itanbeshownthatthesingularvetors solvearealrelaxationtotheNP-ompletegraphbipartitioningproblem.Wepresentexperimentalresultstoverifythat theresultingo-lusteringalgorithmworkswellinpratie.}, - author = {Dhillon, Inderjit S.}, - location = {San Francisco California}, - publisher = {ACM}, - url = {https://doi.org/10.1145/502512.502550}, - booktitle = {Proceedings of the Seventh {{ACM SIGKDD}} International Conference on {{Knowledge}} Discovery and Data Mining}, - date = {2001-08-26}, - doi = {10.1145/502512.502550}, - eventtitle = {{{KDD01}}: {{ACM SIGKDD International Conference}} on {{Knowledge Discovery}} and {{Data Mining}}}, - file = {D\:\\zihan\\Zotero\\storage\\VIX9F4QT\\Dhillon - 2001 - Co-clustering documents and words using bipartite .pdf;D\:\\zihan\\Zotero\\storage\\Z6QS73SY\\Dhillon - 2001 - Co-clustering documents and words using bipartite spectral graph partitioning.pdf}, - isbn = {978-1-58113-391-2}, - keywords = {/unread,good cocluster}, - langid = {english}, - pages = {269--274}, - title = {Co-Clustering Documents and Words Using Bipartite Spectral Graph Partitioning}, - urldate = {2024-02-08} + title = {Co-clustering documents and words using bipartite spectral graph partitioning}, 
+ author = {Inderjit S. Dhillon}, + booktitle = {Knowledge Discovery and Data Mining}, + year = {2001} } @article{dhillon2007WeightedGraphCuts, abstract = {A variety of clustering algorithms have recently been proposed to handle data that is not linearly separable; spectral clustering and kernel k-means are two of the main methods. In this paper, we discuss an equivalence between the objective functions used in these seemingly different methods - in particular, a general weighted kernel k-means objective is mathematically equivalent to a weighted graph clustering objective. We exploit this equivalence to develop a fast high-quality multilevel algorithm that directly optimizes various weighted graph clustering objectives, such as the popular ratio cut, normalized cut, and ratio association criteria. This eliminates the need for any eigenvector computation for graph clustering problems, which can be prohibitive for very large graphs. Previous multilevel graph partitioning methods such as Metis have suffered from the restriction of equal-sized clusters; our multilevel algorithm removes this restriction by using kernel k-means to optimize weighted graph cuts. Experimental results show that our multilevel algorithm outperforms a state-of-the-art spectral clustering algorithm in terms of speed, memory usage, and quality. We demonstrate that our algorithm is applicable to large-scale clustering tasks such as image segmentation, social network analysis, and gene network analysis.}, author = {Dhillon, Inderjit S. 
and Guan, Yuqiang and Kulis, Brian}, - url = {https://doi.org/10.1109/TPAMI.2007.1115}, - date = {2007-11}, + year = {2007}, doi = {10.1109/TPAMI.2007.1115}, eventtitle = {{{IEEE Transactions}} on {{Pattern Analysis}} and {{Machine Intelligence}}}, file = {D\:\\zihan\\Zotero\\storage\\JGZ4PCF7\\Dhillon 等 - 2007 - Weighted graph cuts without eigenvectors a multilevel approach.pdf;D\:\\zihan\\Zotero\\storage\\96LC4NSW\\4302760.html}, issn = {1939-3539}, - journaltitle = {IEEE Transactions on Pattern Analysis and Machine Intelligence}, + journal = {IEEE Transactions on Pattern Analysis and Machine Intelligence}, keywords = {Algorithm design and analysis,Clustering,Clustering algorithms,Data mining,Data Mining,Graph Partitioning,Image analysis,Image segmentation,k-means,Kernel,Large-scale systems,Optimization methods,Partitioning algorithms,Segmentation,Social network services,Spectral Clustering}, langid = {english}, number = {11}, pages = {1944--1957}, shortjournal = {IEEE Trans. Pattern Anal. Mach. Intell.}, title = {Weighted Graph Cuts without Eigenvectors a Multilevel Approach}, - urldate = {2024-02-08}, + urlyear = {2024}, volume = {29} } @article{golchev2015BiclusteringAnalysisGene, abstract = {Clustering is an unsupervised learning technique that groups data into clusters using the entire conditions. However, sometimes, data is similar only under a subset of conditions. Biclustering allows clustering of rows and columns of a dataset simultaneously. It extracts more accurate information from sparse datasets. In recent years, biclustering has found many useful applications in different fields and many biclustering algorithms have been proposed. Using both row and column information of data, biclustering requires the optimization of two conflicting objectives. In this study, a new multi-objective evolutionary biclustering framework using SPEA2 is proposed. 
A heuristic local search based on the gene and condition deletion and addition is added into SPEA2 and the best bicluster is selected using a new quantitative measure that considers both its coherence and size. The performance of our algorithm is evaluated using simulated and gene expression data and compared with several well-known biclustering methods. The experimental results demonstrate better performance with respect to the size and MSR of detected biclusters and significant enrichment of detected genes.}, author = {Golchev, Maryam and Davarpanah, S. H. and Liew, Alan Wee-Chung}, - url = {https://doi.org/10.1109/ICMLC.2015.7340608}, - annotation = {2 citations (Crossref) [2023-04-27] EI: 是 南农核心: 无 南农高质量: 无}, - date = {2015}, + year = {2015}, doi = {10.1109/ICMLC.2015.7340608}, file = {D:\zihan\Zotero\storage\4T2KKGML\Golchev et al. - 2015 - Biclustering analysis of gene expression data usin.pdf}, - journaltitle = {2015 International Conference on Machine Learning and Cybernetics (ICMLC)}, + journal = {2015 International Conference on Machine Learning and Cybernetics (ICMLC)}, keywords = {/unread,⛔ No INSPIRE recid found}, langid = {english}, pages = {505--510}, @@ -158,65 +121,59 @@ @article{golchev2015BiclusteringAnalysisGene @article{higham2007SpectralClusteringIts, abstract = {We formulate a discrete optimization problem that leads to a simple and informative derivation of a widely used class of spectral clustering algorithms. Regarding the algorithms as attempting to bi-partition a weighted graph with N vertices, our derivation indicates that they are inherently tuned to tolerate all partitions into two non-empty sets, independently of the cardinality of the two sets. This approach also helps to explain the difference in behaviour observed between methods based on the unnormalized and normalized graph Laplacian. 
We also give a direct explanation of why Laplacian eigenvectors beyond the Fiedler vector may contain fine-detail information of relevance to clustering. We show numerical results on synthetic data to support the analysis. Further, we provide examples where normalized and unnormalized spectral clustering is applied to microarray data—here the graph summarizes similarity of gene activity across different tissue samples, and accurate clustering of samples is a key task in bioinformatics.}, author = {Higham, Desmond J. and Kalna, Gabriela and Kibble, Milla}, - url = {https://doi.org/10.1016/j.cam.2006.04.026}, - date = {2007-07-01}, + year = {2007}, doi = {10.1016/j.cam.2006.04.026}, file = {D\:\\zihan\\Zotero\\storage\\9CYFGIJN\\Higham 等 - 2007 - Spectral clustering and its use in bioinformatics.pdf;D\:\\zihan\\Zotero\\storage\\6WUE95ZK\\S0377042706002366.html}, issn = {0377-0427}, - journaltitle = {Journal of Computational and Applied Mathematics}, + journal = {Journal of Computational and Applied Mathematics}, keywords = {Balancing threshold,Fiedler vector,Gene expression,Graph Laplacian,Maximum likelihood,Microarray,Partitioning,Random graph,Rayleigh–Ritz Theorem,Scaling}, langid = {english}, number = {1}, pages = {25--37}, - series = {Special Issue Dedicated to {{Professor Shinnosuke Oharu}} on the Occasion of His 65th Birthday}, shortjournal = {J. Comput. Appl. Math.}, title = {Spectral Clustering and Its Use in Bioinformatics}, - urldate = {2024-02-09}, + urlyear = {2024}, volume = {204} } @article{kluger2003SpectralBiclusteringMicroarray, abstract = {Global analyses of RNA expression levels are useful for classifying genes and overall phenotypes. 
Often these classification problems are linked, and one wants to find ``marker genes'' that are differentially expressed in particular sets of ``conditions.'' We have developed a method that simultaneously clusters genes and conditions, finding distinctive ``checkerboard'' patterns in matrices of gene expression data, if they exist. In a cancer context, these checkerboards correspond to genes that are markedly up- or downregulated in patients with particular types of tumors. Our method, spectral biclustering, is based on the observation that checkerboard structures in matrices of expression data can be found in eigenvectors corresponding to characteristic expression patterns across genes or conditions. In addition, these eigenvectors can be readily identified by commonly used linear algebra approaches, in particular the singular value decomposition (SVD), coupled with closely integrated normalization steps. We present a number of variants of the approach, depending on whether the normalization over genes and conditions is done independently or in a coupled fashion. We then apply spectral biclustering to a selection of publicly available cancer expression data sets, and examine the degree to which the approach is able to identify checkerboard structures. Furthermore, we compare the performance of our biclustering methods against a number of reasonable benchmarks (e.g., direct application of SVD or normalized cuts to raw data).}, author = {Kluger, Yuval and Basri, Ronen and Chang, Joseph T. 
and Gerstein, Mark}, - publisher = {Cold Spring Harbor Lab}, - date = {2003-04-01}, + year = {2003}, doi = {10.1101/gr.648603}, eprint = {12671006}, eprinttype = {pmid}, issn = {1088-9051, 1549-5469}, - journaltitle = {Genome Research}, + journal = {Genome Research}, langid = {english}, number = {4}, pages = {703--716}, shortjournal = {Genome Res.}, - shorttitle = {Spectral {{Biclustering}} of {{Microarray Data}}}, - title = {Spectral {{Biclustering}} of {{Microarray Data}}: {{Coclustering Genes}} and {{Conditions}}}, + shorttitle = {Spectral Biclustering of Microarray Data}, + title = {Spectral Biclustering of Microarray Data: Coclustering Genes and Conditions}, volume = {13} } @inproceedings{kumar2023CoclusteringBasedMethods, abstract = {In the contemporary era, businesses are driven by Internet based web or mobile applications. In every conceivable area of research, it is indispensable for such applications to have a recommender system to expedite the interactions between customers and business entity for faster convergence. Provided this fact, there is need for leveraging such systems as they have unprecedented impact on businesses across the globe. In this regard, identification of merits and demerits in the existing methods used to realize recommender systems is to be given paramount importance. In this paper, we review literature to ascertain useful facts pertaining to different approaches to make recommender systems. Since recommender systems lubricate the process of commercial or otherwise interactions with consumers, for business entities it is imperative to have applications with built-in recommender system. The literature review made in this paper provides different aspects of recommender systems such as datasets, methods and their utility in the current business scenarios. 
It throws light into the research gaps that help in further research and improvement based on novel data mining approaches.}, author = {Kumar, Naresh and Sheeba, Merlin}, - editor = {Morusupalli, Raghava and Dandibhotla, Teja Santosh and Atluri, Vani Vathsala and Windridge, David and Lingras, Pawan and Komati, Venkateswara Rao}, location = {Cham}, - publisher = {Springer Nature Switzerland}, - url = {https://doi.org/10.1007/978-3-031-36402-0_48}, - date = {2023}, + year = {2023}, doi = {10.1007/978-3-031-36402-0_48}, file = {D:\zihan\Zotero\storage\4UE7FRTS\Kumar and Sheeba - 2023 - Co-clustering Based Methods and Their Significance for Recommender Systems.pdf}, isbn = {978-3-031-36402-0}, keywords = {/unread,co-clustering 协同聚类,collaborative filtering methods 协同过滤方法,content based filtering methods 基于内容的过滤方法,recommendation system}, langid = {english}, pages = {513--522}, - series = {Lecture {{Notes}} in {{Computer Science}}}, title = {Co-Clustering Based Methods and Their Significance for Recommender Systems}, - volume = {14078} + volume = {14078}, + booktitle = {Multi-Disciplinary Trends in Artificial Intelligence: 16th International Conference, MIWAI 2023, Hyderabad, India, July 21-22, 2023, Proceedings} } @inproceedings{long2005CoclusteringBlockValue, author = {Long, Bo and Zhang, Zhongfei and Yu, Philip S}, booktitle = {Proceedings of the eleventh ACM SIGKDD international conference on Knowledge discovery in data mining}, - date = {2005}, + year = {2005}, doi = {10.1145/1081870.1081949}, pages = {635--640}, title = {Co-clustering by block value decomposition} @@ -225,11 +182,11 @@ @inproceedings{long2005CoclusteringBlockValue @article{madeira2004BiclusteringAlgorithmsBiological, abstract = {A large number of clustering approaches have been proposed for the analysis of gene expression data obtained from microarray experiments. However, the results from the application of standard clustering methods to genes are limited. 
This limitation is imposed by the existence of a number of experimental conditions where the activity of genes is uncorrelated. A similar limitation exists when clustering of conditions is performed. For this reason, a number of algorithms that perform simultaneous clustering on the row and column dimensions of the data matrix has been proposed. The goal is to find submatrices, that is, subgroups of genes and subgroups of conditions, where the genes exhibit highly correlated activities for every condition. In this paper, we refer to this class of algorithms as biclustering. Biclustering is also referred in the literature as coclustering and direct clustering, among others names, and has also been used in fields such as information retrieval and data mining. In this comprehensive survey, we analyze a large number of existing approaches to biclustering, and classify them in accordance with the type of biclusters they can find, the patterns of biclusters that are discovered, the methods used to perform the search, the approaches used to evaluate the solution, and the target applications.}, author = {Madeira, Sara C. and Oliveira, Arlindo L.}, - date = {2004}, + year = {2004}, doi = {10.1109/tcbb.2004.2}, eprint = {17048406}, eprinttype = {pmid}, - journaltitle = {IEEE/ACM Transactions on Computational Biology and Bioinformatics}, + journal = {IEEE/ACM Transactions on Computational Biology and Bioinformatics}, shortjournal = {IEEE/acm Trans. Comput. Biol. Bioinformatics}, title = {Biclustering Algorithms for Biological Data Analysis: {{A}} Survey} } @@ -237,116 +194,108 @@ @article{madeira2004BiclusteringAlgorithmsBiological @article{vonluxburg2007TutorialSpectralClustering, abstract = {In recent years, spectral clustering has become one of the most popular modern clustering algorithms. It is simple to implement, can be solved efficiently by standard linear algebra software, and very often outperforms traditional clustering algorithms such as the k-means algorithm. 
On the first glance spectral clustering appears slightly mysterious, and it is not obvious to see why it works at all and what it really does. The goal of this tutorial is to give some intuition on those questions. We describe different graph Laplacians and their basic properties, present the most common spectral clustering algorithms, and derive those algorithms from scratch by several different approaches. Advantages and disadvantages of the different spectral clustering algorithms are discussed.}, author = {von Luxburg, Ulrike}, - url = {https://doi.org/10.1007/s11222-007-9033-z}, - date = {2007-12-01}, + year = {2007}, doi = {10.1007/s11222-007-9033-z}, file = {D:\zihan\Zotero\storage\R4CIW2ME\von Luxburg - 2007 - A tutorial on spectral clustering.pdf}, issn = {1573-1375}, - journaltitle = {Statistics and Computing}, + journal = {Statistics and Computing}, keywords = {Graph Laplacian,Spectral clustering}, langid = {english}, number = {4}, pages = {395--416}, shortjournal = {Stat. Comput.}, title = {A Tutorial on Spectral Clustering}, - urldate = {2024-02-08}, + urlyear = {2024}, volume = {17} } @article{wu2023EffectiveClusteringStructured, abstract = {Given an affinity graph of data samples, graph-based clustering aims to partition these samples into disjoint groups based on the affinities, and most previous works are based on spectral clustering. However, two problems among spectral-based methods heavily affect the clustering performance. First, the randomness of post-processing procedures, such as KK-means, affects the stability of clustering. Second, the separated stages of spectral-based methods, including graph construction, spectral embedding learning, and clustering decision, lead to mismatched problems. In this paper, we explore a structured graph learning (SGL) framework that aims to fuse these stages to improve clustering stability. Specifically, SGL adaptively learns a structured affinity graph that contains exact kk connected components. 
Each connected component corresponds to a cluster so clustering assignments can be directly obtained according to the connectivity of the learned graph. In this way, SGL avoids the randomness brought by reliance on traditional post-processing procedures. Meanwhile, the graph construction and structured graph learning procedures happen simultaneously, which alleviates the mismatched problem effectively. Moreover, we propose an efficient algorithm to solve the involved optimization problems and discuss the connections between this work and previous works. Numerical experiments on several synthetic and real datasets demonstrate the effectiveness of our methods.}, author = {Wu, Danyang and Nie, Feiping and Lu, Jitao and Wang, Rong and Li, Xuelong}, - url = {https://doi.org/10.1109/TKDE.2022.3222411}, - date = {2023-08}, + year = {2023}, doi = {10.1109/TKDE.2022.3222411}, eventtitle = {{{IEEE Transactions}} on {{Knowledge}} and {{Data Engineering}}}, file = {D:\zihan\Zotero\storage\VETKFYQY\9950731.html}, issn = {1558-2191}, - journaltitle = {IEEE Transactions on Knowledge and Data Engineering}, + journal = {IEEE Transactions on Knowledge and Data Engineering}, keywords = {adaptive neighbors,block diagonal similarity matrix,Clustering,Clustering algorithms,Clustering methods,Eigenvalues and eigenfunctions,Optimization,Partitioning algorithms,Stability analysis,structured graph learning,Task analysis}, langid = {english}, number = {8}, pages = {7909--7920}, shortjournal = {IEEE Trans. Knowl. 
Data Eng.}, title = {Effective Clustering via Structured Graph Learning}, - urldate = {2024-02-09}, + urlyear = {2024}, volume = {35} } @inproceedings{dongkuanxu2019DeepCoClustering, - author = {Xu, Dongkuan and Cheng, Wei and Zong, Bo and Ni, Jingchao and Song, Dongjin and Yu, Wenchao and Chen, Yuncong and Chen, Haifeng and Zhang, Xiang}, - organization = {SIAM}, - booktitle = {Proceedings of the 2019 SIAM International Conference on Data Mining}, - date = {2019}, - pages = {414--422}, - title = {Deep co-clustering} + author = {Xu, Dongkuan and Cheng, Wei and Zong, Bo and Ni, Jingchao and Song, Dongjin and Yu, Wenchao and Chen, Yuncong and Chen, Haifeng and Zhang, Xiang}, + booktitle = {Proceedings of the 2019 SIAM International Conference on Data Mining}, + year = {2019}, + pages = {414--422}, + title = {Deep co-clustering} } @article{yan2017CoclusteringMultidimensionalBig, abstract = {The analysis of a multidimensional data array is necessary in many applications. Although a data set can be very large, it is possible that meaningful and coherent patterns embedded in the data array are much smaller in size. For example, in genomic data, we may want to find a subset of genes that coexpress under a subset of conditions. In this article, I will explain coclustering algorithms for solving the coherent pattern-detection problem. In these methods, a coherent pattern corresponds to a low-rank matrix or tensor and can be represented as an intersection of hyperplanes in a high-dimensional space. We can then extract coherent patterns from the large data array by detecting hyperplanes. 
Examples will be provided to demonstrate the effectiveness of the coclustering algorithms for solving unsupervised pattern classification problems.}, author = {Yan, Hong}, - publisher = {Institute of Electrical and Electronics Engineers (IEEE)}, - date = {2017-04}, + year = {2017}, doi = {10.1109/msmc.2017.2664218}, issn = {2333-942X, 2380-1298}, - journaltitle = {IEEE Systems, Man, and Cybernetics Magazine}, + journal = {IEEE Systems, Man, and Cybernetics Magazine}, langid = {english}, number = {2}, pages = {23--30}, shortjournal = {IEEE Syst. Man Cybern. Mag.}, - title = {Coclustering of {{Multidimensional Big Data}}: {{A Useful Tool}} for {{Genomic}}, {{Financial}}, and {{Other Data Analysis}}}, + title = {Coclustering of Multidimensional Big Data: A Useful Tool for Genomic, Financial, and Other Data Analysis}, volume = {3} } @article{yuan2023JointNetworkTopology, abstract = {Joint network topology inference represents a canonical problem of jointly learning multiple graph Laplacian matrices from heterogeneous graph signals. In such a problem, a widely employed assumption is that of a simple common component shared among multiple graphs. However, in practice, a more intricate topological pattern, comprising simultaneously of homogeneous and heterogeneous components, would exhibit in multiple graphs. In this paper, we propose a general graph estimator based on a novel structural fusion regularization that enables us to jointly learn multiple graphs with such complex topological patterns, and enjoys rigorous theoretical guarantees. Specifically, in the proposed regularization term, the structural similarity among graphs is characterized by a Gram matrix, which enables us to flexibly model different types of network structural similarities through different Gram matrix choices. 
Algorithmically, the regularization term, coupling the parameters together, makes the formulated optimization problem intractable, and thus, we develop an implementable algorithm based on the alternating direction method of multipliers (ADMM) to solve it. Theoretically, non-asymptotic statistical analysis is provided, which precisely characterizes the minimum sample size required for the consistency of the graph estimator. This analysis also provides high-probability bounds on the estimation error as a function of graph structural similarities and other key problem parameters. Finally, the superior performance of the proposed method is demonstrated through simulated and real data examples.}, author = {Yuan, Yanli and Soh, De Wen and Guo, Kun and Xiong, Zehui and Quek, Tony Q. S.}, - url = {https://doi.org/10.1109/TKDE.2023.3264971}, - date = {2023-10}, + year = {2023}, doi = {10.1109/TKDE.2023.3264971}, eventtitle = {{{IEEE Transactions}} on {{Knowledge}} and {{Data Engineering}}}, file = {D\:\\zihan\\Zotero\\storage\\RL32UYSL\\Yuan 等 - 2023 - Joint network topology inference via structural fusion regularization.pdf;D\:\\zihan\\Zotero\\storage\\34XLMT56\\10093994.html}, issn = {1558-2191}, - journaltitle = {IEEE Transactions on Knowledge and Data Engineering}, + journal = {IEEE Transactions on Knowledge and Data Engineering}, keywords = {Correlation,Graph Laplacian,graph signals,Laplace equations,Network topology,network topology inference,non-asymptotic statistical analysis,regularization,Social networking (online),Statistical analysis,Task analysis,Topology}, langid = {english}, number = {10}, pages = {10351--10364}, shortjournal = {IEEE Trans. Knowl. 
Data Eng.}, title = {Joint Network Topology Inference via Structural Fusion Regularization}, - urldate = {2024-02-09}, + urlyear = {2024}, volume = {35} } @article{zhang2023AdaptiveGraphConvolution, abstract = {Attributed graph clustering is a challenging task as it requires to jointly model graph structure and node attributes. Although recent advances in graph convolutional networks have shown the effectiveness of graph convolution in combining structural and content information, there is limited understanding of how to properly apply it for attributed graph clustering. Previous methods commonly use a fixed and low order graph convolution, which only aggregates information of few-hop neighbours and hence cannot fully capture the cluster structures of diverse graphs. In this paper, we first propose an adaptive graph convolution method (AGC) for attributed graph clustering, which exploits high-order graph convolutions to capture global cluster structures and adaptively selects an appropriate order kk via intra-cluster distance. While AGC can find a reasonable kk and avoid over-smoothing, it is not sensitive to the gradual decline of clustering performance as kk increases. To search for a better kk, we further propose an improved adaptive graph convolution method (IAGC) that not only observes the variation of intra-cluster distance, but also considers the inconsistencies of filtered features with graph structure and raw features, respectively. 
We establish the validity of our methods by theoretical analysis and extensive experiments on various benchmark datasets.}, author = {Zhang, Xiaotong and Liu, Han and Li, Qimai and Wu, Xiao-Ming and Zhang, Xianchao}, - url = {https://doi.org/10.1109/TKDE.2023.3278721}, - date = {2023-12}, + year = {2023}, doi = {10.1109/TKDE.2023.3278721}, eventtitle = {{{IEEE Transactions}} on {{Knowledge}} and {{Data Engineering}}}, file = {D\:\\zihan\\Zotero\\storage\\IETDDZCP\\Zhang 等 - 2023 - Adaptive graph convolution methods for attributed graph clustering.pdf;D\:\\zihan\\Zotero\\storage\\T59D3IJ5\\10130603.html}, issn = {1558-2191}, - journaltitle = {IEEE Transactions on Knowledge and Data Engineering}, + journal = {IEEE Transactions on Knowledge and Data Engineering}, keywords = {Adaptation models,Adaptive graph convolution,attributed graph clustering,Automatic generation control,Clustering methods,Convolution,Feature extraction,low-pass graph filter,Proteins,Social networking (online)}, langid = {english}, number = {12}, pages = {12384--12399}, shortjournal = {IEEE Trans. Knowl. Data Eng.}, title = {Adaptive Graph Convolution Methods for Attributed Graph Clustering}, - urldate = {2024-02-09}, + urlyear = {2024}, volume = {35} } @article{zhao2012BiclusteringAnalysisPattern, author = {Zhao, Hongya and Wee-Chung Liew, Alan and Z. 
Wang, Doris and Yan, Hong}, - url = {https://doi.org/10.2174/157489312799304413}, - annotation = {31 citations (Crossref) [2022-10-21] JCR分区: Q1 中科院分区升级版: 生物学4区 影响因子: 4.85 5年影响因子: 3.111 JCI: 0.86 南农核心: 无 南农高质量: 无}, - date = {2012-03-01}, + year = {2012}, doi = {10.2174/157489312799304413}, file = {D:\zihan\Zotero\storage\62MPAKFA\Biclustering_Analysis_for_Pattern_Discovery_Zhao_et_al_2012.pdf}, issn = {15748936}, - journaltitle = {Current Bioinformatics}, + journal = {Current Bioinformatics}, keywords = {/unread,⛔ No INSPIRE recid found,yan cocluster}, langid = {english}, number = {1}, @@ -354,27 +303,25 @@ @article{zhao2012BiclusteringAnalysisPattern shortjournal = {Curr. Bioinf.}, shorttitle = {Biclustering {{Analysis}} for {{Pattern Discovery}}}, title = {Biclustering Analysis for Pattern Discovery: Current Techniques, Comparative Studies and Applications}, - urldate = {2022-09-29}, + urlyear = {2022}, volume = {7} } @article{zhao2023MultiviewCoclusteringMultisimilarity, abstract = {Multi-view co-clustering, which clustering the two dimensions of samples and features of multi-view data at the same time, has attracted much attention in recent years. It aims to exploit the duality of multi-view data to get better clustering results. However, most of the existing multi-view co-clustering algorithms consider the sample-feature information of the data while ignoring the sample-sample, feature-feature information, and thus cannot fully mine the potential information contained in the data. Therefore, this paper proposes a multi-view co-clustering based on multi-similarity. In particular, based on spectral clustering, we propose a method of constructing graph to improve the performance of clustering, which is no longer limited to the relevance between samples and features. At the same time, inspired by the ensemble algorithm, we use multiple co-clustering algorithms to calculate the similarity information of each view data, which makes the algorithm more robust. 
Compared with the existing multi-view co-clustering methods, the proposed algorithm exploits the more comprehensive similarity information in each view data, including sample-sample, feature-feature, and sample-feature similarity information. We performed experiments on several benchmark datasets. Due to mining and using more similarity information, our experimental results are better than the comparison method in the three evaluation indicators. In particular, on some data with co-occurrence features such as (word-document), our algorithm achieves better results and can obtain higher accuracy.}, author = {Zhao, Ling and Ma, Yunpeng and Chen, Shanxiong and Zhou, Jun}, - url = {https://doi.org/10.1007/s10489-022-04385-4}, - annotation = {影响因子: 5.3 CCF: C 5年影响因子: 5.2}, - date = {2023-07-01}, + year = {2023}, doi = {10.1007/s10489-022-04385-4}, file = {D:\zihan\Zotero\storage\TJXWYC8K\Zhao et al. - 2023 - Multi-view co-clustering with multi-similarity.pdf}, issn = {1573-7497}, - journaltitle = {Applied Intelligence}, + journal = {Applied Intelligence}, keywords = {/unread,Co-clustering,Ensemble,Multi-view clustering,Similarity}, langid = {english}, number = {13}, pages = {16961--16972}, shortjournal = {Appl. 
Intell.}, title = {Multi-View Co-Clustering with Multi-Similarity}, - urldate = {2023-12-11}, + urldate = {2023}, volume = {53} } @@ -382,16 +329,13 @@ @inproceedings{hansen2011NonparametricCoclusteringLarge address = {Beijing, China}, title = {Non-parametric co-clustering of large scale sparse bipartite networks on the {GPU}}, isbn = {978-1-4577-1621-8}, - url = {http://ieeexplore.ieee.org/document/6064611/}, doi = {10.1109/MLSP.2011.6064611}, language = {en}, - urldate = {2024-07-14}, + urldate = {2024}, booktitle = {2011 {IEEE} {International} {Workshop} on {Machine} {Learning} for {Signal} {Processing}}, - publisher = {IEEE}, author = {Hansen, Toke Jansen and Morup, Morten and Kai Hansen, Lars}, month = sep, year = {2011}, - note = {7 citations (Crossref/DOI) [2024-07-14]}, pages = {1--6}, file = {Full Text:/Volumes/Mac_Ext/Zotero/storage/WT8X23UZ/Hansen et al. - 2011 - Non-parametric co-clustering of large scale sparse bipartite networks on the GPU.pdf:application/pdf;Snapshot:/Volumes/Mac_Ext/Zotero/storage/U7AEDNZQ/non-parametric-co-clustering-of-large-scale-sparse-bipartite-mwzk6re37l.html:text/html} }
Moreover, some huge datasets can not be entirely held in main memory during co-clustering which violates the assumption made by the previous algorithms. In this paper, we propose a general framework for fast co-clustering large datasets, CRD. By utilizing recently developed sampling-based matrix decomposition methods, CRD achieves an execution time linear in m and n. Also, CRD does not require the whole data matrix be in the main memory. We conducted extensive experiments on both real and synthetic data. Compared with previous co-clustering algorithms, CRD achieves competitive accuracy but with much less computational cost.}, language = {en}, - urldate = {2024-07-14}, + urldate = {2024}, booktitle = {Proceedings of the 2008 {ACM} {SIGMOD} international conference on {Management} of data}, - publisher = {ACM}, author = {Pan, Feng and Zhang, Xiang and Wang, Wei}, month = jun, year = {2008} @@ -421,8 +363,7 @@ @article{lewis2004rcv1 journal = {Journal of Machine Learning Research}, volume = {5}, pages = {361--397}, - year = {2004}, - url = {http://www.jmlr.org/papers/volume5/lewis04a/lewis04a.pdf} + year = {2004} } @inproceedings{ni2019justifying,