%% Cell type:markdown id:ea5b6890 tags:
# Skalierbare Methoden der Künstlichen Intelligenz
Dr. Charlotte Debus (charlotte.debus@kit.edu)
Dr. Markus Götz (markus.goetz@kit.edu)
Dr. Marie Weiel (marie.weiel@kit.edu)
Dr. Kaleb Phipps (kaleb.phipps@kit.edu)
## Exercise 1 on 19.11.24: Parallel k-Means Clustering
In this first exercise, we look at k-means cluster analysis and possible approaches to parallelizing it (see the lecture from 07.11.24). We use the [Cityscapes](https://www.cityscapes-dataset.com/) dataset, which provides, among other things, 5000 high-resolution images of street scenes from 50 different cities.
Each of these images consists of 2048 x 1024 pixels with three 8-bit RGB color channels per pixel. Flattened, the images form a "short-fat" matrix with 5000 x 6,291,456 serial entries: 5000 images x (3 channels x 2048 pixels x 1024 pixels) = 5000 x 6,291,456.
For our task, we use 300 of these samples. You can find them on the bwUniCluster in the workspace `VL-ScalableAI` under the following path:
`/pfs/work7/workspace/scratch/ku4408-VL-ScalableAI/data/cityscapes_300.h5`
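To get a first feeling for the data layout, the following minimal sketch (assuming `h5py` is installed and the workspace path is readable from your session) simply opens the file and prints the dataset's shape:

``` python
import h5py

path = "/pfs/work7/workspace/scratch/ku4408-VL-ScalableAI/data/cityscapes_300.h5"
with h5py.File(path, "r") as handle:  # Open the HDF5 file read-only.
    # Judging from the file name, a shape of (300, 6291456) is expected,
    # i.e., 300 flattened samples; treat this as an assumption, not a given.
    print(handle["cityscapes_data"].shape)
```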
### Task 1
Below you find a serial implementation of the k-means algorithm in Python 3 using the machine learning library [PyTorch](https://pytorch.org/).
Run the code on a CPU-based node and on a GPU of the bwUniCluster. Note that the code must be adapted for GPU usage. Compare the runtimes. What do you observe?
*Hint: First load the required modules on the bwUniCluster. Then set up a Python virtual environment in which you install the required Python packages. Based on the code below, create a Python script that you submit to the cluster via SLURM using a bash script (see the exercise from 05.11.24). Below you find a template submit script for the CPU job, including the required modules. For GPU usage, the #SBATCH options must be adapted accordingly. Further information can be found [here](https://wiki.bwhpc.de/e/BwUniCluster_2.0_Slurm_common_Features).*
%% Cell type:code id:27a9e43b tags:
``` bash
#!/bin/bash
#SBATCH --job-name=kmeans_cpu # job name
#SBATCH --partition=single # queue for resource allocation
#SBATCH --time=30:00 # wall-clock time limit
#SBATCH --mem=40000 # memory
#SBATCH --nodes=1 # number of nodes to be used
#SBATCH --mail-type=ALL # Notify user by email when certain event types occur.
#SBATCH --mail-user=u????@student.kit.edu # notification email address
export VENVDIR=<path/to/your/venv/folder> # Export path to your Python3.11 virtual environment.
export PYDIR=<path/to/your/python/script> # Export path to directory containing Python script.
# Set up modules.
module purge # Unload all currently loaded modules.
module load compiler/gnu/13.3 # Load required modules.
module load mpi/openmpi/4.1
module load devel/cuda/12.4
module load lib/hdf5/1.14.4-gnu-13.3-openmpi-4.1
source ${VENVDIR}/bin/activate # Activate your virtual environment.
python -u ${PYDIR}/kmeans.py # Run your Python script.
```
%% Cell type:code id:4113e8f9-128b-4d10-add7-4a64d470456e tags:
``` python
"""
Serial implementation of k-means clustering in PyTorch
"""
import time

import h5py
import torch
class KMeans:
"""
Serial k-means clustering in PyTorch.
Attributes
----------
n_clusters : int
The number of clusters, i.e., k.
max_iter : int
The maximum number of iterations to perform.
tol : float
The tolerance for the convergence criterion.
_centroids : Union[None, torch.Tensor]
The current centroids.
_matching_centroids : Union[None, torch.Tensor]
Assigned centroids for all samples in dataset.
_inertia : float
The inertia (quantity to be checked for convergence).
Methods
-------
_initialize_centroids(x)
Randomly initialize centroids.
_fit_to_cluster(x)
Get the closest centroid for each sample in dataset.
fit(x)
Perform k-means clustering.
"""
def __init__(
self, n_clusters: int = 8, max_iter: int = 300, tol: float = -1.0
) -> None:
"""
Configure k-means clustering algorithm.
Parameters
----------
n_clusters : int
The number of clusters, i.e., k.
max_iter : int
The maximum number of iterations to be performed.
tol : float
The tolerance for the convergence criterion.
"""
        self.n_clusters = n_clusters  # Number of clusters
        self.max_iter = max_iter  # Maximum number of iterations
        self._centroids = None
        self._matching_centroids = None
        self.tol = tol  # Tolerance for convergence criterion
self._inertia = float("nan")
def _initialize_centroids(self, x: torch.Tensor) -> None:
"""
Randomly initialize the centroids.
Parameters
----------
x : torch.Tensor
The dataset to be clustered.
"""
# Shuffle data and choose first `n_clusters` samples as initial centroids.
self._centroids = x[torch.randperm(x.shape[0])[: self.n_clusters]]
def _fit_to_cluster(self, x: torch.Tensor) -> torch.Tensor:
"""
        Determine the closest centroid for each sample in the dataset, as measured by the Euclidean distance.
Parameters
----------
x : torch.Tensor
The dataset to be clustered.
Returns
-------
torch.Tensor
Indices of matching centroids for each sample in dataset.
"""
distances = torch.cdist(
x, self._centroids
        )  # Calculate Euclidean distance of each data sample to each current centroid.
return distances.argmin(
dim=1, keepdim=True
) # Return index of the closest centroid for each sample.
def fit(self, x: torch.Tensor) -> "KMeans":
"""
Perform k-means clustering of given dataset.
Parameters
----------
x : torch.Tensor
The dataset to cluster.
Returns
-------
KMeans
The fitted KMeans object containing final centroids.
"""
self._initialize_centroids(x) # Initialize centroids.
new_cluster_centers = self._centroids.clone()
# Iteratively fit points to centroids.
for idx in range(self.max_iter):
# Determine index of the closest centroid for each sample in dataset.
print(f"Iteration {idx}...")
self._matching_centroids = self._fit_to_cluster(
x
) # Array of length `n_samples` providing index of closest centroid for each sample in dataset.
# Update centroids.
for i in range(self.n_clusters): # Loop over clusters.
# Determine all points in current cluster.
selection_mask = (self._matching_centroids == i).type(torch.int64)
# Array of length `n_samples` with binary encoding of whether each sample belongs to cluster i or not.
assigned_points = (x * selection_mask).sum(
axis=0, keepdim=True
) # Compute vectorial sum of all points in current cluster.
points_in_cluster = selection_mask.sum(axis=0, keepdim=True).clamp(
1, torch.iinfo(torch.int64).max
) # Compute number of points in current cluster.
new_cluster_centers[i : i + 1, :] = (
assigned_points / points_in_cluster
) # Compute new centroids.
# Check whether centroid movement has converged.
self._inertia = (
(self._centroids - new_cluster_centers) ** 2
).sum() # Update inertia.
self._centroids = new_cluster_centers.clone()
if (
self.tol is not None and self._inertia <= self.tol
): # Check whether inertia is smaller than tolerance.
break
return self
if __name__ == "__main__":
print(
"##############################\n"
"# PyTorch k-Means Clustering #\n"
"##############################"
)
path = "/pfs/work7/workspace/scratch/ku4408-VL-ScalableAI/data/cityscapes_300.h5"
dataset = "cityscapes_data"
## ADAPT CODE HERE TO ENABLE GPU USAGE:
device = torch.device("cpu")
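    # A possible adaptation (sketch, not the required solution): pick the GPU
    # automatically when one is available, e.g.,
    # device = torch.device("cuda" if torch.cuda.is_available() else "cpu")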
print(f"Loading dataset from {path}[{dataset}]...")
# Data is available in HDF5 format.
# An HDF5 file is a container for two kinds of objects:
# - datasets: array-like collections of data
# - groups: folder-like containers holding datasets and other groups
    # The most fundamental thing to remember when using h5py is that
    # groups work like dictionaries and datasets work like NumPy arrays.
# Open file for reading. We use the Cityscapes dataset.
with h5py.File(path, "r") as handle:
print("Open h5 file...")
    data = torch.tensor(
        handle[dataset][:300], device=device
    )  # Default device is "cpu"; set device to "cuda" for GPU usage.
print("Torch tensor created.")
# k-means hyperparameters
num_clusters = 8
num_iterations = 20
kmeans_clusterer = KMeans(n_clusters=num_clusters, max_iter=num_iterations)
print("Start fitting the data...")
start = time.perf_counter() # Start timer.
kmeans_clusterer.fit(data) # Perform k-means clustering.
print(f"DONE.\nRun time: \t{time.perf_counter() - start} s") # Measure and print runtime.
```
%% Cell type:markdown id:6c7900f7 tags:
### Task 2
Starting from the code above, implement a sample-parallel version of the k-means algorithm. Use the serial implementation above as a guide.
The interface, i.e., how the class is used in the actual execution part of the code, should remain the same. For the parallelization, you need a correspondingly parallelized dataloader, which you can find in the code fragment below. Test your code on four nodes of the bwUniCluster. A matching bash submit script is provided below.
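To illustrate the communication pattern (without giving away a full solution), here is a minimal sketch of one sample-parallel centroid update, assuming CPU tensors so that `torch` and `mpi4py` can share buffers; the function name and signature are illustrative, not part of the required interface:

``` python
import torch
from mpi4py import MPI


def sample_parallel_update(
    comm: MPI.Comm, x: torch.Tensor, centroids: torch.Tensor
) -> torch.Tensor:
    """One sample-parallel centroid update via MPI allreduce (sketch)."""
    n_clusters = centroids.shape[0]
    # Each rank assigns only its local sample chunk to the current centroids.
    assignments = torch.cdist(x, centroids).argmin(dim=1, keepdim=True)
    local_sums = torch.zeros_like(centroids)  # Per-cluster vectorial sums
    local_counts = torch.zeros(n_clusters, 1, dtype=x.dtype)  # Cluster sizes
    for i in range(n_clusters):
        mask = (assignments == i).type(x.dtype)  # Binary cluster membership
        local_sums[i] = (x * mask).sum(dim=0)
        local_counts[i] = mask.sum()
    # Sum the partial per-cluster sums and counts over all ranks in place.
    comm.Allreduce(MPI.IN_PLACE, local_sums.numpy(), op=MPI.SUM)
    comm.Allreduce(MPI.IN_PLACE, local_counts.numpy(), op=MPI.SUM)
    # Guard against empty clusters before dividing.
    return local_sums / local_counts.clamp(min=1.0)
```

Since the centroids are replicated on every rank and only the small per-cluster sums and counts are communicated, the communication volume per iteration is independent of the number of samples.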
%% Cell type:code id:691eeb17 tags:
``` bash
#!/bin/bash
#SBATCH --job-name=kmeans_sample # job name
#SBATCH --partition=multiple # queue for the resource allocation.
#SBATCH --time=30:00 # wall-clock time limit
#SBATCH --mem=40000 # memory per node
#SBATCH --nodes=4 # number of nodes to be used
#SBATCH --cpus-per-task=40 # number of CPUs required per MPI task
#SBATCH --ntasks-per-node=1 # maximum count of tasks per node
#SBATCH --mail-type=ALL # Notify user by email when certain event types occur.
#SBATCH --mail-user=u????@student.kit.edu # notification email address
export OMP_NUM_THREADS=${SLURM_CPUS_PER_TASK}
export VENVDIR=<path/to/your/venv/folder> # Export path to your Python3.11 virtual environment.
export PYDIR=<path/to/your/python/script> # Export path to directory containing Python script.
# Set up modules.
module purge # Unload all currently loaded modules.
module load compiler/gnu/13.3 # Load required modules.
module load mpi/openmpi/4.1
module load devel/cuda/12.4
module load lib/hdf5/1.14.4-gnu-13.3-openmpi-4.1
source ${VENVDIR}/bin/activate # Activate your virtual environment.
mpirun python ${PYDIR}/kmeans_sample_parallel.py # Run your Python script in parallel.
```
%% Cell type:code id:eac3eac5 tags:
``` python
"""
Sample-parallel implementation of k-means clustering in PyTorch using MPI
"""
import time

import h5py
import torch
from mpi4py import MPI
class KMeans:
"""
Sample-parallel k-means clustering in PyTorch using MPI.
"""
def __init__(
self,
comm: MPI.Comm = MPI.COMM_WORLD,
n_clusters: int = 8,
max_iter: int = 300,
tol: float = -1.0,
) -> None:
"""Configure sample-parallel k-means clustering algorithm."""
        self.comm = comm  # The communicator used
        # IMPLEMENT SAMPLE-PARALLEL K-MEANS CLUSTERING VERSION HERE!
if __name__ == "__main__":
comm = MPI.COMM_WORLD
rank, size = comm.rank, comm.size
if rank == 0:
        print(
            "#################################################\n"
            "# Sample-Parallel k-Means Clustering in PyTorch #\n"
            "#################################################"
        )
path = "/pfs/work7/workspace/scratch/ku4408-VL-ScalableAI/data/cityscapes_300.h5"
dataset = "cityscapes_data"
if rank == 0:
print(f"Loading dataset from {path}[{dataset}]...")
    # The dataset is split along the sample axis.
    # Each rank loads an exclusive chunk of the original dataset.
with h5py.File(path, "r") as handle:
chunk = int(handle[dataset].shape[0] / size)
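        # The last rank additionally takes any remainder samples so that the
        # full dataset is covered even if it does not divide evenly by `size`.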
if rank == size - 1:
data = torch.tensor(handle[dataset][rank * chunk :])
else:
data = torch.tensor(handle[dataset][rank * chunk : (rank + 1) * chunk])
print("\t[OK]")
# k-means hyperparameters
num_clusters = 8
num_iterations = 20
kmeans_clusterer = KMeans(comm=comm, n_clusters=num_clusters, max_iter=num_iterations)
if rank == 0:
print("Start fitting the data...")
start = time.perf_counter() # Start runtime measurement.
kmeans_clusterer.fit(data) # Perform actual k-means clustering.
if rank == 0:
print(f"DONE.\nRun time:\t{time.perf_counter() - start} s") # Measure and print runtime.
```
%% Cell type:markdown id:d5379ea7 tags:
### Task 3
Starting from the code above, implement a feature-parallel version of the k-means algorithm. The correspondingly parallelized dataloader can be found in the code fragment below. Test your code on four nodes of the bwUniCluster.
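The key observation for the feature-parallel case: squared Euclidean distances decompose into a sum of per-feature contributions, so each rank can compute a partial distance matrix on its feature slice, and summing these matrices over all ranks yields the full squared distances on every rank. Below is a minimal sketch of the resulting assignment step, again assuming CPU tensors and with an illustrative function name and signature:

``` python
import torch
from mpi4py import MPI


def feature_parallel_assignment(
    comm: MPI.Comm, x: torch.Tensor, centroids: torch.Tensor
) -> torch.Tensor:
    """Assign samples to centroids with features split across ranks (sketch)."""
    # `x` holds all samples but only this rank's feature slice; `centroids`
    # holds the matching feature slice of the current centroids.
    partial = torch.cdist(x, centroids) ** 2  # Partial squared distances
    # Summing the partial (n_samples, n_clusters) matrices over all ranks
    # yields the full squared Euclidean distances on every rank.
    comm.Allreduce(MPI.IN_PLACE, partial.numpy(), op=MPI.SUM)
    return partial.argmin(dim=1, keepdim=True)  # Identical on all ranks
```

After this step, every rank holds identical assignments, so the centroid update needs no further communication: each rank updates only its own feature slice of the centroids.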
%% Cell type:code id:217173aa tags:
``` python
"""
Feature-parallel implementation of k-means clustering in PyTorch using MPI
"""
import time

import h5py
import torch
from mpi4py import MPI
class KMeans:
"""
Feature-parallel k-means clustering in PyTorch using MPI.
"""
def __init__(
self,
comm: MPI.Comm = MPI.COMM_WORLD,
n_clusters: int = 8,
max_iter: int = 300,
tol: float = -1.0,
) -> None:
"""Configure feature-parallel k-means clustering algorithm."""
        self.comm = comm  # The communicator used
        # IMPLEMENT FEATURE-PARALLEL K-MEANS CLUSTERING VERSION HERE!
if __name__ == "__main__":
comm = MPI.COMM_WORLD
rank, size = comm.rank, comm.size
if rank == 0:
        print(
            "##################################################\n"
            "# Feature-Parallel k-Means Clustering in PyTorch #\n"
            "##################################################"
        )
path = "/pfs/work7/workspace/scratch/ku4408-VL-ScalableAI/data/cityscapes_300.h5"
dataset = "cityscapes_data"
if rank == 0:
print(f"Loading dataset from {path}[{dataset}]...")
    # The dataset is split along the feature axis.
    # Each rank loads an exclusive chunk of the original dataset.
with h5py.File(path, "r") as handle:
chunk = int(handle[dataset].shape[1] / size)
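        # The last rank additionally takes any remainder features so that the
        # full feature axis is covered even if it does not divide evenly by `size`.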
if rank == size - 1:
data = torch.tensor(handle[dataset][:, rank * chunk :])
else:
data = torch.tensor(handle[dataset][:, rank * chunk : (rank + 1) * chunk])
print("\t[OK]")
# k-means hyperparameters
num_clusters = 8
num_iterations = 20
kmeans_clusterer = KMeans(comm=comm, n_clusters=num_clusters, max_iter=num_iterations)
if rank == 0:
print("Start fitting the data...")
start = time.perf_counter() # Start runtime measurement.
kmeans_clusterer.fit(data) # Perform actual k-means clustering.
if rank == 0:
print(f"DONE.\nRun time:\t{time.perf_counter() - start} s")
```