Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
  • marie.weiel/scalableai2425
1 result
Show changes
Commits on Source (2)
Showing
with 11130 additions and 0 deletions
"""Parallel calculation of pairwise distances"""
import time
import h5py
import torch
from mpi4py import MPI
torch.set_default_dtype(torch.float32)
def dist(x: torch.Tensor, y: torch.Tensor, comm: MPI.Comm = MPI.COMM_WORLD) -> torch.Tensor:
    """
    Calculate pairwise distances between all rows (samples, i.e., along axis 0) of two tensors x and y in parallel.

    The distance matrix is calculated tile-wise with ring communication between processes, each holding a piece of x
    and/or y. The result and all receive buffers are allocated with torch's default dtype (this module sets it to
    float32), so x and y are expected to use that dtype as well.

    Parameters
    ----------
    x : torch.Tensor
        First 2d tensor (of shape m/p x f). m is the total number of samples in x, distributed over p processors.
        f is the number of features.
    y : torch.Tensor
        Second 2d tensor (of shape n/p x f). n is the total number of samples in y, distributed over p processors.
        The number of features f must be the same as for x.
    comm : MPI.Comm
        Communicator to use. Default is ``MPI.COMM_WORLD``.

    Returns
    -------
    torch.Tensor
        In the parallel case, the rank-local chunk of the distance matrix of shape (mp, n), where mp is the number
        of local rows of x and n the overall number of rows of y. With a single process, ``torch.cdist(x, y)``.

    Raises
    ------
    ValueError
        If x or y is not two-dimensional or if their numbers of features differ.
    """
    # Check each tensor on its own: a chained `a != b != 2` comparison would let, e.g., two 3d tensors pass.
    if x.dim() != 2 or y.dim() != 2:
        raise ValueError("Input tensors must be two-dimensional.")
    if x.shape[1] != y.shape[1]:
        raise ValueError(f"Input tensors must have the same number of features but {x.shape[1]} != {y.shape[1]}.")

    size, rank = comm.size, comm.rank  # Set up communication.
    if size == 1:  # Use torch functionality in non-parallel case.
        return torch.cdist(x, y)

    # --- Setup and Matrix Initialization ---
    mp, f = x.shape  # Number of samples in local chunk of x and number of features.
    n_local = y.shape[0]  # Number of samples in local chunk of y.

    # Number of samples (rows) in every rank-local y; their sum is the overall number of samples in y.
    y_counts = comm.allgather(n_local)
    n = sum(y_counts)
    print(f"Overall number of samples is {n}.")

    # Column boundaries of every rank's y chunk in the result: chunk r occupies y_bounds[r]:y_bounds[r + 1].
    y_bounds = [0]
    for count in y_counts:
        y_bounds.append(y_bounds[-1] + count)

    # Rank-local chunk of the distance matrix: distances from the local x to all samples of y.
    local_distances = torch.zeros((mp, n))

    # --- Ring Communication Pattern ---
    # 0th iteration: each process calculates distances for its local x chunk against its local y chunk
    # (diagonal blocks of the global distance matrix).
    print(f"Rank [{rank}/{size}]: Calculate diagonal blocks in global distance matrix...")
    local_distances[:, y_bounds[rank]: y_bounds[rank + 1]] = torch.cdist(x, y)

    print(f"Rank [{rank}/{size}]: Start tile-wise ring communication...")
    # Remaining `size - 1` iterations: in iteration k, send the local y to rank + k and receive from rank - k.
    for iter_idx in range(1, size):
        receiver = (rank + iter_idx) % size  # Determine receiving process.
        sender = (rank - iter_idx) % size  # Determine sending process.

        # The sender's chunk size is known from the gathered counts, so no probing is required.
        moving = torch.empty((y_counts[sender], f))

        # Blocking sends need a matching receive to be posted: the first `iter_idx` ranks send first, all others
        # receive first, so every chain of waiting processes ends at a rank that is already sending.
        if rank < iter_idx:
            comm.Send(y, dest=receiver, tag=iter_idx)
            comm.Recv(moving, source=sender, tag=iter_idx)
        else:
            comm.Recv(moving, source=sender, tag=iter_idx)
            comm.Send(y, dest=receiver, tag=iter_idx)

        # Calculate distances between stationary chunk of x and currently considered, moving chunk of y and
        # write the result to the sender's columns of the distance matrix.
        local_distances[:, y_bounds[sender]: y_bounds[sender + 1]] = torch.cdist(x, moving)

    print(f"Rank [{rank}/{size}]: [DONE]")
    return local_distances
if __name__ == "__main__":
    # Driver: load a rank-local slice of the SUSY data and time the distributed distance calculation.
    comm = MPI.COMM_WORLD
    rank, size = comm.rank, comm.size
    data_path = "/pfs/work7/workspace/scratch/ku4408-VL-ScalableAI/data/SUSY_50k.h5"
    dataset = "data"

    if rank == 0:
        banner = (
            "######################\n"
            "# Pairwise distances #\n"
            "######################\n"
            f"COMM_WORLD size is {size}.\n"
            f"Loading data... {data_path}[{dataset}]"
        )
        print(banner)

    # Parallel data loader for SUSY data: equally sized chunks, the last rank also takes the remainder.
    with h5py.File(data_path, "r") as handle:
        chunk = int(handle[dataset].shape[0]/size)
        first = rank * chunk
        last = None if rank == size - 1 else (rank + 1) * chunk
        data = torch.FloatTensor(handle[dataset][first:last])
    print(f"\t[OK]\nRank [{rank}/{size}]: Local data chunk has shape {list(data.shape)}...")

    if rank == 0:
        print("Start distance calculations...")

    # Measure the wall-clock time of the distance calculation of all samples w.r.t. each other.
    t0 = time.perf_counter()
    distances = dist(data, data, comm)
    elapsed = time.perf_counter() - t0

    # Average the rank-local run times over all processes.
    mean_runtime = comm.allreduce(elapsed, op=MPI.SUM) / size
    print(f"Rank [{rank}/{size}]: Local distance matrix has shape {list(distances.shape)}.")
    if rank == 0:
        print(f"Process-averaged run time:\t{mean_runtime} s")
import time
import h5py
import torch
from mpi4py import MPI
torch.set_default_dtype(torch.float32)
def dist_symmetric(x: torch.Tensor, y: torch.Tensor, comm: MPI.Comm = MPI.COMM_WORLD) -> torch.Tensor:
    """
    Calculate pairwise distances between all rows (samples, i.e., along axis 0) of two tensors x and y in parallel.

    The distance matrix is calculated tile-wise with ring communication between processes. Only about half of the
    off-diagonal tiles are computed; each computed tile is sent back to the process whose y chunk it was computed
    from and stored there transposed. This is only correct if x and y hold the same samples with the same
    distribution over the processes, as the transposed tile d(x_q, y_r) is used in place of d(x_r, y_q).

    The result and all receive buffers are allocated with torch's default dtype (this module sets it to float32),
    so x and y are expected to use that dtype as well.

    Parameters
    ----------
    x : torch.Tensor
        First 2d tensor (of shape m/p x f). m is the total number of samples in x, distributed over p processors.
        f is the number of features.
    y : torch.Tensor
        Second 2d tensor (of shape n/p x f). n is the total number of samples in y, distributed over p processors.
        The number of features f must be the same as for x.
    comm : MPI.Comm
        Communicator to use. Default is ``MPI.COMM_WORLD``.

    Returns
    -------
    torch.Tensor
        In the parallel case, the rank-local chunk of the distance matrix of shape (mp, n), where mp is the number
        of local rows of x and n the overall number of rows of y. With a single process, ``torch.cdist(x, y)``.

    Raises
    ------
    ValueError
        If x or y is not two-dimensional, if their numbers of features differ, or (parallel case only) if the
        local chunks of x and y have different numbers of rows.
    """
    # Check each tensor on its own: a chained `a != b != 2` comparison would let, e.g., two 3d tensors pass.
    if x.dim() != 2 or y.dim() != 2:
        raise ValueError("Input tensors must be two-dimensional.")
    if x.shape[1] != y.shape[1]:
        raise ValueError(f"Input tensors must have the same number of features but {x.shape[1]} != {y.shape[1]}.")

    size, rank = comm.size, comm.rank  # Set up communication.
    if size == 1:  # Use torch functionality in non-parallel case.
        return torch.cdist(x, y)

    # --- Setup and Matrix Initialization ---
    mp, f = x.shape  # Number of samples in local chunk of x and number of features.
    n_local = y.shape[0]  # Number of samples in local chunk of y.
    # The transposed tiles received below are written into an (mp, n) matrix, so the row counts have to agree.
    if mp != n_local:
        raise ValueError(
            f"Symmetric distance calculation requires the same number of local samples in x and y "
            f"but {mp} != {n_local}."
        )

    # Number of samples (rows) in every rank-local y; their sum is the overall number of samples in y.
    y_counts = comm.allgather(n_local)
    n = sum(y_counts)
    print(f"Overall number of samples is {n}.")

    # Column boundaries of every rank's y chunk in the result: chunk r occupies y_bounds[r]:y_bounds[r + 1].
    y_bounds = [0]
    for count in y_counts:
        y_bounds.append(y_bounds[-1] + count)

    # Rank-local chunk of the distance matrix: distances from the local x to all samples of y.
    local_distances = torch.zeros((mp, n))

    # --- Ring Communication Pattern ---
    # 0th iteration: each process calculates distances for its local x chunk against its local y chunk
    # (diagonal blocks of the global distance matrix).
    print(f"Rank [{rank}/{size}]: Calculate diagonal blocks...")
    local_distances[:, y_bounds[rank]: y_bounds[rank + 1]] = torch.cdist(x, y)

    print(f"Rank [{rank}/{size}]: Start tile-wise ring communication...")
    # Remaining `size // 2` iterations. We can perform fewer iterations than in the general case because every
    # computed tile also provides its mirrored counterpart.
    for iter_idx in range(1, size // 2 + 1):
        print(f"Rank [{rank}/{size}]: Starting iteration {iter_idx}")
        # With an even number of processes, the last iteration pairs rank r with rank r + size / 2 in both
        # directions, so only one process of each pair has to compute the tile.
        is_final_iteration = size % 2 == 0 and iter_idx == size // 2
        receiver = (rank + iter_idx) % size  # Determine receiving process.
        sender = (rank - iter_idx) % size  # Determine sending process.

        # Columns of the rank-local distance matrix chunk belonging to the sender's and receiver's y chunks.
        sender_cols = slice(y_bounds[sender], y_bounds[sender + 1])
        receiver_cols = slice(y_bounds[receiver], y_bounds[receiver + 1])

        # Buffer for the sender's y chunk; its size is known from the gathered counts.
        moving = torch.empty((y_counts[sender], f))
        # Buffer for the tile the receiver computes from our y chunk: (receiver's rows) x (our rows).
        symmetric = torch.empty((y_counts[receiver], n_local))

        if is_final_iteration:
            if rank >= size // 2:
                # Second half: receive the partner's y chunk, compute the tile, and send it back.
                comm.Recv(moving, source=sender, tag=iter_idx)
                d_ij = torch.cdist(x, moving)
                local_distances[:, sender_cols] = d_ij
                comm.Send(d_ij, dest=sender, tag=iter_idx)
            else:
                # First half: send the local y chunk and wait for the tile computed by the partner.
                comm.Send(y, dest=receiver, tag=iter_idx)
                comm.Recv(symmetric, source=receiver, tag=iter_idx)
                local_distances[:, receiver_cols] = symmetric.transpose(0, 1)
            continue

        # Standard exchange of y chunks. Blocking sends need a matching receive to be posted: the first
        # `iter_idx` ranks send first, all others receive first.
        if rank < iter_idx:
            comm.Send(y, dest=receiver, tag=iter_idx)
            comm.Recv(moving, source=sender, tag=iter_idx)
        else:
            comm.Recv(moving, source=sender, tag=iter_idx)
            comm.Send(y, dest=receiver, tag=iter_idx)

        # Calculate distances between stationary chunk of x and currently considered, moving chunk of y and
        # write the result to the sender's columns of the distance matrix.
        d_ij = torch.cdist(x, moving)
        local_distances[:, sender_cols] = d_ij

        # Only for symmetric metrics: send the calculated tile back to the process the y chunk came from and
        # receive the tile the receiver computed from our y chunk. Same send/receive ordering as above.
        if rank < iter_idx:
            comm.Send(d_ij, dest=sender, tag=iter_idx)
            comm.Recv(symmetric, source=receiver, tag=iter_idx)
        else:
            comm.Recv(symmetric, source=receiver, tag=iter_idx)
            comm.Send(d_ij, dest=sender, tag=iter_idx)
        # Since d(i, j) = d(j, i), the transposed tile fills the receiver's columns.
        local_distances[:, receiver_cols] = symmetric.transpose(0, 1)

    print(f"Rank [{rank}/{size}]: [DONE]")
    return local_distances
if __name__ == "__main__":
    # Driver: load a rank-local slice of the SUSY data and time the symmetric distance calculation.
    comm = MPI.COMM_WORLD
    rank, size = comm.rank, comm.size
    data_path = "./SUSY_50k.h5"
    dataset = "data"

    if rank == 0:
        banner = (
            "######################\n"
            "# Pairwise distances #\n"
            "######################\n"
            f"COMM_WORLD size is {size}.\n"
            f"Loading data... {data_path}[{dataset}]"
        )
        print(banner)

    # Parallel data loader for SUSY data: equally sized chunks, the last rank also takes the remainder.
    with h5py.File(data_path, "r") as handle:
        chunk = int(handle[dataset].shape[0] / size)
        first = rank * chunk
        last = None if rank == size - 1 else (rank + 1) * chunk
        data = torch.FloatTensor(handle[dataset][first:last])
    print(f"\t[OK]\nRank [{rank}/{size}]: Local data chunk has shape {list(data.shape)}...")

    if rank == 0:
        print("Start distance calculations...")

    # Measure the wall-clock time of the distance calculation of all samples w.r.t. each other.
    t0 = time.perf_counter()
    distances = dist_symmetric(data, data, comm)
    elapsed = time.perf_counter() - t0

    # Average the rank-local run times over all processes.
    mean_runtime = comm.allreduce(elapsed, op=MPI.SUM) / size
    print(f"Rank [{rank}/{size}]: Local distance matrix has shape {list(distances.shape)}.")
    if rank == 0:
        print(f"Process-averaged run time:\t{mean_runtime} s")
import time
import h5py
import numpy as np
import torch
from mpi4py import MPI
torch.set_default_dtype(torch.float32)
def dist_symmetric(x: torch.Tensor, y: torch.Tensor, comm: MPI.Comm = MPI.COMM_WORLD) -> torch.Tensor:
    """
    Calculate pairwise distances between all rows (samples, i.e., along axis 0) of two tensors x and y in parallel.

    The distance matrix is calculated tile-wise with ring communication between processes. Only about half of the
    off-diagonal tiles are computed; each computed tile is sent back to the process whose y chunk it was computed
    from and stored there transposed. This is only correct if x and y hold the same samples with the same
    distribution over the processes, as the transposed tile d(x_q, y_r) is used in place of d(x_r, y_q).

    The result and all receive buffers are allocated with torch's default dtype (this module sets it to float32),
    so x and y are expected to use that dtype as well.

    Parameters
    ----------
    x : torch.Tensor
        First 2d tensor (of shape m/p x f). m is the total number of samples in x, distributed over p processors.
        f is the number of features.
    y : torch.Tensor
        Second 2d tensor (of shape n/p x f). n is the total number of samples in y, distributed over p processors.
        The number of features f must be the same as for x.
    comm : MPI.Comm
        Communicator to use. Default is ``MPI.COMM_WORLD``.

    Returns
    -------
    torch.Tensor
        In the parallel case, the rank-local chunk of the distance matrix of shape (mp, n), where mp is the number
        of local rows of x and n the overall number of rows of y. With a single process, ``torch.cdist(x, y)``.

    Raises
    ------
    ValueError
        If x or y is not two-dimensional, if their numbers of features differ, or (parallel case only) if the
        local chunks of x and y have different numbers of rows.
    """
    # Check each tensor on its own: a chained `a != b != 2` comparison would let, e.g., two 3d tensors pass.
    if x.dim() != 2 or y.dim() != 2:
        raise ValueError("Input tensors must be two-dimensional.")
    if x.shape[1] != y.shape[1]:
        raise ValueError(f"Input tensors must have the same number of features but {x.shape[1]} != {y.shape[1]}.")

    size, rank = comm.size, comm.rank  # Set up communication.
    if size == 1:  # Use torch functionality in non-parallel case.
        return torch.cdist(x, y)

    # --- Setup and Matrix Initialization ---
    mp, f = x.shape  # Number of samples in local chunk of x and number of features.
    # Named `n_local` rather than `np` so the numpy alias imported by this module is not shadowed.
    n_local = y.shape[0]  # Number of samples in local chunk of y.
    # The transposed tiles received below are written into an (mp, n) matrix, so the row counts have to agree.
    if mp != n_local:
        raise ValueError(
            f"Symmetric distance calculation requires the same number of local samples in x and y "
            f"but {mp} != {n_local}."
        )

    # Number of samples (rows) in every rank-local y; their sum is the overall number of samples in y.
    y_counts = comm.allgather(n_local)
    n = sum(y_counts)
    print(f"Overall number of samples is {n}.")

    # Column boundaries of every rank's y chunk in the result: chunk r occupies y_bounds[r]:y_bounds[r + 1].
    y_bounds = [0]
    for count in y_counts:
        y_bounds.append(y_bounds[-1] + count)

    # Rank-local chunk of the distance matrix: distances from the local x to all samples of y.
    local_distances = torch.zeros((mp, n))

    # --- Ring Communication Pattern ---
    # 0th iteration: each process calculates distances for its local x chunk against its local y chunk
    # (diagonal blocks of the global distance matrix).
    print(f"Rank [{rank}/{size}]: Calculate diagonal blocks...")
    local_distances[:, y_bounds[rank]: y_bounds[rank + 1]] = torch.cdist(x, y)

    print(f"Rank [{rank}/{size}]: Start tile-wise ring communication...")
    # Remaining `size // 2` iterations. We can perform fewer iterations than in the general case because every
    # computed tile also provides its mirrored counterpart.
    for iter_idx in range(1, size // 2 + 1):
        print(f"Rank [{rank}/{size}]: Starting iteration {iter_idx}")
        # With an even number of processes, the last iteration pairs rank r with rank r + size / 2 in both
        # directions, so only one process of each pair has to compute the tile.
        is_final_iteration = size % 2 == 0 and iter_idx == size // 2
        receiver = (rank + iter_idx) % size  # Determine receiving process.
        sender = (rank - iter_idx) % size  # Determine sending process.

        # Columns of the rank-local distance matrix chunk belonging to the sender's and receiver's y chunks.
        sender_cols = slice(y_bounds[sender], y_bounds[sender + 1])
        receiver_cols = slice(y_bounds[receiver], y_bounds[receiver + 1])

        # Buffer for the sender's y chunk; its size is known from the gathered counts.
        moving = torch.empty((y_counts[sender], f))
        # Buffer for the tile the receiver computes from our y chunk: (receiver's rows) x (our rows).
        symmetric = torch.empty((y_counts[receiver], n_local))

        if is_final_iteration:
            if rank >= size // 2:
                # Second half: receive the partner's y chunk, compute the tile, and send it back.
                comm.Recv(moving, source=sender, tag=iter_idx)
                d_ij = torch.cdist(x, moving)
                local_distances[:, sender_cols] = d_ij
                comm.Send(d_ij, dest=sender, tag=iter_idx)
            else:
                # First half: send the local y chunk and wait for the tile computed by the partner.
                comm.Send(y, dest=receiver, tag=iter_idx)
                comm.Recv(symmetric, source=receiver, tag=iter_idx)
                local_distances[:, receiver_cols] = symmetric.transpose(0, 1)
            continue

        # Standard exchange of y chunks. Blocking sends need a matching receive to be posted: the first
        # `iter_idx` ranks send first, all others receive first.
        if rank < iter_idx:
            comm.Send(y, dest=receiver, tag=iter_idx)
            comm.Recv(moving, source=sender, tag=iter_idx)
        else:
            comm.Recv(moving, source=sender, tag=iter_idx)
            comm.Send(y, dest=receiver, tag=iter_idx)

        # Calculate distances between stationary chunk of x and currently considered, moving chunk of y and
        # write the result to the sender's columns of the distance matrix.
        d_ij = torch.cdist(x, moving)
        local_distances[:, sender_cols] = d_ij

        # Only for symmetric metrics: send the calculated tile back to the process the y chunk came from and
        # receive the tile the receiver computed from our y chunk. Same send/receive ordering as above.
        if rank < iter_idx:
            comm.Send(d_ij, dest=sender, tag=iter_idx)
            comm.Recv(symmetric, source=receiver, tag=iter_idx)
        else:
            comm.Recv(symmetric, source=receiver, tag=iter_idx)
            comm.Send(d_ij, dest=sender, tag=iter_idx)
        # Since d(i, j) = d(j, i), the transposed tile fills the receiver's columns.
        local_distances[:, receiver_cols] = symmetric.transpose(0, 1)

    print(f"Rank [{rank}/{size}]: [DONE]")
    return local_distances
if __name__ == "__main__":
    # Weak-scaling driver: the number of samples grows with the number of processes such that n^2 / p stays
    # constant, then the symmetric distance calculation is timed.
    comm = MPI.COMM_WORLD
    rank, size = comm.rank, comm.size
    data_path = "/pfs/work7/workspace/scratch/ku4408-VL-ScalableAI/data/SUSY_50k.h5"
    dataset = "data"
    if rank == 0:
        print(
            "######################\n"
            "# Pairwise distances #\n"
            "######################\n"
            f"COMM_WORLD size is {size}.\n"
            f"Loading data... {data_path}[{dataset}]"
        )
    # Parallel data loader for SUSY data.
    with h5py.File(data_path, "r") as handle:
        nmax = handle[dataset].shape[0]
        pmax = 16  # Number of processes at which the complete dataset is used.
        n_samples = int(np.round(np.sqrt(nmax**2*size/pmax)))
        print(
            f"Overall number of samples considered: {n_samples}\n"
            f"n^2/p = {n_samples**2/size}"
        )
        # The dataset holds at most `nmax` rows, so more than `pmax` processes cannot get more samples than that.
        n_used = min(n_samples, nmax)
        chunk = n_used // size
        first = rank * chunk
        # The last rank also takes the remainder of the considered samples.
        last = n_used if rank == size - 1 else first + chunk
        # Read only the rank-local rows instead of loading all considered samples on every rank.
        data = torch.FloatTensor(handle[dataset][first:last])
    print(f"\t[OK]\nRank [{rank}/{size}]: Local data chunk has shape {list(data.shape)}...")
    if rank == 0:
        print("Start distance calculations...")
    # Calculate distances of all SUSY samples w.r.t. each other and measure runtime.
    start = time.perf_counter()
    distances = dist_symmetric(data, data, comm)
    local_runtime = time.perf_counter() - start
    # Calculate process-averaged runtime.
    average_runtime = comm.allreduce(local_runtime, op=MPI.SUM) / size
    print(f"Rank [{rank}/{size}]: Local distance matrix has shape {list(distances.shape)}.")
    if rank == 0:
        print(f"Process-averaged run time:\t{average_runtime} s")
"""Parallel calculation of pairwise distances"""
import time
import h5py
import numpy as np
import torch
from mpi4py import MPI
torch.set_default_dtype(torch.float32)
def dist(x: torch.Tensor, y: torch.Tensor, comm: MPI.Comm = MPI.COMM_WORLD) -> torch.Tensor:
    """
    Calculate pairwise distances between all rows (samples, i.e., along axis 0) of two tensors x and y in parallel.

    The distance matrix is calculated tile-wise with ring communication between processes, each holding a piece of x
    and/or y. The result and all receive buffers are allocated with torch's default dtype (this module sets it to
    float32), so x and y are expected to use that dtype as well.

    Parameters
    ----------
    x : torch.Tensor
        First 2d tensor (of shape m/p x f). m is the total number of samples in x, distributed over p processors.
        f is the number of features.
    y : torch.Tensor
        Second 2d tensor (of shape n/p x f). n is the total number of samples in y, distributed over p processors.
        The number of features f must be the same as for x.
    comm : MPI.Comm
        Communicator to use. Default is ``MPI.COMM_WORLD``.

    Returns
    -------
    torch.Tensor
        In the parallel case, the rank-local chunk of the distance matrix of shape (mp, n), where mp is the number
        of local rows of x and n the overall number of rows of y. With a single process, ``torch.cdist(x, y)``.

    Raises
    ------
    ValueError
        If x or y is not two-dimensional or if their numbers of features differ.
    """
    # Check each tensor on its own: a chained `a != b != 2` comparison would let, e.g., two 3d tensors pass.
    if x.dim() != 2 or y.dim() != 2:
        raise ValueError("Input tensors must be two-dimensional.")
    if x.shape[1] != y.shape[1]:
        raise ValueError(f"Input tensors must have the same number of features but {x.shape[1]} != {y.shape[1]}.")

    size, rank = comm.size, comm.rank  # Set up communication.
    if size == 1:  # Use torch functionality in non-parallel case.
        return torch.cdist(x, y)

    # --- Setup and Matrix Initialization ---
    mp, f = x.shape  # Number of samples in local chunk of x and number of features.
    # Named `n_local` rather than `np` so the numpy alias imported by this module is not shadowed.
    n_local = y.shape[0]  # Number of samples in local chunk of y.

    # Number of samples (rows) in every rank-local y; their sum is the overall number of samples in y.
    y_counts = comm.allgather(n_local)
    n = sum(y_counts)
    print(f"Overall number of samples is {n}.")

    # Column boundaries of every rank's y chunk in the result: chunk r occupies y_bounds[r]:y_bounds[r + 1].
    y_bounds = [0]
    for count in y_counts:
        y_bounds.append(y_bounds[-1] + count)

    # Rank-local chunk of the distance matrix: distances from the local x to all samples of y.
    local_distances = torch.zeros((mp, n))

    # --- Ring Communication Pattern ---
    # 0th iteration: each process calculates distances for its local x chunk against its local y chunk
    # (diagonal blocks of the global distance matrix).
    print(f"Rank [{rank}/{size}]: Calculate diagonal blocks in global distance matrix...")
    local_distances[:, y_bounds[rank]: y_bounds[rank + 1]] = torch.cdist(x, y)

    print(f"Rank [{rank}/{size}]: Start tile-wise ring communication...")
    # Remaining `size - 1` iterations: in iteration k, send the local y to rank + k and receive from rank - k.
    for iter_idx in range(1, size):
        receiver = (rank + iter_idx) % size  # Determine receiving process.
        sender = (rank - iter_idx) % size  # Determine sending process.

        # The sender's chunk size is known from the gathered counts, so no probing is required.
        moving = torch.empty((y_counts[sender], f))

        # Blocking sends need a matching receive to be posted: the first `iter_idx` ranks send first, all others
        # receive first, so every chain of waiting processes ends at a rank that is already sending.
        if rank < iter_idx:
            comm.Send(y, dest=receiver, tag=iter_idx)
            comm.Recv(moving, source=sender, tag=iter_idx)
        else:
            comm.Recv(moving, source=sender, tag=iter_idx)
            comm.Send(y, dest=receiver, tag=iter_idx)

        # Calculate distances between stationary chunk of x and currently considered, moving chunk of y and
        # write the result to the sender's columns of the distance matrix.
        local_distances[:, y_bounds[sender]: y_bounds[sender + 1]] = torch.cdist(x, moving)

    print(f"Rank [{rank}/{size}]: [DONE]")
    return local_distances
if __name__ == "__main__":
    # Weak-scaling driver: the number of samples grows with the number of processes such that n^2 / p stays
    # constant, then the distributed distance calculation is timed.
    comm = MPI.COMM_WORLD
    rank, size = comm.rank, comm.size
    data_path = "/pfs/work7/workspace/scratch/ku4408-VL-ScalableAI/data/SUSY_50k.h5"
    dataset = "data"
    if rank == 0:
        print(
            "######################\n"
            "# Pairwise distances #\n"
            "######################\n"
            f"COMM_WORLD size is {size}.\n"
            f"Loading data... {data_path}[{dataset}]"
        )
    # Parallel data loader for SUSY data.
    with h5py.File(data_path, "r") as handle:
        nmax = handle[dataset].shape[0]
        pmax = 16  # Number of processes at which the complete dataset is used.
        n_samples = int(np.round(np.sqrt(nmax**2*size/pmax)))
        print(
            f"Overall number of samples considered: {n_samples}\n"
            f"n^2/p = {n_samples**2/size}"
        )
        # The dataset holds at most `nmax` rows, so more than `pmax` processes cannot get more samples than that.
        n_used = min(n_samples, nmax)
        chunk = n_used // size
        first = rank * chunk
        # The last rank also takes the remainder of the considered samples.
        last = n_used if rank == size - 1 else first + chunk
        # Read only the rank-local rows instead of loading all considered samples on every rank.
        data = torch.FloatTensor(handle[dataset][first:last])
    print(f"\t[OK]\nRank [{rank}/{size}]: Local data chunk has shape {list(data.shape)}...")
    if rank == 0:
        print("Start distance calculations...")
    # Calculate distances of all SUSY samples w.r.t. each other and measure runtime.
    start = time.perf_counter()
    distances = dist(data, data, comm)
    local_runtime = time.perf_counter() - start
    # Calculate process-averaged runtime.
    average_runtime = comm.allreduce(local_runtime, op=MPI.SUM) / size
    print(f"Rank [{rank}/{size}]: Local distance matrix has shape {list(distances.shape)}.")
    if rank == 0:
        print(f"Process-averaged run time:\t{average_runtime} s")
import argparse
import os
import pathlib
import re
from glob import glob
from typing import Optional
import matplotlib.pyplot as plt
def process_file(file_path: str) -> Optional[float]:
    """
    Extract all process-averaged run times, remove the largest value, and compute the average.

    Run times are read from lines of the form ``Process-averaged run time:<whitespace><number>``.
    Numbers may be written as plain decimals (``4.46``), integers (``4``), or in exponent notation
    (``9.5e-05``), which is how Python prints very small or very large floats.

    Parameters
    ----------
    file_path : str
        Path to the .out file.

    Returns
    -------
    Optional[float]
        The average of the run times after dropping the largest value, or None if there
        are not enough values.
    """
    # The optional fraction and exponent parts make sure that a value such as "9.5e-05" is captured
    # completely instead of being truncated to "9.5".
    runtime_pattern = r"Process-averaged run time:\s+(\d+(?:\.\d*)?(?:[eE][+-]?\d+)?)"
    with open(file_path, "r") as file:
        # Extract all run time values as floats.
        times = [float(match) for match in re.findall(runtime_pattern, file.read())]
    # If fewer than two values are found, there is not enough data to drop the highest value.
    if len(times) <= 1:
        return None
    # Drop (one occurrence of) the largest value.
    times.remove(max(times))
    # Calculate and return the average of the remaining values.
    return sum(times) / len(times)
def extract_node_count(filename: str) -> Optional[int]:
    """
    Read the number of nodes encoded in the name of an output file.

    The name is searched for the pattern ``nodes_<digits>.out``; the digits are returned as integer.

    Parameters
    ----------
    filename : str
        The name of the .out file.

    Returns
    -------
    Optional[int]
        The node count found in the file name, or None if the name does not contain the pattern.
    """
    found = re.search(r"nodes_(\d+)\.out", filename)
    if not found:
        return None
    return int(found.group(1))
def process_out_files(folder_path: str) -> dict[int, Optional[float]]:
    """
    Collect the average run time per node count from all .out files in a folder.

    Files whose name does not encode a node count and files that do not contain enough run time
    values are skipped, so they do not show up in the returned dictionary. If several files encode
    the same node count, the file processed last wins. The collected values are printed sorted by
    node count.

    Parameters
    ----------
    folder_path : str
        Path to the folder containing .out files.

    Returns
    -------
    dict[int, Optional[float]]
        Dictionary mapping each node count to the average run time of the corresponding file.
    """
    results = {}
    for out_file in glob(os.path.join(folder_path, "*.out")):
        node_count = extract_node_count(os.path.basename(out_file))
        if node_count is None:
            # File name does not follow the expected naming scheme.
            continue
        avg_time = process_file(out_file)
        if avg_time is None:
            # Not enough run time values in this file.
            continue
        results[node_count] = avg_time

    # Display results ordered by node count.
    for nodes in sorted(results):
        print(f"Nodes {nodes}: Average time (without max value) = {results[nodes]:.5f} s")
    return results
def plot_results(results: dict[int, float], scaling_type: str, file_path: pathlib.Path | str) -> None:
    """
    Plot runtime and speedup (strong scaling) or efficiency (weak scaling) based on the results dictionary.

    The figure is saved as ``<folder name>.pdf`` inside the folder given by ``file_path`` and shown
    afterwards. The derived quantities are additionally printed to stdout.

    Parameters
    ----------
    results : dict[int, float]
        Dictionary containing average run time for each node count.
    scaling_type : str
        The type of scaling considered. Must be either "strong" or "weak".
    file_path : pathlib.Path | str
        Folder in which the figure is stored. The name of the folder is used as the file name.

    Raises
    ------
    ValueError
        If ``scaling_type`` is neither "strong" nor "weak" or if ``results`` is empty.
    """
    if scaling_type == "strong":
        derived_label = "Speedup"
    elif scaling_type == "weak":
        derived_label = "Efficiency"
    else:
        raise ValueError(f"Unknown scaling type: {scaling_type}")
    if not results:
        raise ValueError("No results to plot.")

    # Sort results by number of nodes.
    nodes = sorted(results)
    runtimes = [results[n] for n in nodes]

    # Both speedup and weak-scaling efficiency are the baseline runtime (fewest nodes) divided by
    # the runtime on n nodes; they only differ in the ideal curve they are compared against.
    baseline_runtime = runtimes[0]
    derived_quantity = [baseline_runtime / runtime for runtime in runtimes]
    if scaling_type == "strong":
        # Ideal speedup is relative to the baseline node count (equals n if the baseline is one node).
        ideal = [n / nodes[0] for n in nodes]
    else:
        ideal = len(nodes) * [1]
    print(derived_quantity)

    # Settings
    labelsize = "x-large"

    # Plotting
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 5))

    # Runtime vs Number of Nodes
    ax1.plot(nodes, runtimes, marker="X", ms=10, color="#009682")

    # Speedup / Efficiency
    ax2.plot(nodes, ideal, ls="--", color="k", label="Ideal")
    ax2.plot(nodes, derived_quantity, marker="X", ms=10, color="#009682", label="Actual")
    ax2.legend()

    # Styling shared by both panels.
    for ax, y_label in ((ax1, "Runtime / s"), (ax2, derived_label)):
        ax.set_facecolor("lightgrey")
        ax.set_xlabel("Number of nodes", fontweight="bold", fontsize=labelsize)
        ax.set_ylabel(y_label, fontweight="bold", fontsize=labelsize)
        ax.grid(True)
        ax.grid(color="white", linewidth=1.7)
        ax.tick_params(axis="both", labelsize=labelsize)
        ax.set_xticks(nodes, [str(node) for node in nodes])

    if scaling_type == "weak":
        ax2.set_ylim([0, 1.1])

    plt.tight_layout()
    # Name the file after the folder itself. Using the complete path as file name would duplicate
    # nested paths and, for absolute paths, make the join discard the folder altogether.
    out_dir = pathlib.Path(file_path)
    pdf_name = f"{out_dir.resolve().name or 'scaling'}.pdf"
    plt.savefig(out_dir / pdf_name)
    plt.show()
if __name__ == "__main__":
    # Command-line entry point: collect the run times from a results folder and plot them.
    parser = argparse.ArgumentParser(
        prog="Plot cdist scaling results",
    )
    parser.add_argument(  # Folder containing the .out files
        "--path",
        type=pathlib.Path,
        default="./",
        help='Folder containing the .out files. Its name must start with "strong_" or "weak_".',
    )
    config = parser.parse_args()
    # Directory containing the .out files
    results = process_out_files(config.path)
    # The scaling type is the part of the folder name before the first underscore. Only the last
    # path component is inspected so that nested, absolute, and "./"-style paths work as well.
    scaling_type = config.path.resolve().name.split("_")[0]
    # Plot the results
    plot_results(results, scaling_type=scaling_type, file_path=pathlib.Path(config.path))
Run 1
######################
# Pairwise distances #
######################
COMM_WORLD size is 1.
Loading data... /pfs/work7/workspace/scratch/ku4408-VL-ScalableAI/data/SUSY_50k.h5[data]
[OK]
Rank [0/1]: Local data chunk has shape [50000, 18]...
Start distance calculations...
Rank [0/1]: Local distance matrix has shape [50000, 50000].
Process-averaged run time: 6.33100987970829 s
Run 2
######################
# Pairwise distances #
######################
COMM_WORLD size is 1.
Loading data... /pfs/work7/workspace/scratch/ku4408-VL-ScalableAI/data/SUSY_50k.h5[data]
[OK]
Rank [0/1]: Local data chunk has shape [50000, 18]...
Start distance calculations...
Rank [0/1]: Local distance matrix has shape [50000, 50000].
Process-averaged run time: 4.461751670576632 s
Run 3
######################
# Pairwise distances #
######################
COMM_WORLD size is 1.
Loading data... /pfs/work7/workspace/scratch/ku4408-VL-ScalableAI/data/SUSY_50k.h5[data]
[OK]
Rank [0/1]: Local data chunk has shape [50000, 18]...
Start distance calculations...
Rank [0/1]: Local distance matrix has shape [50000, 50000].
Process-averaged run time: 4.44517351873219 s
Run 4
######################
# Pairwise distances #
######################
COMM_WORLD size is 1.
Loading data... /pfs/work7/workspace/scratch/ku4408-VL-ScalableAI/data/SUSY_50k.h5[data]
[OK]
Rank [0/1]: Local data chunk has shape [50000, 18]...
Start distance calculations...
Rank [0/1]: Local distance matrix has shape [50000, 50000].
Process-averaged run time: 4.454093327745795 s
Run 5
######################
# Pairwise distances #
######################
COMM_WORLD size is 1.
Loading data... /pfs/work7/workspace/scratch/ku4408-VL-ScalableAI/data/SUSY_50k.h5[data]
[OK]
Rank [0/1]: Local data chunk has shape [50000, 18]...
Start distance calculations...
Rank [0/1]: Local distance matrix has shape [50000, 50000].
Process-averaged run time: 4.474684880115092 s
Run 6
######################
# Pairwise distances #
######################
COMM_WORLD size is 1.
Loading data... /pfs/work7/workspace/scratch/ku4408-VL-ScalableAI/data/SUSY_50k.h5[data]
[OK]
Rank [0/1]: Local data chunk has shape [50000, 18]...
Start distance calculations...
Rank [0/1]: Local distance matrix has shape [50000, 50000].
Process-averaged run time: 4.447230387479067 s
Run 7
######################
# Pairwise distances #
######################
COMM_WORLD size is 1.
Loading data... /pfs/work7/workspace/scratch/ku4408-VL-ScalableAI/data/SUSY_50k.h5[data]
[OK]
Rank [0/1]: Local data chunk has shape [50000, 18]...
Start distance calculations...
Rank [0/1]: Local distance matrix has shape [50000, 50000].
Process-averaged run time: 4.445676466450095 s
Run 8
######################
# Pairwise distances #
######################
COMM_WORLD size is 1.
Loading data... /pfs/work7/workspace/scratch/ku4408-VL-ScalableAI/data/SUSY_50k.h5[data]
[OK]
Rank [0/1]: Local data chunk has shape [50000, 18]...
Start distance calculations...
Rank [0/1]: Local distance matrix has shape [50000, 50000].
Process-averaged run time: 4.4390754187479615 s
Run 9
######################
# Pairwise distances #
######################
COMM_WORLD size is 1.
Loading data... /pfs/work7/workspace/scratch/ku4408-VL-ScalableAI/data/SUSY_50k.h5[data]
[OK]
Rank [0/1]: Local data chunk has shape [50000, 18]...
Start distance calculations...
Rank [0/1]: Local distance matrix has shape [50000, 50000].
Process-averaged run time: 4.455759993754327 s
Run 10
######################
# Pairwise distances #
######################
COMM_WORLD size is 1.
Loading data... /pfs/work7/workspace/scratch/ku4408-VL-ScalableAI/data/SUSY_50k.h5[data]
[OK]
Rank [0/1]: Local data chunk has shape [50000, 18]...
Start distance calculations...
Rank [0/1]: Local distance matrix has shape [50000, 50000].
Process-averaged run time: 4.4520881017670035 s
Run 11
######################
# Pairwise distances #
######################
COMM_WORLD size is 1.
Loading data... /pfs/work7/workspace/scratch/ku4408-VL-ScalableAI/data/SUSY_50k.h5[data]
[OK]
Rank [0/1]: Local data chunk has shape [50000, 18]...
Start distance calculations...
Rank [0/1]: Local distance matrix has shape [50000, 50000].
Process-averaged run time: 4.454913998953998 s
============================= JOB FEEDBACK =============================
NodeName=uc2n420
Job ID: 24674001
Cluster: uc2
User/Group: ku4408/scc
State: COMPLETED (exit code 0)
Nodes: 1
Cores per node: 40
CPU Utilized: 00:01:13
CPU Efficiency: 1.12% of 01:48:40 core-walltime
Job Wall-clock time: 00:02:43
Memory Utilized: 9.81 GB
Memory Efficiency: 22.31% of 43.95 GB
This diff is collapsed.
Run 1
######################
# Pairwise distances #
######################
COMM_WORLD size is 2.
Loading data... /pfs/work7/workspace/scratch/ku4408-VL-ScalableAI/data/SUSY_50k.h5[data]
[OK]
Rank [1/2]: Local data chunk has shape [25000, 18]...
[OK]
Rank [0/2]: Local data chunk has shape [25000, 18]...
Start distance calculations...
Overall number of samples is 50000.
Overall number of samples is 50000.
Rank [0/2]: Calculate diagonal blocks in global distance matrix...
Rank [1/2]: Calculate diagonal blocks in global distance matrix...
Rank [0/2]: Start tile-wise ring communication...
Rank [1/2]: Start tile-wise ring communication...
Rank [1/2]: [DONE]
Rank [0/2]: [DONE]
Rank [0/2]: Local distance matrix has shape [25000, 50000].
Process-averaged run time: 4.994515418075025 s
Rank [1/2]: Local distance matrix has shape [25000, 50000].
Run 2
######################
# Pairwise distances #
######################
COMM_WORLD size is 2.
Loading data... /pfs/work7/workspace/scratch/ku4408-VL-ScalableAI/data/SUSY_50k.h5[data]
[OK]
Rank [0/2]: Local data chunk has shape [25000, 18]...
Start distance calculations...
[OK]
Rank [1/2]: Local data chunk has shape [25000, 18]...
Overall number of samples is 50000.
Overall number of samples is 50000.
Rank [0/2]: Calculate diagonal blocks in global distance matrix...
Rank [1/2]: Calculate diagonal blocks in global distance matrix...
Rank [0/2]: Start tile-wise ring communication...
Rank [1/2]: Start tile-wise ring communication...
Rank [1/2]: [DONE]
Rank [0/2]: [DONE]
Rank [0/2]: Local distance matrix has shape [25000, 50000].
Process-averaged run time: 4.163690152112395 s
Rank [1/2]: Local distance matrix has shape [25000, 50000].
Run 3
######################
# Pairwise distances #
######################
COMM_WORLD size is 2.
Loading data... /pfs/work7/workspace/scratch/ku4408-VL-ScalableAI/data/SUSY_50k.h5[data]
[OK]
Rank [0/2]: Local data chunk has shape [25000, 18]...
Start distance calculations...
[OK]
Rank [1/2]: Local data chunk has shape [25000, 18]...
Overall number of samples is 50000.
Overall number of samples is 50000.
Rank [0/2]: Calculate diagonal blocks in global distance matrix...
Rank [1/2]: Calculate diagonal blocks in global distance matrix...
Rank [0/2]: Start tile-wise ring communication...
Rank [1/2]: Start tile-wise ring communication...
Rank [1/2]: [DONE]
Rank [0/2]: [DONE]
Rank [0/2]: Local distance matrix has shape [25000, 50000].
Process-averaged run time: 4.160190102178603 s
Rank [1/2]: Local distance matrix has shape [25000, 50000].
Run 4
######################
# Pairwise distances #
######################
COMM_WORLD size is 2.
Loading data... /pfs/work7/workspace/scratch/ku4408-VL-ScalableAI/data/SUSY_50k.h5[data]
[OK]
Rank [0/2]: Local data chunk has shape [25000, 18]...
Start distance calculations...
[OK]
Rank [1/2]: Local data chunk has shape [25000, 18]...
Overall number of samples is 50000.
Overall number of samples is 50000.
Rank [0/2]: Calculate diagonal blocks in global distance matrix...
Rank [1/2]: Calculate diagonal blocks in global distance matrix...
Rank [0/2]: Start tile-wise ring communication...
Rank [1/2]: Start tile-wise ring communication...
Rank [1/2]: [DONE]
Rank [0/2]: [DONE]
Rank [0/2]: Local distance matrix has shape [25000, 50000].
Process-averaged run time: 4.163418059237301 s
Rank [1/2]: Local distance matrix has shape [25000, 50000].
Run 5
######################
# Pairwise distances #
######################
COMM_WORLD size is 2.
Loading data... /pfs/work7/workspace/scratch/ku4408-VL-ScalableAI/data/SUSY_50k.h5[data]
[OK]
Rank [0/2]: Local data chunk has shape [25000, 18]...
Start distance calculations...
[OK]
Rank [1/2]: Local data chunk has shape [25000, 18]...
Overall number of samples is 50000.
Overall number of samples is 50000.
Rank [0/2]: Calculate diagonal blocks in global distance matrix...
Rank [1/2]: Calculate diagonal blocks in global distance matrix...
Rank [0/2]: Start tile-wise ring communication...
Rank [1/2]: Start tile-wise ring communication...
Rank [1/2]: [DONE]
Rank [0/2]: [DONE]
Rank [0/2]: Local distance matrix has shape [25000, 50000].
Process-averaged run time: 4.159190197940916 s
Rank [1/2]: Local distance matrix has shape [25000, 50000].
Run 6
######################
# Pairwise distances #
######################
COMM_WORLD size is 2.
Loading data... /pfs/work7/workspace/scratch/ku4408-VL-ScalableAI/data/SUSY_50k.h5[data]
[OK]
Rank [1/2]: Local data chunk has shape [25000, 18]...
[OK]
Rank [0/2]: Local data chunk has shape [25000, 18]...
Start distance calculations...
Overall number of samples is 50000.
Overall number of samples is 50000.
Rank [0/2]: Calculate diagonal blocks in global distance matrix...
Rank [1/2]: Calculate diagonal blocks in global distance matrix...
Rank [0/2]: Start tile-wise ring communication...
Rank [1/2]: Start tile-wise ring communication...
Rank [1/2]: [DONE]
Rank [0/2]: [DONE]
Rank [0/2]: Local distance matrix has shape [25000, 50000].
Process-averaged run time: 4.163499476853758 s
Rank [1/2]: Local distance matrix has shape [25000, 50000].
Run 7
######################
# Pairwise distances #
######################
COMM_WORLD size is 2.
Loading data... /pfs/work7/workspace/scratch/ku4408-VL-ScalableAI/data/SUSY_50k.h5[data]
[OK]
Rank [0/2]: Local data chunk has shape [25000, 18]...
Start distance calculations...
[OK]
Rank [1/2]: Local data chunk has shape [25000, 18]...
Overall number of samples is 50000.
Overall number of samples is 50000.
Rank [0/2]: Calculate diagonal blocks in global distance matrix...
Rank [1/2]: Calculate diagonal blocks in global distance matrix...
Rank [0/2]: Start tile-wise ring communication...
Rank [1/2]: Start tile-wise ring communication...
Rank [1/2]: [DONE]
Rank [0/2]: [DONE]
Rank [0/2]: Local distance matrix has shape [25000, 50000].
Process-averaged run time: 4.1650856817141175 s
Rank [1/2]: Local distance matrix has shape [25000, 50000].
Run 8
######################
# Pairwise distances #
######################
COMM_WORLD size is 2.
Loading data... /pfs/work7/workspace/scratch/ku4408-VL-ScalableAI/data/SUSY_50k.h5[data]
[OK]
Rank [0/2]: Local data chunk has shape [25000, 18]...
Start distance calculations...
[OK]
Rank [1/2]: Local data chunk has shape [25000, 18]...
Overall number of samples is 50000.
Overall number of samples is 50000.
Rank [0/2]: Calculate diagonal blocks in global distance matrix...
Rank [1/2]: Calculate diagonal blocks in global distance matrix...
Rank [0/2]: Start tile-wise ring communication...
Rank [1/2]: Start tile-wise ring communication...
Rank [1/2]: [DONE]
Rank [0/2]: [DONE]
Rank [0/2]: Local distance matrix has shape [25000, 50000].
Process-averaged run time: 4.165958676021546 s
Rank [1/2]: Local distance matrix has shape [25000, 50000].
Run 9
######################
# Pairwise distances #
######################
COMM_WORLD size is 2.
Loading data... /pfs/work7/workspace/scratch/ku4408-VL-ScalableAI/data/SUSY_50k.h5[data]
[OK]
Rank [0/2]: Local data chunk has shape [25000, 18]...
Start distance calculations...
[OK]
Rank [1/2]: Local data chunk has shape [25000, 18]...
Overall number of samples is 50000.
Overall number of samples is 50000.
Rank [0/2]: Calculate diagonal blocks in global distance matrix...
Rank [1/2]: Calculate diagonal blocks in global distance matrix...
Rank [0/2]: Start tile-wise ring communication...
Rank [1/2]: Start tile-wise ring communication...
Rank [1/2]: [DONE]
Rank [0/2]: [DONE]
Rank [0/2]: Local distance matrix has shape [25000, 50000].
Process-averaged run time: 4.162757798098028 s
Rank [1/2]: Local distance matrix has shape [25000, 50000].
Run 10
######################
# Pairwise distances #
######################
COMM_WORLD size is 2.
Loading data... /pfs/work7/workspace/scratch/ku4408-VL-ScalableAI/data/SUSY_50k.h5[data]
[OK]
Rank [1/2]: Local data chunk has shape [25000, 18]...
[OK]
Rank [0/2]: Local data chunk has shape [25000, 18]...
Start distance calculations...
Overall number of samples is 50000.
Overall number of samples is 50000.
Rank [0/2]: Calculate diagonal blocks in global distance matrix...
Rank [1/2]: Calculate diagonal blocks in global distance matrix...
Rank [0/2]: Start tile-wise ring communication...
Rank [1/2]: Start tile-wise ring communication...
Rank [1/2]: [DONE]
Rank [0/2]: [DONE]
Rank [0/2]: Local distance matrix has shape [25000, 50000].
Process-averaged run time: 4.164449342060834 s
Rank [1/2]: Local distance matrix has shape [25000, 50000].
Run 11
######################
# Pairwise distances #
######################
COMM_WORLD size is 2.
Loading data... /pfs/work7/workspace/scratch/ku4408-VL-ScalableAI/data/SUSY_50k.h5[data]
[OK]
Rank [0/2]: Local data chunk has shape [25000, 18]...
Start distance calculations...
[OK]
Rank [1/2]: Local data chunk has shape [25000, 18]...
Overall number of samples is 50000.
Overall number of samples is 50000.
Rank [0/2]: Calculate diagonal blocks in global distance matrix...
Rank [1/2]: Calculate diagonal blocks in global distance matrix...
Rank [0/2]: Start tile-wise ring communication...
Rank [1/2]: Start tile-wise ring communication...
Rank [1/2]: [DONE]
Rank [0/2]: [DONE]
Rank [0/2]: Local distance matrix has shape [25000, 50000].
Process-averaged run time: 4.169782547280192 s
Rank [1/2]: Local distance matrix has shape [25000, 50000].
============================= JOB FEEDBACK =============================
NodeName=uc2n[259-260]
Job ID: 24674015
Cluster: uc2
User/Group: ku4408/scc
State: COMPLETED (exit code 0)
Nodes: 2
Cores per node: 80
CPU Utilized: 00:02:21
CPU Efficiency: 0.66% of 05:57:20 core-walltime
Job Wall-clock time: 00:02:14
Memory Utilized: 9.81 GB
Memory Efficiency: 5.58% of 175.78 GB
This diff is collapsed.
Run 1
######################
# Pairwise distances #
######################
COMM_WORLD size is 4.
Loading data... /pfs/work7/workspace/scratch/ku4408-VL-ScalableAI/data/SUSY_50k.h5[data]
[OK]
Rank [3/4]: Local data chunk has shape [12500, 18]...
[OK]
Rank [0/4]: Local data chunk has shape [12500, 18]...
Start distance calculations...
[OK]
Rank [2/4]: Local data chunk has shape [12500, 18]...
[OK]
Rank [1/4]: Local data chunk has shape [12500, 18]...
Overall number of samples is 50000.
Overall number of samples is 50000.
Overall number of samples is 50000.
Overall number of samples is 50000.
Rank [3/4]: Calculate diagonal blocks in global distance matrix...
Rank [1/4]: Calculate diagonal blocks in global distance matrix...
Rank [0/4]: Calculate diagonal blocks in global distance matrix...
Rank [2/4]: Calculate diagonal blocks in global distance matrix...
Rank [0/4]: Start tile-wise ring communication...
Rank [3/4]: Start tile-wise ring communication...
Rank [2/4]: Start tile-wise ring communication...
Rank [1/4]: Start tile-wise ring communication...
Rank [3/4]: [DONE]
Rank [1/4]: [DONE]
Rank [0/4]: [DONE]
Rank [2/4]: [DONE]
Rank [0/4]: Local distance matrix has shape [12500, 50000].
Process-averaged run time: 2.2419050564058125 s
Rank [2/4]: Local distance matrix has shape [12500, 50000].
Rank [1/4]: Local distance matrix has shape [12500, 50000].
Rank [3/4]: Local distance matrix has shape [12500, 50000].
Run 2
######################
# Pairwise distances #
######################
COMM_WORLD size is 4.
Loading data... /pfs/work7/workspace/scratch/ku4408-VL-ScalableAI/data/SUSY_50k.h5[data]
[OK]
Rank [2/4]: Local data chunk has shape [12500, 18]...
[OK]
Rank [3/4]: Local data chunk has shape [12500, 18]...
[OK]
Rank [1/4]: Local data chunk has shape [12500, 18]...
[OK]
Rank [0/4]: Local data chunk has shape [12500, 18]...
Start distance calculations...
Overall number of samples is 50000.
Overall number of samples is 50000.
Overall number of samples is 50000.
Overall number of samples is 50000.
Rank [0/4]: Calculate diagonal blocks in global distance matrix...
Rank [2/4]: Calculate diagonal blocks in global distance matrix...
Rank [3/4]: Calculate diagonal blocks in global distance matrix...
Rank [1/4]: Calculate diagonal blocks in global distance matrix...
Rank [0/4]: Start tile-wise ring communication...
Rank [3/4]: Start tile-wise ring communication...
Rank [2/4]: Start tile-wise ring communication...
Rank [1/4]: Start tile-wise ring communication...
Rank [3/4]: [DONE]
Rank [1/4]: [DONE]
Rank [2/4]: [DONE]
Rank [0/4]: [DONE]
Rank [0/4]: Local distance matrix has shape [12500, 50000].
Process-averaged run time: 2.143836056580767 s
Rank [2/4]: Local distance matrix has shape [12500, 50000].
Rank [3/4]: Local distance matrix has shape [12500, 50000].
Rank [1/4]: Local distance matrix has shape [12500, 50000].
Run 3
######################
# Pairwise distances #
######################
COMM_WORLD size is 4.
Loading data... /pfs/work7/workspace/scratch/ku4408-VL-ScalableAI/data/SUSY_50k.h5[data]
[OK]
Rank [0/4]: Local data chunk has shape [12500, 18]...
Start distance calculations...
[OK]
Rank [2/4]: Local data chunk has shape [12500, 18]...
[OK]
Rank [3/4]: Local data chunk has shape [12500, 18]...
[OK]
Rank [1/4]: Local data chunk has shape [12500, 18]...
Overall number of samples is 50000.
Overall number of samples is 50000.
Overall number of samples is 50000.
Overall number of samples is 50000.
Rank [0/4]: Calculate diagonal blocks in global distance matrix...
Rank [2/4]: Calculate diagonal blocks in global distance matrix...
Rank [1/4]: Calculate diagonal blocks in global distance matrix...
Rank [3/4]: Calculate diagonal blocks in global distance matrix...
Rank [0/4]: Start tile-wise ring communication...
Rank [2/4]: Start tile-wise ring communication...
Rank [1/4]: Start tile-wise ring communication...
Rank [3/4]: Start tile-wise ring communication...
Rank [3/4]: [DONE]
Rank [1/4]: [DONE]
Rank [0/4]: [DONE]
Rank [2/4]: [DONE]
Rank [0/4]: Local distance matrix has shape [12500, 50000].
Process-averaged run time: 2.1328196288086474 s
Rank [1/4]: Local distance matrix has shape [12500, 50000].
Rank [3/4]: Local distance matrix has shape [12500, 50000].
Rank [2/4]: Local distance matrix has shape [12500, 50000].
Run 4
######################
# Pairwise distances #
######################
COMM_WORLD size is 4.
Loading data... /pfs/work7/workspace/scratch/ku4408-VL-ScalableAI/data/SUSY_50k.h5[data]
[OK]
Rank [3/4]: Local data chunk has shape [12500, 18]...
[OK]
Rank [0/4]: Local data chunk has shape [12500, 18]...
Start distance calculations...
[OK]
Rank [1/4]: Local data chunk has shape [12500, 18]...
[OK]
Rank [2/4]: Local data chunk has shape [12500, 18]...
Overall number of samples is 50000.
Overall number of samples is 50000.
Overall number of samples is 50000.
Overall number of samples is 50000.
Rank [0/4]: Calculate diagonal blocks in global distance matrix...
Rank [2/4]: Calculate diagonal blocks in global distance matrix...
Rank [3/4]: Calculate diagonal blocks in global distance matrix...
Rank [1/4]: Calculate diagonal blocks in global distance matrix...
Rank [0/4]: Start tile-wise ring communication...
Rank [2/4]: Start tile-wise ring communication...
Rank [3/4]: Start tile-wise ring communication...
Rank [1/4]: Start tile-wise ring communication...
Rank [3/4]: [DONE]
Rank [1/4]: [DONE]
Rank [2/4]: [DONE]
Rank [0/4]: [DONE]
Rank [0/4]: Local distance matrix has shape [12500, 50000].
Process-averaged run time: 2.1412369539029896 s
Rank [3/4]: Local distance matrix has shape [12500, 50000].
Rank [2/4]: Local distance matrix has shape [12500, 50000].
Rank [1/4]: Local distance matrix has shape [12500, 50000].
Run 5
######################
# Pairwise distances #
######################
COMM_WORLD size is 4.
Loading data... /pfs/work7/workspace/scratch/ku4408-VL-ScalableAI/data/SUSY_50k.h5[data]
[OK]
Rank [2/4]: Local data chunk has shape [12500, 18]...
[OK]
Rank [1/4]: Local data chunk has shape [12500, 18]...
[OK]
Rank [0/4]: Local data chunk has shape [12500, 18]...
Start distance calculations...
[OK]
Rank [3/4]: Local data chunk has shape [12500, 18]...
Overall number of samples is 50000.
Overall number of samples is 50000.
Overall number of samples is 50000.
Overall number of samples is 50000.
Rank [0/4]: Calculate diagonal blocks in global distance matrix...
Rank [1/4]: Calculate diagonal blocks in global distance matrix...
Rank [3/4]: Calculate diagonal blocks in global distance matrix...
Rank [2/4]: Calculate diagonal blocks in global distance matrix...
Rank [0/4]: Start tile-wise ring communication...
Rank [3/4]: Start tile-wise ring communication...
Rank [2/4]: Start tile-wise ring communication...
Rank [1/4]: Start tile-wise ring communication...
Rank [3/4]: [DONE]
Rank [2/4]: [DONE]
Rank [1/4]: [DONE]
Rank [0/4]: [DONE]
Rank [0/4]: Local distance matrix has shape [12500, 50000].
Process-averaged run time: 2.1448243020568043 s
Rank [3/4]: Local distance matrix has shape [12500, 50000].
Rank [2/4]: Local distance matrix has shape [12500, 50000].
Rank [1/4]: Local distance matrix has shape [12500, 50000].
Run 6
######################
# Pairwise distances #
######################
COMM_WORLD size is 4.
Loading data... /pfs/work7/workspace/scratch/ku4408-VL-ScalableAI/data/SUSY_50k.h5[data]
[OK]
Rank [2/4]: Local data chunk has shape [12500, 18]...
[OK]
Rank [1/4]: Local data chunk has shape [12500, 18]...
[OK]
Rank [0/4]: Local data chunk has shape [12500, 18]...
Start distance calculations...
[OK]
Rank [3/4]: Local data chunk has shape [12500, 18]...
Overall number of samples is 50000.
Overall number of samples is 50000.
Overall number of samples is 50000.
Overall number of samples is 50000.
Rank [0/4]: Calculate diagonal blocks in global distance matrix...
Rank [2/4]: Calculate diagonal blocks in global distance matrix...
Rank [3/4]: Calculate diagonal blocks in global distance matrix...
Rank [1/4]: Calculate diagonal blocks in global distance matrix...
Rank [0/4]: Start tile-wise ring communication...
Rank [2/4]: Start tile-wise ring communication...
Rank [3/4]: Start tile-wise ring communication...
Rank [1/4]: Start tile-wise ring communication...
Rank [3/4]: [DONE]
Rank [2/4]: [DONE]
Rank [0/4]: [DONE]
Rank [1/4]: [DONE]
Rank [0/4]: Local distance matrix has shape [12500, 50000].
Process-averaged run time: 2.1342911082319915 s
Rank [2/4]: Local distance matrix has shape [12500, 50000].
Rank [3/4]: Local distance matrix has shape [12500, 50000].
Rank [1/4]: Local distance matrix has shape [12500, 50000].
Run 7
######################
# Pairwise distances #
######################
COMM_WORLD size is 4.
Loading data... /pfs/work7/workspace/scratch/ku4408-VL-ScalableAI/data/SUSY_50k.h5[data]
[OK]
Rank [0/4]: Local data chunk has shape [12500, 18]...
Start distance calculations...
[OK]
Rank [2/4]: Local data chunk has shape [12500, 18]...
[OK]
Rank [3/4]: Local data chunk has shape [12500, 18]...
[OK]
Rank [1/4]: Local data chunk has shape [12500, 18]...
Overall number of samples is 50000.
Overall number of samples is 50000.
Overall number of samples is 50000.
Overall number of samples is 50000.
Rank [0/4]: Calculate diagonal blocks in global distance matrix...
Rank [2/4]: Calculate diagonal blocks in global distance matrix...
Rank [3/4]: Calculate diagonal blocks in global distance matrix...
Rank [1/4]: Calculate diagonal blocks in global distance matrix...
Rank [0/4]: Start tile-wise ring communication...
Rank [2/4]: Start tile-wise ring communication...
Rank [1/4]: Start tile-wise ring communication...
Rank [3/4]: Start tile-wise ring communication...
Rank [3/4]: [DONE]
Rank [1/4]: [DONE]
Rank [0/4]: [DONE]
Rank [2/4]: [DONE]
Rank [0/4]: Local distance matrix has shape [12500, 50000].
Process-averaged run time: 2.131936827208847 s
Rank [1/4]: Local distance matrix has shape [12500, 50000].
Rank [3/4]: Local distance matrix has shape [12500, 50000].
Rank [2/4]: Local distance matrix has shape [12500, 50000].
Run 8
######################
# Pairwise distances #
######################
COMM_WORLD size is 4.
Loading data... /pfs/work7/workspace/scratch/ku4408-VL-ScalableAI/data/SUSY_50k.h5[data]
[OK]
Rank [3/4]: Local data chunk has shape [12500, 18]...
[OK]
Rank [1/4]: Local data chunk has shape [12500, 18]...
[OK]
Rank [2/4]: Local data chunk has shape [12500, 18]...
[OK]
Rank [0/4]: Local data chunk has shape [12500, 18]...
Start distance calculations...
Overall number of samples is 50000.
Overall number of samples is 50000.
Overall number of samples is 50000.
Overall number of samples is 50000.
Rank [0/4]: Calculate diagonal blocks in global distance matrix...
Rank [2/4]: Calculate diagonal blocks in global distance matrix...
Rank [3/4]: Calculate diagonal blocks in global distance matrix...
Rank [1/4]: Calculate diagonal blocks in global distance matrix...
Rank [0/4]: Start tile-wise ring communication...
Rank [2/4]: Start tile-wise ring communication...
Rank [1/4]: Start tile-wise ring communication...
Rank [3/4]: Start tile-wise ring communication...
Rank [3/4]: [DONE]
Rank [1/4]: [DONE]
Rank [2/4]: [DONE]
Rank [0/4]: [DONE]
Rank [0/4]: Local distance matrix has shape [12500, 50000].
Process-averaged run time: 2.134611321846023 s
Rank [1/4]: Local distance matrix has shape [12500, 50000].
Rank [2/4]: Local distance matrix has shape [12500, 50000].
Rank [3/4]: Local distance matrix has shape [12500, 50000].
Run 9
######################
# Pairwise distances #
######################
COMM_WORLD size is 4.
Loading data... /pfs/work7/workspace/scratch/ku4408-VL-ScalableAI/data/SUSY_50k.h5[data]
[OK]
Rank [0/4]: Local data chunk has shape [12500, 18]...
Start distance calculations...
[OK]
Rank [1/4]: Local data chunk has shape [12500, 18]...
[OK]
Rank [2/4]: Local data chunk has shape [12500, 18]...
[OK]
Rank [3/4]: Local data chunk has shape [12500, 18]...
Overall number of samples is 50000.
Overall number of samples is 50000.
Overall number of samples is 50000.
Overall number of samples is 50000.
Rank [0/4]: Calculate diagonal blocks in global distance matrix...
Rank [2/4]: Calculate diagonal blocks in global distance matrix...
Rank [3/4]: Calculate diagonal blocks in global distance matrix...
Rank [1/4]: Calculate diagonal blocks in global distance matrix...
Rank [0/4]: Start tile-wise ring communication...
Rank [2/4]: Start tile-wise ring communication...
Rank [3/4]: Start tile-wise ring communication...
Rank [1/4]: Start tile-wise ring communication...
Rank [3/4]: [DONE]
Rank [1/4]: [DONE]
Rank [0/4]: [DONE]
Rank [2/4]: [DONE]
Rank [0/4]: Local distance matrix has shape [12500, 50000].
Process-averaged run time: 2.1346833237912506 s
Rank [3/4]: Local distance matrix has shape [12500, 50000].
Rank [2/4]: Local distance matrix has shape [12500, 50000].
Rank [1/4]: Local distance matrix has shape [12500, 50000].
Run 10
######################
# Pairwise distances #
######################
COMM_WORLD size is 4.
Loading data... /pfs/work7/workspace/scratch/ku4408-VL-ScalableAI/data/SUSY_50k.h5[data]
[OK]
Rank [3/4]: Local data chunk has shape [12500, 18]...
[OK]
Rank [1/4]: Local data chunk has shape [12500, 18]...
[OK]
Rank [0/4]: Local data chunk has shape [12500, 18]...
Start distance calculations...
[OK]
Rank [2/4]: Local data chunk has shape [12500, 18]...
Overall number of samples is 50000.
Overall number of samples is 50000.
Overall number of samples is 50000.
Overall number of samples is 50000.
Rank [0/4]: Calculate diagonal blocks in global distance matrix...
Rank [3/4]: Calculate diagonal blocks in global distance matrix...
Rank [1/4]: Calculate diagonal blocks in global distance matrix...
Rank [2/4]: Calculate diagonal blocks in global distance matrix...
Rank [0/4]: Start tile-wise ring communication...
Rank [3/4]: Start tile-wise ring communication...
Rank [2/4]: Start tile-wise ring communication...
Rank [1/4]: Start tile-wise ring communication...
Rank [3/4]: [DONE]
Rank [1/4]: [DONE]
Rank [0/4]: [DONE]
Rank [2/4]: [DONE]
Rank [0/4]: Local distance matrix has shape [12500, 50000].
Process-averaged run time: 2.1418009700719267 s
Rank [3/4]: Local distance matrix has shape [12500, 50000].
Rank [1/4]: Local distance matrix has shape [12500, 50000].
Rank [2/4]: Local distance matrix has shape [12500, 50000].
Run 11
######################
# Pairwise distances #
######################
COMM_WORLD size is 4.
Loading data... /pfs/work7/workspace/scratch/ku4408-VL-ScalableAI/data/SUSY_50k.h5[data]
[OK]
Rank [0/4]: Local data chunk has shape [12500, 18]...
Start distance calculations...
[OK]
Rank [2/4]: Local data chunk has shape [12500, 18]...
[OK]
Rank [1/4]: Local data chunk has shape [12500, 18]...
[OK]
Rank [3/4]: Local data chunk has shape [12500, 18]...
Overall number of samples is 50000.
Overall number of samples is 50000.
Overall number of samples is 50000.
Overall number of samples is 50000.
Rank [0/4]: Calculate diagonal blocks in global distance matrix...
Rank [3/4]: Calculate diagonal blocks in global distance matrix...
Rank [2/4]: Calculate diagonal blocks in global distance matrix...
Rank [1/4]: Calculate diagonal blocks in global distance matrix...
Rank [0/4]: Start tile-wise ring communication...
Rank [2/4]: Start tile-wise ring communication...
Rank [3/4]: Start tile-wise ring communication...
Rank [1/4]: Start tile-wise ring communication...
Rank [3/4]: [DONE]
Rank [1/4]: [DONE]
Rank [0/4]: [DONE]
Rank [2/4]: [DONE]
Rank [0/4]: Local distance matrix has shape [12500, 50000].
Process-averaged run time: 2.141862073680386 s
Rank [2/4]: Local distance matrix has shape [12500, 50000].
Rank [3/4]: Local distance matrix has shape [12500, 50000].
Rank [1/4]: Local distance matrix has shape [12500, 50000].
============================= JOB FEEDBACK =============================
NodeName=uc2n[233-234,236-237]
Job ID: 24674016
Cluster: uc2
User/Group: ku4408/scc
State: COMPLETED (exit code 0)
Nodes: 4
Cores per node: 80
CPU Utilized: 00:03:11
CPU Efficiency: 0.52% of 10:08:00 core-walltime
Job Wall-clock time: 00:01:54
Memory Utilized: 3.98 GB
Memory Efficiency: 1.13% of 351.56 GB
This diff is collapsed.
File added
Run 1
######################
# Pairwise distances #
######################
COMM_WORLD size is 1.
Loading data... /pfs/work7/workspace/scratch/ku4408-VL-ScalableAI/data/SUSY_50k.h5[data]
[OK]
Rank [0/1]: Local data chunk has shape [50000, 18]...
Start distance calculations...
Rank [0/1]: Local distance matrix has shape [50000, 50000].
Process-averaged run time: 5.940975139848888 s
Run 2
######################
# Pairwise distances #
######################
COMM_WORLD size is 1.
Loading data... /pfs/work7/workspace/scratch/ku4408-VL-ScalableAI/data/SUSY_50k.h5[data]
[OK]
Rank [0/1]: Local data chunk has shape [50000, 18]...
Start distance calculations...
Rank [0/1]: Local distance matrix has shape [50000, 50000].
Process-averaged run time: 5.898665787652135 s
Run 3
######################
# Pairwise distances #
######################
COMM_WORLD size is 1.
Loading data... /pfs/work7/workspace/scratch/ku4408-VL-ScalableAI/data/SUSY_50k.h5[data]
[OK]
Rank [0/1]: Local data chunk has shape [50000, 18]...
Start distance calculations...
Rank [0/1]: Local distance matrix has shape [50000, 50000].
Process-averaged run time: 5.895133529789746 s
Run 4
######################
# Pairwise distances #
######################
COMM_WORLD size is 1.
Loading data... /pfs/work7/workspace/scratch/ku4408-VL-ScalableAI/data/SUSY_50k.h5[data]
[OK]
Rank [0/1]: Local data chunk has shape [50000, 18]...
Start distance calculations...
Rank [0/1]: Local distance matrix has shape [50000, 50000].
Process-averaged run time: 5.907652365043759 s
Run 5
######################
# Pairwise distances #
######################
COMM_WORLD size is 1.
Loading data... /pfs/work7/workspace/scratch/ku4408-VL-ScalableAI/data/SUSY_50k.h5[data]
[OK]
Rank [0/1]: Local data chunk has shape [50000, 18]...
Start distance calculations...
Rank [0/1]: Local distance matrix has shape [50000, 50000].
Process-averaged run time: 5.929541239514947 s
Run 6
######################
# Pairwise distances #
######################
COMM_WORLD size is 1.
Loading data... /pfs/work7/workspace/scratch/ku4408-VL-ScalableAI/data/SUSY_50k.h5[data]
[OK]
Rank [0/1]: Local data chunk has shape [50000, 18]...
Start distance calculations...
Rank [0/1]: Local distance matrix has shape [50000, 50000].
Process-averaged run time: 5.893667569383979 s
Run 7
######################
# Pairwise distances #
######################
COMM_WORLD size is 1.
Loading data... /pfs/work7/workspace/scratch/ku4408-VL-ScalableAI/data/SUSY_50k.h5[data]
[OK]
Rank [0/1]: Local data chunk has shape [50000, 18]...
Start distance calculations...
Rank [0/1]: Local distance matrix has shape [50000, 50000].
Process-averaged run time: 5.90837056748569 s
Run 8
######################
# Pairwise distances #
######################
COMM_WORLD size is 1.
Loading data... /pfs/work7/workspace/scratch/ku4408-VL-ScalableAI/data/SUSY_50k.h5[data]
[OK]
Rank [0/1]: Local data chunk has shape [50000, 18]...
Start distance calculations...
Rank [0/1]: Local distance matrix has shape [50000, 50000].
Process-averaged run time: 5.9261263478547335 s
Run 9
######################
# Pairwise distances #
######################
COMM_WORLD size is 1.
Loading data... /pfs/work7/workspace/scratch/ku4408-VL-ScalableAI/data/SUSY_50k.h5[data]
[OK]
Rank [0/1]: Local data chunk has shape [50000, 18]...
Start distance calculations...
Rank [0/1]: Local distance matrix has shape [50000, 50000].
Process-averaged run time: 5.914638973772526 s
Run 10
######################
# Pairwise distances #
######################
COMM_WORLD size is 1.
Loading data... /pfs/work7/workspace/scratch/ku4408-VL-ScalableAI/data/SUSY_50k.h5[data]
[OK]
Rank [0/1]: Local data chunk has shape [50000, 18]...
Start distance calculations...
Rank [0/1]: Local distance matrix has shape [50000, 50000].
Process-averaged run time: 5.895798914134502 s
Run 11
######################
# Pairwise distances #
######################
COMM_WORLD size is 1.
Loading data... /pfs/work7/workspace/scratch/ku4408-VL-ScalableAI/data/SUSY_50k.h5[data]
[OK]
Rank [0/1]: Local data chunk has shape [50000, 18]...
Start distance calculations...
Rank [0/1]: Local distance matrix has shape [50000, 50000].
Process-averaged run time: 5.89725339692086 s
============================= JOB FEEDBACK =============================
NodeName=uc2n341
Job ID: 24894108
Cluster: uc2
User/Group: ku4408/scc
State: COMPLETED (exit code 0)
Nodes: 1
Cores per node: 40
CPU Utilized: 00:01:32
CPU Efficiency: 1.67% of 01:32:00 core-walltime
Job Wall-clock time: 00:02:18
Memory Utilized: 411.15 MB
Memory Efficiency: 0.91% of 43.95 GB
Run 1
######################
# Pairwise distances #
######################
COMM_WORLD size is 2.
Loading data... /pfs/work7/workspace/scratch/ku4408-VL-ScalableAI/data/SUSY_50k.h5[data]
[OK]
Rank [0/2]: Local data chunk has shape [25000, 18]...
Start distance calculations...
[OK]
Rank [1/2]: Local data chunk has shape [25000, 18]...
Overall number of samples is 50000.
Overall number of samples is 50000.
Rank [0/2]: Calculate diagonal blocks...
Rank [1/2]: Calculate diagonal blocks...
Rank [0/2]: Start tile-wise ring communication...
Rank [0/2]: Starting iteration 1
Rank [1/2]: Start tile-wise ring communication...
Rank [1/2]: Starting iteration 1
Rank [1/2]: [DONE]
Rank [0/2]: [DONE]
Rank [0/2]: Local distance matrix has shape [25000, 50000].
Process-averaged run time: 6.266019005328417 s
Rank [1/2]: Local distance matrix has shape [25000, 50000].
Run 2
######################
# Pairwise distances #
######################
COMM_WORLD size is 2.
Loading data... /pfs/work7/workspace/scratch/ku4408-VL-ScalableAI/data/SUSY_50k.h5[data]
[OK]
Rank [1/2]: Local data chunk has shape [25000, 18]...
[OK]
Rank [0/2]: Local data chunk has shape [25000, 18]...
Start distance calculations...
Overall number of samples is 50000.
Overall number of samples is 50000.
Rank [0/2]: Calculate diagonal blocks...
Rank [1/2]: Calculate diagonal blocks...
Rank [0/2]: Start tile-wise ring communication...
Rank [0/2]: Starting iteration 1
Rank [1/2]: Start tile-wise ring communication...
Rank [1/2]: Starting iteration 1
Rank [1/2]: [DONE]
Rank [0/2]: [DONE]
Rank [0/2]: Local distance matrix has shape [25000, 50000].
Process-averaged run time: 6.181757535319775 s
Rank [1/2]: Local distance matrix has shape [25000, 50000].
Run 3
######################
# Pairwise distances #
######################
COMM_WORLD size is 2.
Loading data... /pfs/work7/workspace/scratch/ku4408-VL-ScalableAI/data/SUSY_50k.h5[data]
[OK]
Rank [1/2]: Local data chunk has shape [25000, 18]...
[OK]
Rank [0/2]: Local data chunk has shape [25000, 18]...
Start distance calculations...
Overall number of samples is 50000.
Overall number of samples is 50000.
Rank [0/2]: Calculate diagonal blocks...
Rank [1/2]: Calculate diagonal blocks...
Rank [0/2]: Start tile-wise ring communication...
Rank [0/2]: Starting iteration 1
Rank [1/2]: Start tile-wise ring communication...
Rank [1/2]: Starting iteration 1
Rank [1/2]: [DONE]
Rank [0/2]: [DONE]
Rank [0/2]: Local distance matrix has shape [25000, 50000].
Process-averaged run time: 6.169249661266804 s
Rank [1/2]: Local distance matrix has shape [25000, 50000].
Run 4
######################
# Pairwise distances #
######################
COMM_WORLD size is 2.
Loading data... /pfs/work7/workspace/scratch/ku4408-VL-ScalableAI/data/SUSY_50k.h5[data]
[OK]
Rank [0/2]: Local data chunk has shape [25000, 18]...
Start distance calculations...
[OK]
Rank [1/2]: Local data chunk has shape [25000, 18]...
Overall number of samples is 50000.
Overall number of samples is 50000.
Rank [0/2]: Calculate diagonal blocks...
Rank [1/2]: Calculate diagonal blocks...
Rank [0/2]: Start tile-wise ring communication...
Rank [0/2]: Starting iteration 1
Rank [1/2]: Start tile-wise ring communication...
Rank [1/2]: Starting iteration 1
Rank [1/2]: [DONE]
Rank [0/2]: [DONE]
Rank [0/2]: Local distance matrix has shape [25000, 50000].
Process-averaged run time: 6.169275370426476 s
Rank [1/2]: Local distance matrix has shape [25000, 50000].
Run 5
######################
# Pairwise distances #
######################
COMM_WORLD size is 2.
Loading data... /pfs/work7/workspace/scratch/ku4408-VL-ScalableAI/data/SUSY_50k.h5[data]
[OK]
Rank [0/2]: Local data chunk has shape [25000, 18]...
Start distance calculations...
[OK]
Rank [1/2]: Local data chunk has shape [25000, 18]...
Overall number of samples is 50000.
Overall number of samples is 50000.
Rank [0/2]: Calculate diagonal blocks...
Rank [1/2]: Calculate diagonal blocks...
Rank [0/2]: Start tile-wise ring communication...
Rank [0/2]: Starting iteration 1
Rank [1/2]: Start tile-wise ring communication...
Rank [1/2]: Starting iteration 1
Rank [1/2]: [DONE]
Rank [0/2]: [DONE]
Rank [0/2]: Local distance matrix has shape [25000, 50000].
Process-averaged run time: 6.171142612583935 s
Rank [1/2]: Local distance matrix has shape [25000, 50000].
Run 6
######################
# Pairwise distances #
######################
COMM_WORLD size is 2.
Loading data... /pfs/work7/workspace/scratch/ku4408-VL-ScalableAI/data/SUSY_50k.h5[data]
[OK]
Rank [0/2]: Local data chunk has shape [25000, 18]...
Start distance calculations...
[OK]
Rank [1/2]: Local data chunk has shape [25000, 18]...
Overall number of samples is 50000.
Overall number of samples is 50000.
Rank [0/2]: Calculate diagonal blocks...
Rank [1/2]: Calculate diagonal blocks...
Rank [0/2]: Start tile-wise ring communication...
Rank [0/2]: Starting iteration 1
Rank [1/2]: Start tile-wise ring communication...
Rank [1/2]: Starting iteration 1
Rank [1/2]: [DONE]
Rank [0/2]: [DONE]
Rank [0/2]: Local distance matrix has shape [25000, 50000].
Process-averaged run time: 6.167563481256366 s
Rank [1/2]: Local distance matrix has shape [25000, 50000].
Run 7
######################
# Pairwise distances #
######################
COMM_WORLD size is 2.
Loading data... /pfs/work7/workspace/scratch/ku4408-VL-ScalableAI/data/SUSY_50k.h5[data]
[OK]
Rank [1/2]: Local data chunk has shape [25000, 18]...
[OK]
Rank [0/2]: Local data chunk has shape [25000, 18]...
Start distance calculations...
Overall number of samples is 50000.
Overall number of samples is 50000.
Rank [0/2]: Calculate diagonal blocks...
Rank [1/2]: Calculate diagonal blocks...
Rank [0/2]: Start tile-wise ring communication...
Rank [0/2]: Starting iteration 1
Rank [1/2]: Start tile-wise ring communication...
Rank [1/2]: Starting iteration 1
Rank [1/2]: [DONE]
Rank [0/2]: [DONE]
Rank [0/2]: Local distance matrix has shape [25000, 50000].
Process-averaged run time: 6.172976263333112 s
Rank [1/2]: Local distance matrix has shape [25000, 50000].
Run 8
######################
# Pairwise distances #
######################
COMM_WORLD size is 2.
Loading data... /pfs/work7/workspace/scratch/ku4408-VL-ScalableAI/data/SUSY_50k.h5[data]
[OK]
Rank [0/2]: Local data chunk has shape [25000, 18]...
Start distance calculations...
[OK]
Rank [1/2]: Local data chunk has shape [25000, 18]...
Overall number of samples is 50000.
Overall number of samples is 50000.
Rank [0/2]: Calculate diagonal blocks...
Rank [1/2]: Calculate diagonal blocks...
Rank [0/2]: Start tile-wise ring communication...
Rank [0/2]: Starting iteration 1
Rank [1/2]: Start tile-wise ring communication...
Rank [1/2]: Starting iteration 1
Rank [1/2]: [DONE]
Rank [0/2]: [DONE]
Rank [0/2]: Local distance matrix has shape [25000, 50000].
Process-averaged run time: 6.169111441355199 s
Rank [1/2]: Local distance matrix has shape [25000, 50000].
Run 9
######################
# Pairwise distances #
######################
COMM_WORLD size is 2.
Loading data... /pfs/work7/workspace/scratch/ku4408-VL-ScalableAI/data/SUSY_50k.h5[data]
[OK]
Rank [0/2]: Local data chunk has shape [25000, 18]...
Start distance calculations...
[OK]
Rank [1/2]: Local data chunk has shape [25000, 18]...
Overall number of samples is 50000.
Overall number of samples is 50000.
Rank [0/2]: Calculate diagonal blocks...
Rank [1/2]: Calculate diagonal blocks...
Rank [0/2]: Start tile-wise ring communication...
Rank [0/2]: Starting iteration 1
Rank [1/2]: Start tile-wise ring communication...
Rank [1/2]: Starting iteration 1
Rank [1/2]: [DONE]
Rank [0/2]: [DONE]
Rank [0/2]: Local distance matrix has shape [25000, 50000].
Process-averaged run time: 6.173400138504803 s
Rank [1/2]: Local distance matrix has shape [25000, 50000].
Run 10
######################
# Pairwise distances #
######################
COMM_WORLD size is 2.
Loading data... /pfs/work7/workspace/scratch/ku4408-VL-ScalableAI/data/SUSY_50k.h5[data]
[OK]
Rank [0/2]: Local data chunk has shape [25000, 18]...
Start distance calculations...
[OK]
Rank [1/2]: Local data chunk has shape [25000, 18]...
Overall number of samples is 50000.
Overall number of samples is 50000.
Rank [0/2]: Calculate diagonal blocks...
Rank [1/2]: Calculate diagonal blocks...
Rank [0/2]: Start tile-wise ring communication...
Rank [0/2]: Starting iteration 1
Rank [1/2]: Start tile-wise ring communication...
Rank [1/2]: Starting iteration 1
Rank [1/2]: [DONE]
Rank [0/2]: [DONE]
Rank [0/2]: Local distance matrix has shape [25000, 50000].
Process-averaged run time: 6.16715893195942 s
Rank [1/2]: Local distance matrix has shape [25000, 50000].
Run 11
######################
# Pairwise distances #
######################
COMM_WORLD size is 2.
Loading data... /pfs/work7/workspace/scratch/ku4408-VL-ScalableAI/data/SUSY_50k.h5[data]
[OK]
Rank [0/2]: Local data chunk has shape [25000, 18]...
Start distance calculations...
[OK]
Rank [1/2]: Local data chunk has shape [25000, 18]...
Overall number of samples is 50000.
Overall number of samples is 50000.
Rank [0/2]: Calculate diagonal blocks...
Rank [1/2]: Calculate diagonal blocks...
Rank [0/2]: Start tile-wise ring communication...
Rank [0/2]: Starting iteration 1
Rank [1/2]: Start tile-wise ring communication...
Rank [1/2]: Starting iteration 1
Rank [1/2]: [DONE]
Rank [0/2]: [DONE]
Rank [0/2]: Local distance matrix has shape [25000, 50000].
Process-averaged run time: 6.168558022007346 s
Rank [1/2]: Local distance matrix has shape [25000, 50000].
============================= JOB FEEDBACK =============================
NodeName=uc2n[005-006]
Job ID: 24894109
Cluster: uc2
User/Group: ku4408/scc
State: COMPLETED (exit code 0)
Nodes: 2
Cores per node: 80
CPU Utilized: 00:03:21
CPU Efficiency: 0.75% of 07:25:20 core-walltime
Job Wall-clock time: 00:02:47
Memory Utilized: 9.80 GB
Memory Efficiency: 5.58% of 175.78 GB
Run 1
######################
# Pairwise distances #
######################
COMM_WORLD size is 4.
Loading data... /pfs/work7/workspace/scratch/ku4408-VL-ScalableAI/data/SUSY_50k.h5[data]
[OK]
Rank [3/4]: Local data chunk has shape [12500, 18]...
[OK]
Rank [1/4]: Local data chunk has shape [12500, 18]...
[OK]
Rank [2/4]: Local data chunk has shape [12500, 18]...
[OK]
Rank [0/4]: Local data chunk has shape [12500, 18]...
Start distance calculations...
Overall number of samples is 50000.
Overall number of samples is 50000.
Overall number of samples is 50000.
Overall number of samples is 50000.
Rank [3/4]: Calculate diagonal blocks...
Rank [1/4]: Calculate diagonal blocks...
Rank [0/4]: Calculate diagonal blocks...
Rank [2/4]: Calculate diagonal blocks...
Rank [0/4]: Start tile-wise ring communication...
Rank [0/4]: Starting iteration 1
Rank [2/4]: Start tile-wise ring communication...
Rank [2/4]: Starting iteration 1
Rank [3/4]: Start tile-wise ring communication...
Rank [3/4]: Starting iteration 1
Rank [1/4]: Start tile-wise ring communication...
Rank [1/4]: Starting iteration 1
Rank [3/4]: Starting iteration 2
Rank [2/4]: Starting iteration 2
Rank [0/4]: Starting iteration 2
Rank [1/4]: Starting iteration 2
Rank [2/4]: [DONE]
Rank [3/4]: [DONE]
Rank [0/4]: [DONE]
Rank [1/4]: [DONE]
Rank [0/4]: Local distance matrix has shape [12500, 50000].
Process-averaged run time: 3.0001420916523784 s
Rank [1/4]: Local distance matrix has shape [12500, 50000].
Rank [3/4]: Local distance matrix has shape [12500, 50000].
Rank [2/4]: Local distance matrix has shape [12500, 50000].
Run 2
######################
# Pairwise distances #
######################
COMM_WORLD size is 4.
Loading data... /pfs/work7/workspace/scratch/ku4408-VL-ScalableAI/data/SUSY_50k.h5[data]
[OK]
Rank [0/4]: Local data chunk has shape [12500, 18]...
Start distance calculations...
[OK]
Rank [1/4]: Local data chunk has shape [12500, 18]...
[OK]
Rank [2/4]: Local data chunk has shape [12500, 18]...
[OK]
Rank [3/4]: Local data chunk has shape [12500, 18]...
Overall number of samples is 50000.
Overall number of samples is 50000.
Overall number of samples is 50000.
Overall number of samples is 50000.
Rank [0/4]: Calculate diagonal blocks...
Rank [1/4]: Calculate diagonal blocks...
Rank [3/4]: Calculate diagonal blocks...
Rank [2/4]: Calculate diagonal blocks...
Rank [0/4]: Start tile-wise ring communication...
Rank [0/4]: Starting iteration 1
Rank [2/4]: Start tile-wise ring communication...
Rank [2/4]: Starting iteration 1
Rank [1/4]: Start tile-wise ring communication...
Rank [1/4]: Starting iteration 1
Rank [3/4]: Start tile-wise ring communication...
Rank [3/4]: Starting iteration 1
Rank [3/4]: Starting iteration 2
Rank [2/4]: Starting iteration 2
Rank [0/4]: Starting iteration 2
Rank [1/4]: Starting iteration 2
Rank [2/4]: [DONE]
Rank [3/4]: [DONE]
Rank [0/4]: [DONE]
Rank [1/4]: [DONE]
Rank [0/4]: Local distance matrix has shape [12500, 50000].
Process-averaged run time: 2.891579987015575 s
Rank [1/4]: Local distance matrix has shape [12500, 50000].
Rank [3/4]: Local distance matrix has shape [12500, 50000].
Rank [2/4]: Local distance matrix has shape [12500, 50000].
Run 3
######################
# Pairwise distances #
######################
COMM_WORLD size is 4.
Loading data... /pfs/work7/workspace/scratch/ku4408-VL-ScalableAI/data/SUSY_50k.h5[data]
[OK]
Rank [1/4]: Local data chunk has shape [12500, 18]...
[OK]
Rank [3/4]: Local data chunk has shape [12500, 18]...
[OK]
Rank [2/4]: Local data chunk has shape [12500, 18]...
[OK]
Rank [0/4]: Local data chunk has shape [12500, 18]...
Start distance calculations...
Overall number of samples is 50000.
Overall number of samples is 50000.
Overall number of samples is 50000.
Overall number of samples is 50000.
Rank [0/4]: Calculate diagonal blocks...
Rank [1/4]: Calculate diagonal blocks...
Rank [2/4]: Calculate diagonal blocks...
Rank [3/4]: Calculate diagonal blocks...
Rank [0/4]: Start tile-wise ring communication...
Rank [0/4]: Starting iteration 1
Rank [2/4]: Start tile-wise ring communication...
Rank [2/4]: Starting iteration 1
Rank [1/4]: Start tile-wise ring communication...
Rank [1/4]: Starting iteration 1
Rank [3/4]: Start tile-wise ring communication...
Rank [3/4]: Starting iteration 1
Rank [3/4]: Starting iteration 2
Rank [2/4]: Starting iteration 2
Rank [0/4]: Starting iteration 2
Rank [1/4]: Starting iteration 2
Rank [2/4]: [DONE]
Rank [3/4]: [DONE]
Rank [0/4]: [DONE]
Rank [1/4]: [DONE]
Rank [0/4]: Local distance matrix has shape [12500, 50000].
Process-averaged run time: 2.8908693362027407 s
Rank [2/4]: Local distance matrix has shape [12500, 50000].
Rank [3/4]: Local distance matrix has shape [12500, 50000].
Rank [1/4]: Local distance matrix has shape [12500, 50000].
Run 4
######################
# Pairwise distances #
######################
COMM_WORLD size is 4.
Loading data... /pfs/work7/workspace/scratch/ku4408-VL-ScalableAI/data/SUSY_50k.h5[data]
[OK]
Rank [0/4]: Local data chunk has shape [12500, 18]...
Start distance calculations...
[OK]
Rank [3/4]: Local data chunk has shape [12500, 18]...
[OK]
Rank [1/4]: Local data chunk has shape [12500, 18]...
[OK]
Rank [2/4]: Local data chunk has shape [12500, 18]...
Overall number of samples is 50000.
Overall number of samples is 50000.
Overall number of samples is 50000.
Overall number of samples is 50000.
Rank [0/4]: Calculate diagonal blocks...
Rank [1/4]: Calculate diagonal blocks...
Rank [2/4]: Calculate diagonal blocks...
Rank [3/4]: Calculate diagonal blocks...
Rank [0/4]: Start tile-wise ring communication...
Rank [0/4]: Starting iteration 1
Rank [2/4]: Start tile-wise ring communication...
Rank [2/4]: Starting iteration 1
Rank [3/4]: Start tile-wise ring communication...
Rank [3/4]: Starting iteration 1
Rank [1/4]: Start tile-wise ring communication...
Rank [1/4]: Starting iteration 1
Rank [3/4]: Starting iteration 2
Rank [2/4]: Starting iteration 2
Rank [0/4]: Starting iteration 2
Rank [1/4]: Starting iteration 2
Rank [2/4]: [DONE]
Rank [3/4]: [DONE]
Rank [0/4]: [DONE]
Rank [1/4]: [DONE]
Rank [0/4]: Local distance matrix has shape [12500, 50000].
Process-averaged run time: 2.8817238493356854 s
Rank [2/4]: Local distance matrix has shape [12500, 50000].
Rank [3/4]: Local distance matrix has shape [12500, 50000].
Rank [1/4]: Local distance matrix has shape [12500, 50000].
Run 5
######################
# Pairwise distances #
######################
COMM_WORLD size is 4.
Loading data... /pfs/work7/workspace/scratch/ku4408-VL-ScalableAI/data/SUSY_50k.h5[data]
[OK]
Rank [0/4]: Local data chunk has shape [12500, 18]...
Start distance calculations...
[OK]
Rank [1/4]: Local data chunk has shape [12500, 18]...
[OK]
Rank [3/4]: Local data chunk has shape [12500, 18]...
[OK]
Rank [2/4]: Local data chunk has shape [12500, 18]...
Overall number of samples is 50000.
Overall number of samples is 50000.
Overall number of samples is 50000.
Overall number of samples is 50000.
Rank [0/4]: Calculate diagonal blocks...
Rank [1/4]: Calculate diagonal blocks...
Rank [2/4]: Calculate diagonal blocks...
Rank [3/4]: Calculate diagonal blocks...
Rank [0/4]: Start tile-wise ring communication...
Rank [0/4]: Starting iteration 1
Rank [1/4]: Start tile-wise ring communication...
Rank [1/4]: Starting iteration 1
Rank [2/4]: Start tile-wise ring communication...
Rank [2/4]: Starting iteration 1
Rank [3/4]: Start tile-wise ring communication...
Rank [3/4]: Starting iteration 1
Rank [3/4]: Starting iteration 2
Rank [2/4]: Starting iteration 2
Rank [0/4]: Starting iteration 2
Rank [1/4]: Starting iteration 2
Rank [2/4]: [DONE]
Rank [3/4]: [DONE]
Rank [0/4]: [DONE]
Rank [1/4]: [DONE]
Rank [0/4]: Local distance matrix has shape [12500, 50000].
Process-averaged run time: 2.8827228804584593 s
Rank [2/4]: Local distance matrix has shape [12500, 50000].
Rank [1/4]: Local distance matrix has shape [12500, 50000].
Rank [3/4]: Local distance matrix has shape [12500, 50000].
Run 6
######################
# Pairwise distances #
######################
COMM_WORLD size is 4.
Loading data... /pfs/work7/workspace/scratch/ku4408-VL-ScalableAI/data/SUSY_50k.h5[data]
[OK]
Rank [0/4]: Local data chunk has shape [12500, 18]...
Start distance calculations...
[OK]
Rank [3/4]: Local data chunk has shape [12500, 18]...
[OK]
Rank [2/4]: Local data chunk has shape [12500, 18]...
[OK]
Rank [1/4]: Local data chunk has shape [12500, 18]...
Overall number of samples is 50000.
Overall number of samples is 50000.
Overall number of samples is 50000.
Overall number of samples is 50000.
Rank [0/4]: Calculate diagonal blocks...
Rank [2/4]: Calculate diagonal blocks...
Rank [3/4]: Calculate diagonal blocks...
Rank [1/4]: Calculate diagonal blocks...
Rank [0/4]: Start tile-wise ring communication...
Rank [0/4]: Starting iteration 1
Rank [2/4]: Start tile-wise ring communication...
Rank [2/4]: Starting iteration 1
Rank [3/4]: Start tile-wise ring communication...
Rank [3/4]: Starting iteration 1
Rank [1/4]: Start tile-wise ring communication...
Rank [1/4]: Starting iteration 1
Rank [3/4]: Starting iteration 2
Rank [2/4]: Starting iteration 2
Rank [0/4]: Starting iteration 2
Rank [1/4]: Starting iteration 2
Rank [2/4]: [DONE]
Rank [3/4]: [DONE]
Rank [0/4]: [DONE]
Rank [1/4]: [DONE]
Rank [0/4]: Local distance matrix has shape [12500, 50000].
Process-averaged run time: 2.8919111364521086 s
Rank [1/4]: Local distance matrix has shape [12500, 50000].
Rank [2/4]: Local distance matrix has shape [12500, 50000].
Rank [3/4]: Local distance matrix has shape [12500, 50000].
Run 7
######################
# Pairwise distances #
######################
COMM_WORLD size is 4.
Loading data... /pfs/work7/workspace/scratch/ku4408-VL-ScalableAI/data/SUSY_50k.h5[data]
[OK]
Rank [0/4]: Local data chunk has shape [12500, 18]...
Start distance calculations...
[OK]
Rank [2/4]: Local data chunk has shape [12500, 18]...
[OK]
Rank [3/4]: Local data chunk has shape [12500, 18]...
[OK]
Rank [1/4]: Local data chunk has shape [12500, 18]...
Overall number of samples is 50000.
Overall number of samples is 50000.
Overall number of samples is 50000.
Overall number of samples is 50000.
Rank [0/4]: Calculate diagonal blocks...
Rank [1/4]: Calculate diagonal blocks...
Rank [2/4]: Calculate diagonal blocks...
Rank [3/4]: Calculate diagonal blocks...
Rank [0/4]: Start tile-wise ring communication...
Rank [0/4]: Starting iteration 1
Rank [1/4]: Start tile-wise ring communication...
Rank [1/4]: Starting iteration 1
Rank [2/4]: Start tile-wise ring communication...
Rank [2/4]: Starting iteration 1
Rank [3/4]: Start tile-wise ring communication...
Rank [3/4]: Starting iteration 1
Rank [3/4]: Starting iteration 2
Rank [2/4]: Starting iteration 2
Rank [0/4]: Starting iteration 2
Rank [1/4]: Starting iteration 2
Rank [2/4]: [DONE]
Rank [3/4]: [DONE]
Rank [0/4]: [DONE]
Rank [1/4]: [DONE]
Rank [0/4]: Local distance matrix has shape [12500, 50000].
Process-averaged run time: 2.8823061871808022 s
Rank [2/4]: Local distance matrix has shape [12500, 50000].
Rank [3/4]: Local distance matrix has shape [12500, 50000].
Rank [1/4]: Local distance matrix has shape [12500, 50000].
Run 8
######################
# Pairwise distances #
######################
COMM_WORLD size is 4.
Loading data... /pfs/work7/workspace/scratch/ku4408-VL-ScalableAI/data/SUSY_50k.h5[data]
[OK]
Rank [0/4]: Local data chunk has shape [12500, 18]...
Start distance calculations...
[OK]
Rank [3/4]: Local data chunk has shape [12500, 18]...
[OK]
Rank [1/4]: Local data chunk has shape [12500, 18]...
[OK]
Rank [2/4]: Local data chunk has shape [12500, 18]...
Overall number of samples is 50000.
Overall number of samples is 50000.
Overall number of samples is 50000.
Overall number of samples is 50000.
Rank [0/4]: Calculate diagonal blocks...
Rank [1/4]: Calculate diagonal blocks...
Rank [2/4]: Calculate diagonal blocks...
Rank [3/4]: Calculate diagonal blocks...
Rank [0/4]: Start tile-wise ring communication...
Rank [0/4]: Starting iteration 1
Rank [1/4]: Start tile-wise ring communication...
Rank [1/4]: Starting iteration 1
Rank [2/4]: Start tile-wise ring communication...
Rank [2/4]: Starting iteration 1
Rank [3/4]: Start tile-wise ring communication...
Rank [3/4]: Starting iteration 1
Rank [3/4]: Starting iteration 2
Rank [2/4]: Starting iteration 2
Rank [0/4]: Starting iteration 2
Rank [1/4]: Starting iteration 2
Rank [2/4]: [DONE]
Rank [3/4]: [DONE]
Rank [0/4]: [DONE]
Rank [1/4]: [DONE]
Rank [0/4]: Local distance matrix has shape [12500, 50000].
Process-averaged run time: 2.882331378525123 s
Rank [1/4]: Local distance matrix has shape [12500, 50000].
Rank [3/4]: Local distance matrix has shape [12500, 50000].
Rank [2/4]: Local distance matrix has shape [12500, 50000].
Run 9
######################
# Pairwise distances #
######################
COMM_WORLD size is 4.
Loading data... /pfs/work7/workspace/scratch/ku4408-VL-ScalableAI/data/SUSY_50k.h5[data]
[OK]
Rank [2/4]: Local data chunk has shape [12500, 18]...
[OK]
Rank [3/4]: Local data chunk has shape [12500, 18]...
[OK]
Rank [1/4]: Local data chunk has shape [12500, 18]...
[OK]
Rank [0/4]: Local data chunk has shape [12500, 18]...
Start distance calculations...
Overall number of samples is 50000.
Overall number of samples is 50000.
Overall number of samples is 50000.
Overall number of samples is 50000.
Rank [0/4]: Calculate diagonal blocks...
Rank [2/4]: Calculate diagonal blocks...
Rank [1/4]: Calculate diagonal blocks...
Rank [3/4]: Calculate diagonal blocks...
Rank [0/4]: Start tile-wise ring communication...
Rank [0/4]: Starting iteration 1
Rank [2/4]: Start tile-wise ring communication...
Rank [2/4]: Starting iteration 1
Rank [1/4]: Start tile-wise ring communication...
Rank [1/4]: Starting iteration 1
Rank [3/4]: Start tile-wise ring communication...
Rank [3/4]: Starting iteration 1
Rank [3/4]: Starting iteration 2
Rank [2/4]: Starting iteration 2
Rank [0/4]: Starting iteration 2
Rank [1/4]: Starting iteration 2
Rank [2/4]: [DONE]
Rank [3/4]: [DONE]
Rank [0/4]: [DONE]
Rank [1/4]: [DONE]
Rank [0/4]: Local distance matrix has shape [12500, 50000].
Process-averaged run time: 2.8921143126208335 s
Rank [2/4]: Local distance matrix has shape [12500, 50000].
Rank [1/4]: Local distance matrix has shape [12500, 50000].
Rank [3/4]: Local distance matrix has shape [12500, 50000].
Run 10
######################
# Pairwise distances #
######################
COMM_WORLD size is 4.
Loading data... /pfs/work7/workspace/scratch/ku4408-VL-ScalableAI/data/SUSY_50k.h5[data]
[OK]
Rank [1/4]: Local data chunk has shape [12500, 18]...
[OK]
Rank [3/4]: Local data chunk has shape [12500, 18]...
[OK]
Rank [2/4]: Local data chunk has shape [12500, 18]...
[OK]
Rank [0/4]: Local data chunk has shape [12500, 18]...
Start distance calculations...
Overall number of samples is 50000.
Overall number of samples is 50000.
Overall number of samples is 50000.
Overall number of samples is 50000.
Rank [0/4]: Calculate diagonal blocks...
Rank [1/4]: Calculate diagonal blocks...
Rank [2/4]: Calculate diagonal blocks...
Rank [3/4]: Calculate diagonal blocks...
Rank [0/4]: Start tile-wise ring communication...
Rank [0/4]: Starting iteration 1
Rank [3/4]: Start tile-wise ring communication...
Rank [3/4]: Starting iteration 1
Rank [2/4]: Start tile-wise ring communication...
Rank [2/4]: Starting iteration 1
Rank [1/4]: Start tile-wise ring communication...
Rank [1/4]: Starting iteration 1
Rank [3/4]: Starting iteration 2
Rank [2/4]: Starting iteration 2
Rank [0/4]: Starting iteration 2
Rank [1/4]: Starting iteration 2
Rank [2/4]: [DONE]
Rank [3/4]: [DONE]
Rank [0/4]: [DONE]
Rank [1/4]: [DONE]
Rank [0/4]: Local distance matrix has shape [12500, 50000].
Process-averaged run time: 2.8922928040847182 s
Rank [2/4]: Local distance matrix has shape [12500, 50000].
Rank [1/4]: Local distance matrix has shape [12500, 50000].
Rank [3/4]: Local distance matrix has shape [12500, 50000].
Run 11
######################
# Pairwise distances #
######################
COMM_WORLD size is 4.
Loading data... /pfs/work7/workspace/scratch/ku4408-VL-ScalableAI/data/SUSY_50k.h5[data]
[OK]
Rank [0/4]: Local data chunk has shape [12500, 18]...
Start distance calculations...
[OK]
Rank [2/4]: Local data chunk has shape [12500, 18]...
[OK]
Rank [3/4]: Local data chunk has shape [12500, 18]...
[OK]
Rank [1/4]: Local data chunk has shape [12500, 18]...
Overall number of samples is 50000.
Overall number of samples is 50000.
Overall number of samples is 50000.
Overall number of samples is 50000.
Rank [0/4]: Calculate diagonal blocks...
Rank [2/4]: Calculate diagonal blocks...
Rank [1/4]: Calculate diagonal blocks...
Rank [3/4]: Calculate diagonal blocks...
Rank [0/4]: Start tile-wise ring communication...
Rank [0/4]: Starting iteration 1
Rank [2/4]: Start tile-wise ring communication...
Rank [2/4]: Starting iteration 1
Rank [1/4]: Start tile-wise ring communication...
Rank [1/4]: Starting iteration 1
Rank [3/4]: Start tile-wise ring communication...
Rank [3/4]: Starting iteration 1
Rank [3/4]: Starting iteration 2
Rank [2/4]: Starting iteration 2
Rank [1/4]: Starting iteration 2
Rank [0/4]: Starting iteration 2
Rank [2/4]: [DONE]
Rank [3/4]: [DONE]
Rank [0/4]: [DONE]
Rank [1/4]: [DONE]
Rank [0/4]: Local distance matrix has shape [12500, 50000].
Process-averaged run time: 2.8935788962990046 s
Rank [1/4]: Local distance matrix has shape [12500, 50000].
Rank [2/4]: Local distance matrix has shape [12500, 50000].
Rank [3/4]: Local distance matrix has shape [12500, 50000].
============================= JOB FEEDBACK =============================
NodeName=uc2n[001-004]
Job ID: 24894111
Cluster: uc2
User/Group: ku4408/scc
State: COMPLETED (exit code 0)
Nodes: 4
Cores per node: 80
CPU Utilized: 00:03:46
CPU Efficiency: 0.57% of 11:01:20 core-walltime
Job Wall-clock time: 00:02:04
Memory Utilized: 3.97 GB
Memory Efficiency: 1.13% of 351.56 GB
#!/bin/bash
#SBATCH --job-name=cdist_strong_4  # Name under which the job is listed.
#SBATCH --partition=multiple       # Partition the job is submitted to.
#SBATCH --nodes=4                  # Number of nodes requested.
#SBATCH --time=4:00                # Wall-clock limit, given as minutes:seconds.
#SBATCH --cpus-per-task=40         # CPUs reserved for every MPI task.
#SBATCH --ntasks-per-node=1        # Upper bound on tasks placed on one node.
#SBATCH --mail-type=ALL            # E-mail the user on job events.

# Batch job that launches cdist_kp.py under MPI eleven times in a row and
# prints a "Run <k>" label before each repetition.
# NOTE(review): the job name suggests a strong-scaling measurement on four
# nodes; confirm against the experiment description.

# Directory holding the Python script and root of the virtual environment.
export PYDIR=./
export VENVDIR=~/scai-venv/

# Begin with an empty module environment, then load the toolchain one module
# at a time, in this order.
module purge
for mod in \
    compiler/gnu/13.3 \
    mpi/openmpi/4.1 \
    devel/cuda/12.4 \
    lib/hdf5/1.14.4-gnu-13.3-openmpi-4.1
do
    module load "${mod}"
done

# Switch to the Python interpreter and packages of the virtual environment.
source "${VENVDIR}/bin/activate"

# Repeat the measurement; -u keeps Python's output unbuffered.
for ((run = 1; run <= 11; run++)); do
    echo "Run ${run}"
    mpirun python -u "${PYDIR}/cdist_kp.py"
done
#!/bin/bash
#SBATCH --job-name=cdist_weak_4    # Name under which the job is listed.
#SBATCH --partition=multiple       # Partition the job is submitted to.
#SBATCH --nodes=4                  # Number of nodes requested.
#SBATCH --time=10:00               # Wall-clock limit, given as minutes:seconds.
#SBATCH --cpus-per-task=40         # CPUs reserved for every MPI task.
#SBATCH --ntasks-per-node=1        # Upper bound on tasks placed on one node.
#SBATCH --mail-type=ALL            # E-mail the user on job events.

# Batch job that launches cdist_weak.py under MPI eleven times in a row and
# prints a "Run <k>" label before each repetition.
# NOTE(review): the job name suggests a weak-scaling measurement on four
# nodes; confirm against the experiment description.

# Directory holding the Python script and root of the virtual environment.
export PYDIR=./
export VENVDIR=~/scai-venv/

# Begin with an empty module environment, then load the toolchain one module
# at a time, in this order.
module purge
for mod in \
    compiler/gnu/13.3 \
    mpi/openmpi/4.1 \
    devel/cuda/12.4 \
    lib/hdf5/1.14.4-gnu-13.3-openmpi-4.1
do
    module load "${mod}"
done

# Switch to the Python interpreter and packages of the virtual environment.
source "${VENVDIR}/bin/activate"

# Repeat the measurement; -u keeps Python's output unbuffered.
for ((run = 1; run <= 11; run++)); do
    echo "Run ${run}"
    mpirun python -u "${PYDIR}/cdist_weak.py"
done