SimGrid  3.14.159
Versatile Simulation of Distributed Systems
reduce-ompi.c File Reference
#include "colls_private.h"
#include "coll_tuned_topo.h"

Functions

int smpi_coll_tuned_ompi_reduce_generic (void *sendbuf, void *recvbuf, int original_count, MPI_Datatype datatype, MPI_Op op, int root, MPI_Comm comm, ompi_coll_tree_t *tree, int count_by_segment, int max_outstanding_reqs)
 This is a generic implementation of the reduce protocol. More...
 
int smpi_coll_tuned_reduce_ompi_chain (void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, int root, MPI_Comm comm)
 
int smpi_coll_tuned_reduce_ompi_pipeline (void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, int root, MPI_Comm comm)
 
int smpi_coll_tuned_reduce_ompi_binary (void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, int root, MPI_Comm comm)
 
int smpi_coll_tuned_reduce_ompi_binomial (void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, int root, MPI_Comm comm)
 
int smpi_coll_tuned_reduce_ompi_in_order_binary (void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, int root, MPI_Comm comm)
 
int smpi_coll_tuned_reduce_ompi_basic_linear (void *sbuf, void *rbuf, int count, MPI_Datatype dtype, MPI_Op op, int root, MPI_Comm comm)
 

Function Documentation

§ smpi_coll_tuned_ompi_reduce_generic()

int smpi_coll_tuned_ompi_reduce_generic ( void *sendbuf,
void *recvbuf,
int  original_count,
MPI_Datatype  datatype,
MPI_Op  op,
int  root,
MPI_Comm  comm,
ompi_coll_tree_t *tree,
int  count_by_segment,
int  max_outstanding_reqs 
)

This is a generic implementation of the reduce protocol.

It uses the tree provided as an argument and executes all operations using a segment of count elements of the datatype. For the last communication, it updates the count in order to limit the number of datatype elements to the original count (original_count).

Note that for non-commutative operations we cannot avoid the memory copy for the first block: we must therefore copy sendbuf to accumbuf on intermediate nodes to keep the optimized loop happy.

Determine number of segments and number of elements sent per operation

We try to overlap communication: either with the next segment or with the next child.

§ smpi_coll_tuned_reduce_ompi_chain()

int smpi_coll_tuned_reduce_ompi_chain ( void *sendbuf,
void *recvbuf,
int  count,
MPI_Datatype  datatype,
MPI_Op  op,
int  root,
MPI_Comm  comm 
)

Determine number of segments and number of elements sent per operation

§ smpi_coll_tuned_reduce_ompi_pipeline()

int smpi_coll_tuned_reduce_ompi_pipeline ( void *sendbuf,
void *recvbuf,
int  count,
MPI_Datatype  datatype,
MPI_Op  op,
int  root,
MPI_Comm  comm 
)

Determine number of segments and number of elements sent per operation

§ smpi_coll_tuned_reduce_ompi_binary()

int smpi_coll_tuned_reduce_ompi_binary ( void *sendbuf,
void *recvbuf,
int  count,
MPI_Datatype  datatype,
MPI_Op  op,
int  root,
MPI_Comm  comm 
)

Determine number of segments and number of elements sent per operation

§ smpi_coll_tuned_reduce_ompi_binomial()

int smpi_coll_tuned_reduce_ompi_binomial ( void *sendbuf,
void *recvbuf,
int  count,
MPI_Datatype  datatype,
MPI_Op  op,
int  root,
MPI_Comm  comm 
)

Determine number of segments and number of elements sent per operation

§ smpi_coll_tuned_reduce_ompi_in_order_binary()

int smpi_coll_tuned_reduce_ompi_in_order_binary ( void *sendbuf,
void *recvbuf,
int  count,
MPI_Datatype  datatype,
MPI_Op  op,
int  root,
MPI_Comm  comm 
)

Determine number of segments and number of elements sent per operation

§ smpi_coll_tuned_reduce_ompi_basic_linear()

int smpi_coll_tuned_reduce_ompi_basic_linear ( void *sbuf,
void *rbuf,
int  count,
MPI_Datatype  dtype,
MPI_Op  op,
int  root,
MPI_Comm  comm 
)