#include "mpi.h"
#include <vector>
#include "dendro.h"
#include "parUtils.txx"
Go to the source code of this file.
Namespaces | |
namespace | par |
Defines | |
#define | KEEP_HIGH 100 |
#define | KEEP_LOW 101 |
#define | PROF_A2AV_WAIT_BEGIN |
#define | PROF_A2AV_WAIT_END |
#define | PROF_PAR_ALL2ALL_BEGIN |
#define | PROF_PAR_ALL2ALL_END return 1; |
#define | PROF_PAR_ALL2ALLV_DENSE_BEGIN |
#define | PROF_PAR_ALL2ALLV_DENSE_END return 1; |
#define | PROF_PAR_ALL2ALLV_SPARSE_BEGIN |
#define | PROF_PAR_ALL2ALLV_SPARSE_END return 1; |
#define | PROF_PAR_ALLGATHER_BEGIN |
#define | PROF_PAR_ALLGATHER_END return 1; |
#define | PROF_PAR_ALLGATHERV_BEGIN |
#define | PROF_PAR_ALLGATHERV_END return 1; |
#define | PROF_PAR_ALLREDUCE_BEGIN |
#define | PROF_PAR_ALLREDUCE_END return 1; |
#define | PROF_PAR_BCAST_BEGIN |
#define | PROF_PAR_BCAST_END return 1; |
#define | PROF_PAR_CONCAT_BEGIN |
#define | PROF_PAR_CONCAT_END return 1; |
#define | PROF_PAR_GATHER_BEGIN |
#define | PROF_PAR_GATHER_END return 1; |
#define | PROF_PAR_REDUCE_BEGIN |
#define | PROF_PAR_REDUCE_END return 1; |
#define | PROF_PAR_SCAN_BEGIN |
#define | PROF_PAR_SCAN_END return 1; |
#define | PROF_PAR_SCATTER_BEGIN |
#define | PROF_PAR_SCATTER_END return 1; |
#define | PROF_PAR_SENDRECV_BEGIN |
#define | PROF_PAR_SENDRECV_END return 1; |
#define | PROF_PARTW_BEGIN |
#define | PROF_PARTW_END return 1; |
#define | PROF_REMDUP_BEGIN |
#define | PROF_REMDUP_END return 1; |
#define | PROF_SEARCH_BEGIN |
#define | PROF_SEARCH_END return 1; |
#define | PROF_SORT_BEGIN |
#define | PROF_SORT_END return 1; |
#define | PROF_SPLIT_COMM_2WAY_BEGIN |
#define | PROF_SPLIT_COMM_2WAY_END return 1; |
#define | PROF_SPLIT_COMM_BEGIN |
#define | PROF_SPLIT_COMM_END return 1; |
Functions | |
template<typename T> | |
void | bitonicSort (std::vector< T > &in, MPI_Comm comm) |
An implementation of parallel bitonic sort that does not expect the number of processors to be a power of 2. In fact, the number of processors can even be odd. Moreover, we do not even expect the length of the vector (neither locally nor globally) to be a power of 2 or even. Moreover, each processor can call this with a different number of elements. However, we do expect that 'in' has at least 1 element on each processor. This recursively calls the function bitonicSort_binary, followed by a special parallel merge. | |
template<typename T> | |
void | bitonicSort_binary (std::vector< T > &in, MPI_Comm comm) |
An implementation of parallel bitonic sort that expects the number of processors to be a power of 2. However, unlike most implementations, we do not expect the length of the vector (neither locally nor globally) to be a power of 2 or even. Moreover, each processor can call this with a different number of elements. However, we do expect that 'in' has at least 1 element on each processor. | |
template<typename T> | |
int | concatenate (std::vector< T > &listA, std::vector< T > &listB, MPI_Comm comm) |
A parallel concatenation function. listB is appended (globally) to listA and the result is stored in listA. A useful application of this function is when listA and listB are individually sorted (globally) and the smallest element in listB is greater than the largest element in listA and we want to create a merged list that is sorted. | |
template<typename T> | |
unsigned int | defaultWeight (const T *a) |
template<typename T> | |
int | maxLowerBound (const std::vector< T > &keys, const std::vector< T > &searchList, std::vector< T > &results, MPI_Comm comm) |
A parallel search function. | |
template<typename T> | |
void | MergeLists (std::vector< T > &listA, std::vector< T > &listB, int KEEP_WHAT) |
Merges lists A, and B, retaining either the low or the High in list A. | |
template<typename T> | |
void | MergeSplit (std::vector< T > &local_list, int which_keys, int partner, MPI_Comm comm) |
The main operation in the parallel bitonic sort algorithm. This implements the compare-split operation. | |
template<typename T> | |
int | Mpi_Allgather (T *sendbuf, T *recvbuf, int count, MPI_Comm comm) |
template<typename T> | |
int | Mpi_Allgatherv (T *sendbuf, int sendcount, T *recvbuf, int *recvcounts, int *displs, MPI_Comm comm) |
template<typename T> | |
int | Mpi_Allreduce (T *sendbuf, T *recvbuf, int count, MPI_Op op, MPI_Comm comm) |
template<typename T> | |
int | Mpi_Alltoall (T *sendbuf, T *recvbuf, int count, MPI_Comm comm) |
template<typename T> | |
int | Mpi_Alltoallv_dense (T *sendbuf, int *sendcnts, int *sdispls, T *recvbuf, int *recvcnts, int *rdispls, MPI_Comm comm) |
template<typename T> | |
int | Mpi_Alltoallv_sparse (T *sendbuf, int *sendcnts, int *sdispls, T *recvbuf, int *recvcnts, int *rdispls, MPI_Comm comm) |
template<typename T> | |
int | Mpi_Bcast (T *buffer, int count, int root, MPI_Comm comm) |
template<typename T> | |
int | Mpi_Gather (T *sendBuffer, T *recvBuffer, int count, int root, MPI_Comm comm) |
template<typename T> | |
int | Mpi_Irecv (T *buf, int count, int source, int tag, MPI_Comm comm, MPI_Request *request) |
template<typename T> | |
int | Mpi_Issend (T *buf, int count, int dest, int tag, MPI_Comm comm, MPI_Request *request) |
template<typename T> | |
int | Mpi_Recv (T *buf, int count, int source, int tag, MPI_Comm comm, MPI_Status *status) |
template<typename T> | |
int | Mpi_Reduce (T *sendbuf, T *recvbuf, int count, MPI_Op op, int root, MPI_Comm comm) |
template<typename T> | |
int | Mpi_Scan (T *sendbuf, T *recvbuf, int count, MPI_Op op, MPI_Comm comm) |
template<typename T, typename S> | |
int | Mpi_Sendrecv (T *sendBuf, int sendCount, int dest, int sendTag, S *recvBuf, int recvCount, int source, int recvTag, MPI_Comm comm, MPI_Status *status) |
template<typename T> | |
void | Par_bitonic_merge_incr (std::vector< T > &local_list, int proc_set_size, MPI_Comm comm) |
template<typename T> | |
void | Par_bitonic_sort_decr (std::vector< T > &local_list, int proc_set_size, MPI_Comm comm) |
template<typename T> | |
void | Par_bitonic_sort_incr (std::vector< T > &local_list, int proc_set_size, MPI_Comm comm) |
template<typename T> | |
int | partitionW (std::vector< T > &vec, unsigned int(*getWeight)(const T *), MPI_Comm comm) |
A parallel weighted partitioning function. In our implementation, we do not pose any restriction on the input or the number of processors. This function can be used with an odd number of processors as well. Some processors can pass an empty vector as input. The relative ordering of the elements is preserved. | |
template<typename T> | |
int | removeDuplicates (std::vector< T > &nodes, bool isSorted, MPI_Comm comm) |
Removes duplicates in parallel. If the input is not sorted, sample sort will be called within the function to sort the vector and then duplicates will be removed. | |
template<typename T> | |
int | sampleSort (std::vector< T > &in, std::vector< T > &out, MPI_Comm comm) |
A parallel sample sort implementation. In our implementation, we do not pose any restriction on the input or the number of processors. This function can be used with an odd number of processors as well. Some processors can pass an empty vector as input. If the total number of elements in the vector (globally) is fewer than 10*p^2, where p is the number of processors, then we will use bitonic sort instead of sample sort to sort the vector. We use a parallel bitonic sort to sort the samples in the sample sort algorithm. Hence, the complexity of the algorithm is O(n/p log n/p) + O(p log p). Here, n is the global length of the vector and p is the number of processors. | |
template<typename T> | |
int | scatterValues (std::vector< T > &in, std::vector< T > &out, DendroIntL outSz, MPI_Comm comm) |
Re-distributes an STL vector, preserving the relative ordering of the elements. |
Hari Sundar, hsundar@gmail.com
Shravan Veerapaneni, shravan@seas.upenn.edu
Santi Swaroop Adavani, santis@gmail.com
Definition in file parUtils.h.
|
Definition at line 14 of file parUtils.h. |
|
Definition at line 15 of file parUtils.h. |
|
Definition at line 192 of file parUtils.h. Referenced by par::Mpi_Alltoallv_sparse(). |
|
Definition at line 213 of file parUtils.h. Referenced by par::Mpi_Alltoallv_sparse(). |
|
Definition at line 203 of file parUtils.h. Referenced by par::Mpi_Alltoall(). |
|
Definition at line 224 of file parUtils.h. Referenced by par::Mpi_Alltoall(). |
|
Definition at line 207 of file parUtils.h. Referenced by par::Mpi_Alltoallv_dense(). |
|
Definition at line 228 of file parUtils.h. Referenced by par::Mpi_Alltoallv_dense(). |
|
Definition at line 206 of file parUtils.h. Referenced by par::Mpi_Alltoallv_sparse(). |
|
Definition at line 227 of file parUtils.h. Referenced by par::Mpi_Alltoallv_sparse(). |
|
Definition at line 205 of file parUtils.h. Referenced by par::Mpi_Allgather(). |
|
Definition at line 226 of file parUtils.h. Referenced by par::Mpi_Allgather(). |
|
Definition at line 204 of file parUtils.h. Referenced by par::Mpi_Allgatherv(). |
|
Definition at line 225 of file parUtils.h. Referenced by par::Mpi_Allgatherv(). |
|
Definition at line 202 of file parUtils.h. Referenced by par::Mpi_Allreduce(). |
|
Definition at line 223 of file parUtils.h. Referenced by par::Mpi_Allreduce(). |
|
Definition at line 198 of file parUtils.h. Referenced by par::Mpi_Bcast(). |
|
Definition at line 219 of file parUtils.h. Referenced by par::Mpi_Bcast(). |
|
Definition at line 208 of file parUtils.h. Referenced by par::concatenate(). |
|
Definition at line 229 of file parUtils.h. Referenced by par::concatenate(). |
|
Definition at line 199 of file parUtils.h. Referenced by par::Mpi_Gather(). |
|
Definition at line 220 of file parUtils.h. Referenced by par::Mpi_Gather(). |
|
Definition at line 201 of file parUtils.h. Referenced by par::Mpi_Reduce(). |
|
Definition at line 222 of file parUtils.h. Referenced by par::Mpi_Reduce(). |
|
Definition at line 200 of file parUtils.h. Referenced by par::Mpi_Scan(). |
|
Definition at line 221 of file parUtils.h. Referenced by par::Mpi_Scan(). |
|
Definition at line 196 of file parUtils.h. Referenced by par::scatterValues(). |
|
Definition at line 217 of file parUtils.h. Referenced by par::scatterValues(). |
|
Definition at line 197 of file parUtils.h. Referenced by par::Mpi_Sendrecv(). |
|
Definition at line 218 of file parUtils.h. Referenced by par::Mpi_Sendrecv(). |
|
Definition at line 211 of file parUtils.h. Referenced by par::partitionW(). |
|
Definition at line 232 of file parUtils.h. Referenced by par::partitionW(). |
|
Definition at line 210 of file parUtils.h. Referenced by par::removeDuplicates(). |
|
Definition at line 231 of file parUtils.h. Referenced by par::removeDuplicates(). |
|
Definition at line 195 of file parUtils.h. Referenced by par::maxLowerBound(). |
|
Definition at line 216 of file parUtils.h. Referenced by par::maxLowerBound(). |
|
Definition at line 209 of file parUtils.h. Referenced by par::sampleSort(). |
|
Definition at line 230 of file parUtils.h. Referenced by par::sampleSort(). |
|
Definition at line 193 of file parUtils.h. Referenced by par::splitComm2way(). |
|
Definition at line 214 of file parUtils.h. Referenced by par::splitComm2way(). |
|
Definition at line 194 of file parUtils.h. Referenced by par::splitCommUsingSplittingRank(). |
|
Definition at line 215 of file parUtils.h. Referenced by par::splitCommUsingSplittingRank(). |
|
An implementation of parallel bitonic sort that does not expect the number of processors to be a power of 2. In fact, the number of processors can even be odd. Moreover, we do not even expect the length of the vector (neither locally nor globally) to be a power of 2 or even. Moreover, each processor can call this with a different number of elements. However, we do expect that 'in' has at least 1 element on each processor. This recursively calls the function bitonicSort_binary, followed by a special parallel merge.
Definition at line 1623 of file parUtils.txx. References par::Par_bitonic_merge_incr(), sort(), par::splitCommBinary(), and par::splitCommBinaryNoFlip(). |
|
An implementation of parallel bitonic sort that expects the number of processors to be a power of 2. However, unlike most implementations, we do not expect the length of the vector (neither locally nor globally) to be a power of 2 or even. Moreover, each processor can call this with a different number of elements. However, we do expect that 'in' has at least 1 element on each processor.
Definition at line 1593 of file parUtils.txx. |
|
A parallel concatenation function. listB is appended (globally) to listA and the result is stored in listA. A useful application of this function is when listA and listB are individually sorted (globally) and the smallest element in listB is greater than the largest element in listA and we want to create a merged list that is sorted.
Definition at line 547 of file parUtils.txx. References DendroIntL, PROF_PAR_CONCAT_BEGIN, and PROF_PAR_CONCAT_END. |
|
Definition at line 393 of file parUtils.txx. |
|
A parallel search function.
Definition at line 764 of file parUtils.txx. References PROF_SEARCH_BEGIN, and PROF_SEARCH_END. |
|
Merges lists A, and B, retaining either the low or the High in list A.
The reason for this is that, on the KEEP_LOW side, max(min(A), min(B)) > min(A) > max(A-) and similarly on the KEEP_HIGH side, min(max(A), max(B)) < max(A) < min(A+) which guarantees that the merged lists remain bitonic. Definition at line 1673 of file parUtils.txx. |
|
The main operation in the parallel bitonic sort algorithm. This implements the compare-split operation.
Definition at line 1476 of file parUtils.txx. |
|
Definition at line 196 of file parUtils.txx. References PROF_PAR_ALLGATHER_BEGIN, and PROF_PAR_ALLGATHER_END. |
|
Definition at line 147 of file parUtils.txx. References PROF_PAR_ALLGATHERV_BEGIN, and PROF_PAR_ALLGATHERV_END. |
|
Definition at line 85 of file parUtils.txx. References PROF_PAR_ALLREDUCE_BEGIN, and PROF_PAR_ALLREDUCE_END. |
|
Definition at line 97 of file parUtils.txx. References PROF_PAR_ALL2ALL_BEGIN, and PROF_PAR_ALL2ALL_END. Referenced by ot::flagNodesType2(). |
|
Definition at line 318 of file parUtils.txx. References PROF_PAR_ALL2ALLV_DENSE_BEGIN, and PROF_PAR_ALL2ALLV_DENSE_END. |
|
Definition at line 226 of file parUtils.txx. References PROF_A2AV_WAIT_BEGIN, PROF_A2AV_WAIT_END, PROF_PAR_ALL2ALLV_SPARSE_BEGIN, and PROF_PAR_ALL2ALLV_SPARSE_END. |
|
Definition at line 123 of file parUtils.txx. References PROF_PAR_BCAST_BEGIN, and PROF_PAR_BCAST_END. |
|
Definition at line 110 of file parUtils.txx. References PROF_PAR_GATHER_BEGIN, and PROF_PAR_GATHER_END. |
|
Definition at line 50 of file parUtils.txx. |
|
Definition at line 28 of file parUtils.txx. |
|
Definition at line 39 of file parUtils.txx. |
|
Definition at line 135 of file parUtils.txx. References PROF_PAR_REDUCE_BEGIN, and PROF_PAR_REDUCE_END. |
|
Definition at line 73 of file parUtils.txx. References PROF_PAR_SCAN_BEGIN, and PROF_PAR_SCAN_END. |
|
Definition at line 61 of file parUtils.txx. References PROF_PAR_SENDRECV_BEGIN, and PROF_PAR_SENDRECV_END. |
|
Definition at line 1571 of file parUtils.txx. References binOp::getPrevHighestPowerOfTwo(). Referenced by par::bitonicSort(). |
|
Definition at line 1539 of file parUtils.txx. |
|
Definition at line 1507 of file parUtils.txx. |
|
A parallel weighted partitioning function. In our implementation, we do not pose any restriction on the input or the number of processors. This function can be used with an odd number of processors as well. Some processors can pass an empty vector as input. The relative ordering of the elements is preserved.
Definition at line 930 of file parUtils.txx. References DendroIntL, PROF_PARTW_BEGIN, and PROF_PARTW_END. |
|
Removes duplicates in parallel. If the input is not sorted, sample sort will be called within the function to sort the vector and then duplicates will be removed.
Definition at line 1174 of file parUtils.txx. References PROF_REMDUP_BEGIN, PROF_REMDUP_END, and par::splitComm2way(). |
|
A parallel sample sort implementation. In our implementation, we do not pose any restriction on the input or the number of processors. This function can be used with an odd number of processors as well. Some processors can pass an empty vector as input. If the total number of elements in the vector (globally) is fewer than 10*p^2, where p is the number of processors, then we will use bitonic sort instead of sample sort to sort the vector. We use a parallel bitonic sort to sort the samples in the sample sort algorithm. Hence, the complexity of the algorithm is O(n/p log n/p) + O(p log p). Here, n is the global length of the vector and p is the number of processors.
Definition at line 1282 of file parUtils.txx. References DendroIntL, PROF_SORT_BEGIN, PROF_SORT_END, sort(), and par::splitCommUsingSplittingRank(). |
|
Re-distributes an STL vector, preserving the relative ordering of the elements.
Definition at line 398 of file parUtils.txx. References DendroIntL, PROF_PAR_SCATTER_BEGIN, and PROF_PAR_SCATTER_END. Referenced by ot::prolongMatVecType2(), and ot::restrictMatVecType2(). |