Optimisation Mechanisms for MPICH-Madeleine

Runtime

LaBRI, INRIA Bordeaux - Sud-Ouest

High Performance Runtime Systems for Parallel Architectures

B. Source Code for the Performance Evaluation Programs

B.1 The Index Datatype

void sendIndexTypeFromSrcToDest(int numberOfElements, int blocks, int rank, int source,
				int dest, int numtasks, int use_hindex) {
  int          blocklengths[blocks];
  int          displacements[blocks];
  MPI_Datatype indextype;
  MPI_Status   stat;
  int          i;
  tbx_tick_t   t1;
  tbx_tick_t   t2;

  TBX_GET_TICK(t1);
  TBX_GET_TICK(t2);

  // initialise structs for datatype
  if (numberOfElements == 0) {
    for(i=0 ; i<blocks ; i++) {
      blocklengths[i] = 0;
    }
  }
  else {
    for(i=0 ; i<blocks ; i++) {
      blocklengths[i] = numberOfElements/blocks;
    }
    blocklengths[blocks-1] += numberOfElements % blocks;
  }

  displacements[0] = 0;
  for(i=1 ; i<blocks ; i++) {
    displacements[i] = blocklengths[i-1] + displacements[i-1];
  }

  if (use_hindex == TRUE) {
    for(i=0 ; i<blocks ; i++) {
      displacements[i] *= sizeof(float);
    }
  } // end if

  // create user datatype
  if (use_hindex == TRUE) {
    MPI_Type_hindexed(blocks, blocklengths, displacements, MPI_FLOAT, &indextype);
  }
  else {
    MPI_Type_indexed(blocks, blocklengths, displacements, MPI_FLOAT, &indextype);
  }

  MPI_Type_commit(&indextype);

  if (rank == source) {
    float        data[numberOfElements];

    // Initialise data to send
    if (VERBOSE) {
      printf("data = ");
    }
    for(i=0 ; i<numberOfElements ; i++) {
      data[i] = 1.0 * (i+1);
      if (VERBOSE) {
        printf("%3.1f ", data[i]);
      }
    }
    if (VERBOSE) {
      printf("\n");
    }

    // send the data to the processor 1
    TBX_GET_TICK(t1);
    MPI_Send(data, 1, indextype, dest, TAG, MPI_COMM_WORLD);

    // erase the local data
    for(i=0 ; i<numberOfElements ; i++) data[i] = -1.0;

    // receive data from processor 1
    MPI_Recv(data, numberOfElements, MPI_FLOAT, dest, TAG, MPI_COMM_WORLD, &stat);
    checkIndexIsCorrect(data, i, numberOfElements);

    TBX_GET_TICK(t2);
    fprintf(stderr, "%d\t%d\t%f\t%d\t%d-%d\n", numberOfElements, MPIR_INDEXED,
	    TBX_TIMING_DELAY(t1, t2), blocks, source, dest);
  }
  else if (rank == dest) {
    float b[numberOfElements];
    MPI_Recv(b, numberOfElements, MPI_FLOAT, source, TAG, MPI_COMM_WORLD, &stat);
    checkIndexIsCorrect(b, rank, numberOfElements);

    MPI_Send(b, 1, indextype, source, TAG, MPI_COMM_WORLD);
  }

  MPI_Type_free(&indextype);
}

B.2 The Vector Datatype

/*
 * Round `size` up to the nearest multiple of `blocks`.
 *
 * Returns the smallest value >= size that is divisible by blocks (the sign of
 * blocks does not matter). When blocks is 0 no multiple exists, so size is
 * returned unchanged instead of dividing by zero.
 */
int getRealSize(int size, int blocks) {
  if (blocks == 0) {
    return size;
  }

  int remainder = size % blocks;

  if (remainder == 0) {
    return size;
  }
  if (remainder < 0) {
    // negative size: the next multiple above it is reached by dropping the
    // (negative) remainder
    return size - remainder;
  }
  // step from the multiple just below size to the one just above it
  return (blocks > 0) ? (size - remainder) + blocks : (size - remainder) - blocks;
}

void sendVectorTypeFromSrcToDest(int size, int blocks, int rank, int source, int dest,
				 int numtasks, int use_hvector) {
  int realSize = getRealSize(sqrt(size), blocks);
  float a[realSize][realSize];
  MPI_Datatype columntype;
  int i, j;
  float *b;
  MPI_Status stat;
  tbx_tick_t   t1;
  tbx_tick_t   t2;

  int count = blocks;
  int blocklength = realSize*realSize/blocks;
  int stride = blocklength;

  TBX_GET_TICK(t1);
  TBX_GET_TICK(t2);

  // Initialise data to send
  for(i=0 ; i<realSize ; i++) {
    for(j=0 ; j<realSize ; j++) {
      a[i][j] = getValue(i, j, realSize);
      if (VERBOSE) {
        printf("%3.1f ", a[i][j]);
      }
    }
    if (VERBOSE) {
      printf("\n");
    }
  }

  // create user datatype
  if (use_hvector == TRUE) {
    MPI_Type_hvector(count, blocklength, stride*sizeof(float), MPI_FLOAT, &columntype);
  }
  else {
    MPI_Type_vector(count, blocklength, stride, MPI_FLOAT, &columntype);
  }
  MPI_Type_commit(&columntype);

  if (rank == source) {
    // send data to the process dest
    TBX_GET_TICK(t1);
    MPI_Send(&a[0][0], 1, columntype, dest, TAG, MPI_COMM_WORLD);

    // receive the data
    b = (float *) malloc(count*blocklength*sizeof(float));
    MPI_Recv(b, count*blocklength, MPI_FLOAT, dest, TAG, MPI_COMM_WORLD, &stat);
    checkVectorIsCorrect(b, rank, count, blocklength, size, stride);
    free(b);

    TBX_GET_TICK(t2);
    fprintf(stderr, "%d\t%d\t%f\t%d\t%d-%d\n", count*blocklength, MPIR_VECTOR,
	    TBX_TIMING_DELAY(t1, t2), blocks, source, dest);
  }
  else if (rank == dest) {
    // receive the data
    b = (float *) malloc(count*blocklength*sizeof(float));
    MPI_Recv(b, count*blocklength, MPI_FLOAT, source, TAG, MPI_COMM_WORLD, &stat);

    checkVectorIsCorrect(b, rank, count, blocklength, realSize, stride);
    free(b);

    MPI_Send(&a[0][0], 1, columntype, source, TAG, MPI_COMM_WORLD);
  }

  MPI_Type_free(&columntype);

B.3 The Struct Datatype

/*
 * Record exchanged by the struct datatype benchmark: two floats, one int and
 * one more float, in that order.
 */
typedef struct {
  float x, y;
  int c;
  float z;
} Particle;

void sendStructTypeFromSrcToDest(int numberOfElements, int rank, int source, int dest,
				 int numtasks) {
  MPI_Datatype particletype;
  MPI_Aint offsets[3];
  MPI_Datatype oldtypes[3];
  int blockcounts[3];
  Particle *particles;
  Particle *p;
  int i;
  MPI_Status stat;

  tbx_tick_t   t1;
  tbx_tick_t   t2;

  TBX_GET_TICK(t1);
  TBX_GET_TICK(t2);

  // create the datatype to send and receive the data
  oldtypes[0] = MPI_FLOAT;
  oldtypes[1] = MPI_INT;
  oldtypes[2] = MPI_FLOAT;
  offsets[0] = 0;
  offsets[1] = 2 * sizeof(float);
  offsets[2] = offsets[1] + sizeof(int);
  blockcounts[0] = 2;
  blockcounts[1] = 1;
  blockcounts[2] = 1;

  /* Now define structured types and commit them */
  MPI_Type_struct(3, blockcounts, offsets, oldtypes, &particletype);
  MPI_Type_commit(&particletype);

  // Initialize the particle array and then send it to each task
  if (rank == source) {
    // Initialise data to send
    particles = (Particle *) malloc(numberOfElements * sizeof(Particle));
    for (i=0; i < numberOfElements; i++) {
      particles[i].x = (i+1) * 2.0;
      particles[i].y = (i+1) * -2.0;
      particles[i].c = (i+1) * 4;
      particles[i].z = (i+1) * 4.0;
    }

    TBX_GET_TICK(t1);
    MPI_Send(particles, numberOfElements, particletype, dest, TAG, MPI_COMM_WORLD);

    p = (Particle *) malloc(numberOfElements * sizeof(Particle));
    MPI_Recv(p, numberOfElements, particletype, dest, TAG, MPI_COMM_WORLD, &stat);
    //      checkStructIsCorrect(p, numberOfElements, rank);
    free(p);

    TBX_GET_TICK(t2);
    fprintf(stderr, "%d\t%d\t%f\t%d\t%d-%d\n", numberOfElements, MPIR_STRUCT,
	    TBX_TIMING_DELAY(t1, t2), 3, source, dest);

    free(particles);
  } // end if
  else if (rank == dest) {
    p = (Particle *) malloc(numberOfElements * sizeof(Particle));
    MPI_Recv(p, numberOfElements, particletype, source, TAG, MPI_COMM_WORLD, &stat);
    checkStructIsCorrect(p, numberOfElements, rank);

    MPI_Send(p, numberOfElements, particletype, source, TAG, MPI_COMM_WORLD);

    free(p);
  } // end else

RETURN HOME | BACK: Detailed Performance Results

Copyright © November 2006 Team Runtime