Click to See Complete Forum and Search --> : MCS lock: Pthreads + Inline Assembly implementation (SPARC)


kalenji
March 6th, 2010, 06:27 AM
Hi everyone,

I'm trying to implement MCS lock in C by means of Posix Thread and efficient assembly code for fetch_and_store, and compare_and_swap operations on a SPARCv8plus machine. To do that, I have followed an implementation provided in a post of this Forum. On the contrary, I implement fetch_and_store and compare_and_swap operations through inline assembly instructions of SPARCv8plus ISA.

However, I have problems with the implementation, because It doesn't seem to perform CAS instructions properly. I suspect that the problem may be in not to include membar instructions to assure memory consistency, or not to use volatile declarations for variables.

My code is as follows:

#include <stdio.h>
#include <stdlib.h>

struct _MCSstruct
{
int n;
volatile struct MCSnode *MCSnodes;
volatile struct MCSnode *tail;
};

typedef struct _MCSstruct MCSstruct;


#define TRUE 1
#define FALSE 0

struct MCSnode
{
int pad1[32];
int locked;
struct MCSnode * next;
int pad2[32];
};



unsigned numProcs;


struct GlobalMemory {
LOCKDEC(lockid)
int sum;
MCSstruct mystruct;
};

struct GlobalMemory *gl;

unsigned iterations;
unsigned numProcs;
int phase = 0;
TreeBar final_sync;


/////////////// AUXILIAR FUNCTIONS

static inline struct MCSnode * fetch_and_store(volatile struct MCSnode **
tail,struct MCSnode ** i) {
/*
LOCK(gl->lockid);

struct MCSnode * temp = *tail;
*tail = *i;

UNLOCK(gl->lockid);

return temp;
*/

unsigned __temp=9, __i,__hi,__li,__hli,__hhi,__lli,__lhi;

// Big endian to Little endian
__li=0xFFFF0000&((unsigned)*i);
__hi=0x0000FFFF&((unsigned)*i);

__li=__li>>16;
__hi=__hi<<16;
__i = __li | __hi;

__hhi=0xFF000000&__hi;
__hli=0x00FF0000&__hi;
__lhi=0x0000FF00&__li;
__lli=0x000000FF&__li;

__hli=__hli<<8;
__hhi=__hhi>>8;
__lli=__lli<<8;
__lhi=__lhi>>8;


__i=__hhi | __hli | __lhi | __lli;


__asm__ __volatile__ (
"1:\n\t"
"ld [%3], %0\n\t"
"cas [%3], %0, %1\n\t"
"cmp %0, %1\n\t"
"bne,a,pn %%icc, 1b\n\t"
: "+r" (__temp), "+r" (__i), "+m" (*tail), "+r" (tail)
: :"cc");


return __temp;
}



static inline int compare_and_swap(volatile struct MCSnode ** tail, struct MCSnode
** i) {

/*
int status;

LOCK(gl->lockid);

if((*tail)!= (*i) ) { UNLOCK(gl->lockid); return 0;}
*tail = NULL;

UNLOCK(gl->lockid);

return 1;
*/


unsigned __tmp2, __temp, __res;

__asm__ __volatile__ (
"ld [%3], %0\n\t"
"1:\n\t"
"mov 0, %1\n\t"
"cas [%3], %0, %1\n\t"
"cmp %0, %1\n\t"
"bne,a,pn %%icc, 1b\n\t"
"nop"
: "+r" (__temp), "+r" (__tmp2), "+m" (*tail), "+r" (tail)
: : "cc");


return 1;

}

////////////////////// MCS IMPLEMENTATION


void MCSInit(MCSstruct * MCSstr,int nodes)
{
int i;
MCSstr->n=nodes;
MCSstr->tail=NULL;
MCSstr->MCSnodes=(struct MCSnode *)malloc(nodes*sizeof(struct MCSnode));
}


void MCSLock(MCSstruct * MCSstr,int whoami)
{
struct MCSnode * i = &MCSstr->MCSnodes[whoami];
i->next = NULL;
START_ACQ
struct MCSnode * predecessor = fetch_and_store( &MCSstr->tail, &i );
if(predecessor) {
ACQ_MEMBAR
i->locked = 1;
predecessor->next = i;
while(i->locked == 1) ;
}
END_AGG;

}


void MCSUnlock(MCSstruct * MCSstr,int whoami)
{
START_REL
REL_MEMBAR
struct MCSnode * i = &MCSstr->MCSnodes[whoami];
if(!i->next) {
ACQ_MEMBAR
if(compare_and_swap( &MCSstr->tail, &i )) return;
while(!i->next);
}
i->next->locked = FALSE;
END_AGG;

}







/////////////////////////////////////////////////////////// THREAD work

void work() {
int i,j;
unsigned MyNum = getpid();


BARRIER(0,numProcs);


for (i=0;i<iterations;i++) {

MCSLock(&gl->mystruct,MyNum);

gl->sum++;

MCSUnlock(&gl->mystruct,MyNum);


}



}




/////////////////////////////////////////////////////////// MAIN


main(int argc, char **argv) {
int i;
int proc_id=0;
if (argc!=3) {
printf("./barrier #PROCS #ITERATIONS\n");
exit(-1);
}

numProcs=atoi(argv[1]);
iterations=atoi(argv[2]);

printf("Synthetic benchmark for lock/unlock primitives start...\n");
printf("Numprocs=%d;\nIterations=%d.\n",numProcs,iterations);

/* Initialization */
gl = (struct GlobalMemory *) G_MALLOC(sizeof(struct GlobalMemory));
LOCKINIT(gl->lockid);
BARINIT(final_sync);
gl->sum=0;
MCSInit(&gl->mystruct,numProcs);


for (i=1; i<numProcs; i++) {
PTHREAD_CREATE(work);
}
work();

WAIT_FOR_END(numProcs);

printf("Result=%d\n",gl->sum);

MAIN_END;

}


Thank you in advance!!
Best regards,