kalenji
March 6th, 2010, 06:27 AM
Hi everyone,
I'm trying to implement MCS lock in C by means of Posix Thread and efficient assembly code for fetch_and_store, and compare_and_swap operations on a SPARCv8plus machine. To do that, I have followed an implementation provided in a post of this Forum. On the contrary, I implement fetch_and_store and compare_and_swap operations through inline assembly instructions of SPARCv8plus ISA.
However, I have problems with the implementation, because It doesn't seem to perform CAS instructions properly. I suspect that the problem may be in not to include membar instructions to assure memory consistency, or not to use volatile declarations for variables.
My code is as follows:
#include <stdio.h>
#include <stdlib.h>
struct _MCSstruct
{
int n;
volatile struct MCSnode *MCSnodes;
volatile struct MCSnode *tail;
};
typedef struct _MCSstruct MCSstruct;
#define TRUE 1
#define FALSE 0
struct MCSnode
{
int pad1[32];
int locked;
struct MCSnode * next;
int pad2[32];
};
unsigned numProcs;
struct GlobalMemory {
LOCKDEC(lockid)
int sum;
MCSstruct mystruct;
};
struct GlobalMemory *gl;
unsigned iterations;
unsigned numProcs;
int phase = 0;
TreeBar final_sync;
/////////////// AUXILIAR FUNCTIONS
static inline struct MCSnode * fetch_and_store(volatile struct MCSnode **
tail,struct MCSnode ** i) {
/*
LOCK(gl->lockid);
struct MCSnode * temp = *tail;
*tail = *i;
UNLOCK(gl->lockid);
return temp;
*/
unsigned __temp=9, __i,__hi,__li,__hli,__hhi,__lli,__lhi;
// Big endian to Little endian
__li=0xFFFF0000&((unsigned)*i);
__hi=0x0000FFFF&((unsigned)*i);
__li=__li>>16;
__hi=__hi<<16;
__i = __li | __hi;
__hhi=0xFF000000&__hi;
__hli=0x00FF0000&__hi;
__lhi=0x0000FF00&__li;
__lli=0x000000FF&__li;
__hli=__hli<<8;
__hhi=__hhi>>8;
__lli=__lli<<8;
__lhi=__lhi>>8;
__i=__hhi | __hli | __lhi | __lli;
__asm__ __volatile__ (
"1:\n\t"
"ld [%3], %0\n\t"
"cas [%3], %0, %1\n\t"
"cmp %0, %1\n\t"
"bne,a,pn %%icc, 1b\n\t"
: "+r" (__temp), "+r" (__i), "+m" (*tail), "+r" (tail)
: :"cc");
return __temp;
}
static inline int compare_and_swap(volatile struct MCSnode ** tail, struct MCSnode
** i) {
/*
int status;
LOCK(gl->lockid);
if((*tail)!= (*i) ) { UNLOCK(gl->lockid); return 0;}
*tail = NULL;
UNLOCK(gl->lockid);
return 1;
*/
unsigned __tmp2, __temp, __res;
__asm__ __volatile__ (
"ld [%3], %0\n\t"
"1:\n\t"
"mov 0, %1\n\t"
"cas [%3], %0, %1\n\t"
"cmp %0, %1\n\t"
"bne,a,pn %%icc, 1b\n\t"
"nop"
: "+r" (__temp), "+r" (__tmp2), "+m" (*tail), "+r" (tail)
: : "cc");
return 1;
}
////////////////////// MCS IMPLEMENTATION
void MCSInit(MCSstruct * MCSstr,int nodes)
{
int i;
MCSstr->n=nodes;
MCSstr->tail=NULL;
MCSstr->MCSnodes=(struct MCSnode *)malloc(nodes*sizeof(struct MCSnode));
}
void MCSLock(MCSstruct * MCSstr,int whoami)
{
struct MCSnode * i = &MCSstr->MCSnodes[whoami];
i->next = NULL;
START_ACQ
struct MCSnode * predecessor = fetch_and_store( &MCSstr->tail, &i );
if(predecessor) {
ACQ_MEMBAR
i->locked = 1;
predecessor->next = i;
while(i->locked == 1) ;
}
END_AGG;
}
void MCSUnlock(MCSstruct * MCSstr,int whoami)
{
START_REL
REL_MEMBAR
struct MCSnode * i = &MCSstr->MCSnodes[whoami];
if(!i->next) {
ACQ_MEMBAR
if(compare_and_swap( &MCSstr->tail, &i )) return;
while(!i->next);
}
i->next->locked = FALSE;
END_AGG;
}
/////////////////////////////////////////////////////////// THREAD work
void work() {
int i,j;
unsigned MyNum = getpid();
BARRIER(0,numProcs);
for (i=0;i<iterations;i++) {
MCSLock(&gl->mystruct,MyNum);
gl->sum++;
MCSUnlock(&gl->mystruct,MyNum);
}
}
/////////////////////////////////////////////////////////// MAIN
main(int argc, char **argv) {
int i;
int proc_id=0;
if (argc!=3) {
printf("./barrier #PROCS #ITERATIONS\n");
exit(-1);
}
numProcs=atoi(argv[1]);
iterations=atoi(argv[2]);
printf("Synthetic benchmark for lock/unlock primitives start...\n");
printf("Numprocs=%d;\nIterations=%d.\n",numProcs,iterations);
/* Initialization */
gl = (struct GlobalMemory *) G_MALLOC(sizeof(struct GlobalMemory));
LOCKINIT(gl->lockid);
BARINIT(final_sync);
gl->sum=0;
MCSInit(&gl->mystruct,numProcs);
for (i=1; i<numProcs; i++) {
PTHREAD_CREATE(work);
}
work();
WAIT_FOR_END(numProcs);
printf("Result=%d\n",gl->sum);
MAIN_END;
}
Thank you in advance!!
Best regards,
I'm trying to implement MCS lock in C by means of Posix Thread and efficient assembly code for fetch_and_store, and compare_and_swap operations on a SPARCv8plus machine. To do that, I have followed an implementation provided in a post of this Forum. On the contrary, I implement fetch_and_store and compare_and_swap operations through inline assembly instructions of SPARCv8plus ISA.
However, I have problems with the implementation, because It doesn't seem to perform CAS instructions properly. I suspect that the problem may be in not to include membar instructions to assure memory consistency, or not to use volatile declarations for variables.
My code is as follows:
#include <stdio.h>
#include <stdlib.h>
struct _MCSstruct
{
int n;
volatile struct MCSnode *MCSnodes;
volatile struct MCSnode *tail;
};
typedef struct _MCSstruct MCSstruct;
#define TRUE 1
#define FALSE 0
struct MCSnode
{
int pad1[32];
int locked;
struct MCSnode * next;
int pad2[32];
};
unsigned numProcs;
struct GlobalMemory {
LOCKDEC(lockid)
int sum;
MCSstruct mystruct;
};
struct GlobalMemory *gl;
unsigned iterations;
unsigned numProcs;
int phase = 0;
TreeBar final_sync;
/////////////// AUXILIAR FUNCTIONS
static inline struct MCSnode * fetch_and_store(volatile struct MCSnode **
tail,struct MCSnode ** i) {
/*
LOCK(gl->lockid);
struct MCSnode * temp = *tail;
*tail = *i;
UNLOCK(gl->lockid);
return temp;
*/
unsigned __temp=9, __i,__hi,__li,__hli,__hhi,__lli,__lhi;
// Big endian to Little endian
__li=0xFFFF0000&((unsigned)*i);
__hi=0x0000FFFF&((unsigned)*i);
__li=__li>>16;
__hi=__hi<<16;
__i = __li | __hi;
__hhi=0xFF000000&__hi;
__hli=0x00FF0000&__hi;
__lhi=0x0000FF00&__li;
__lli=0x000000FF&__li;
__hli=__hli<<8;
__hhi=__hhi>>8;
__lli=__lli<<8;
__lhi=__lhi>>8;
__i=__hhi | __hli | __lhi | __lli;
__asm__ __volatile__ (
"1:\n\t"
"ld [%3], %0\n\t"
"cas [%3], %0, %1\n\t"
"cmp %0, %1\n\t"
"bne,a,pn %%icc, 1b\n\t"
: "+r" (__temp), "+r" (__i), "+m" (*tail), "+r" (tail)
: :"cc");
return __temp;
}
static inline int compare_and_swap(volatile struct MCSnode ** tail, struct MCSnode
** i) {
/*
int status;
LOCK(gl->lockid);
if((*tail)!= (*i) ) { UNLOCK(gl->lockid); return 0;}
*tail = NULL;
UNLOCK(gl->lockid);
return 1;
*/
unsigned __tmp2, __temp, __res;
__asm__ __volatile__ (
"ld [%3], %0\n\t"
"1:\n\t"
"mov 0, %1\n\t"
"cas [%3], %0, %1\n\t"
"cmp %0, %1\n\t"
"bne,a,pn %%icc, 1b\n\t"
"nop"
: "+r" (__temp), "+r" (__tmp2), "+m" (*tail), "+r" (tail)
: : "cc");
return 1;
}
////////////////////// MCS IMPLEMENTATION
void MCSInit(MCSstruct * MCSstr,int nodes)
{
int i;
MCSstr->n=nodes;
MCSstr->tail=NULL;
MCSstr->MCSnodes=(struct MCSnode *)malloc(nodes*sizeof(struct MCSnode));
}
void MCSLock(MCSstruct * MCSstr,int whoami)
{
struct MCSnode * i = &MCSstr->MCSnodes[whoami];
i->next = NULL;
START_ACQ
struct MCSnode * predecessor = fetch_and_store( &MCSstr->tail, &i );
if(predecessor) {
ACQ_MEMBAR
i->locked = 1;
predecessor->next = i;
while(i->locked == 1) ;
}
END_AGG;
}
void MCSUnlock(MCSstruct * MCSstr,int whoami)
{
START_REL
REL_MEMBAR
struct MCSnode * i = &MCSstr->MCSnodes[whoami];
if(!i->next) {
ACQ_MEMBAR
if(compare_and_swap( &MCSstr->tail, &i )) return;
while(!i->next);
}
i->next->locked = FALSE;
END_AGG;
}
/////////////////////////////////////////////////////////// THREAD work
void work() {
int i,j;
unsigned MyNum = getpid();
BARRIER(0,numProcs);
for (i=0;i<iterations;i++) {
MCSLock(&gl->mystruct,MyNum);
gl->sum++;
MCSUnlock(&gl->mystruct,MyNum);
}
}
/////////////////////////////////////////////////////////// MAIN
main(int argc, char **argv) {
int i;
int proc_id=0;
if (argc!=3) {
printf("./barrier #PROCS #ITERATIONS\n");
exit(-1);
}
numProcs=atoi(argv[1]);
iterations=atoi(argv[2]);
printf("Synthetic benchmark for lock/unlock primitives start...\n");
printf("Numprocs=%d;\nIterations=%d.\n",numProcs,iterations);
/* Initialization */
gl = (struct GlobalMemory *) G_MALLOC(sizeof(struct GlobalMemory));
LOCKINIT(gl->lockid);
BARINIT(final_sync);
gl->sum=0;
MCSInit(&gl->mystruct,numProcs);
for (i=1; i<numProcs; i++) {
PTHREAD_CREATE(work);
}
work();
WAIT_FOR_END(numProcs);
printf("Result=%d\n",gl->sum);
MAIN_END;
}
Thank you in advance!!
Best regards,