Qsort easily beats std::sort() 100+ times!
I keep hearing from folks saying that it is unsafe to call Qsort() or qsort() because it has no idea about the constructors or assignment operators of none-POD data types.
One has to realize that in sorting object, you don't need to create new ones, you don't need to destroy old ones. All you need to do is moving objects around in memory. Is there any operation safer to do, than merely moving an object from one memory location to another? Qsort does NOT need to know the constructors etc., because it doesn't need to create or destroy objects.
That's exactly where Qsort beats std::sort() by big time. By "big" I mean more than 100 times faster.
Just try it with sorting an array of 100000 objects of a very simple class where the data member is simply one pointer pointing to 4KB memory allocated upon object creation, and deleted upon object destruction, and memcpy()'ed when object is copied. Try to see how std::sort handles it and how qsort() does it, you will see a big difference.
Especially when memory usage is near full, std::sort() will be literally brought down to craw on the ground, desperately trying to create new objects, allocate memory, memcpy() memory, then destroy object, all in an effort that is un-necessary and wasted.
While as qsort() is flying, sorting objects by just swapping the 4 bytes pointer that represent the class object.
No wonder Qsort() can easily defeat std::sort() by 100+ times or even more in such case.
Actual Qsort beats std::sort by 1000 times faster!
Well I just finished the testing. Here is the result:
number of objects: 100000
BUFF_SIZE: 4096 (bytes)
std::sort using functor: 1989240 ticks
std::sort using function: 222950 ticks
Qsort: 2035 ticks
See the big difference?
I am running the test on a PII 500MHz machine, 256 MB memory. With 4 IE windows and VisualStudio running. No other applications running.
The corrected version of Qsort
Looks like I was copying the original Qsort from some where which contains a bug. But even that doesn't explain why it doesn't work on Paul's machine. Because I compiled the same thing and it works, for both debug and release.
Another test result:
std::sort using functor: 8938 ticks
std::sort using function: 8906 ticks
Qsort: 297 ticks
Here is the correct Qsort() function:
Code:
// This is Anthony's improved version of qsort -A.Mai 7/18/2002
void __cdecl Qsort (
void *base,
unsigned num,
unsigned width,
int (__cdecl *comp)(const void *, const void *)
)
{
char *lo, *hi;
char *mid;
char *loguy, *higuy;
unsigned size;
char *lostk[30], *histk[30];
int stkptr;
if (num < 2 || width == 0)
{
return;
}
stkptr = 0;
lo = (char*)base;
hi = (char *)base + width * (num-1);
recurse:
size = (hi - lo) / width + 1;
if (size > CUTOFF)
{
mid = lo + (size / 2) * width;
{
register int w2 = width;
if (w2 < 4)
{
do
{
char tmp;
w2 --;
tmp = mid[w2];
mid[w2] = lo[w2];
lo[w2] = tmp;
} while (w2 > 0);
}
else
{
int tmp;
tmp = *(int*)(&mid[w2-4]);
*(int*)(&mid[w2-4]) = *(int*)(&lo[w2-4]);
*(int*)(&lo[w2-4]) = tmp;
w2--;
w2 &= -4;
while (w2 >= 4)
{
w2 -= 4;
tmp = *(int*)(&mid[w2]);
*(int*)(&mid[w2]) = *(int*)(&lo[w2]);
*(int*)(&lo[w2]) = tmp;
}
}
}
loguy = lo;
higuy = hi + width;
for (;;)
{
do {
loguy += width;
} while (loguy <= hi && comp(loguy, lo) < 0); // <=
do {
higuy -= width;
} while (higuy > lo && comp(higuy, lo) > 0); // >=
if (higuy < loguy)
{
break;
}
{
register int w2 = width;
if (w2 < 4)
{
do
{
char tmp;
w2 --;
tmp = loguy[w2];
loguy[w2] = higuy[w2];
higuy[w2] = tmp;
} while (w2 > 0);
}
else
{
int tmp;
tmp = *(int*)(&loguy[w2-4]);
*(int*)(&loguy[w2-4]) = *(int*)(&higuy[w2-4]);
*(int*)(&higuy[w2-4]) = tmp;
w2--;
w2 &= -4;
while (w2 >= 4)
{
w2 -= 4;
tmp = *(int*)(&loguy[w2]);
*(int*)(&loguy[w2]) = *(int*)(&higuy[w2]);
*(int*)(&higuy[w2]) = tmp;
}
}
}
}
{
register int w2 = width;
if (w2 < 4)
{
do
{
char tmp;
w2 --;
tmp = lo[w2];
lo[w2] = higuy[w2];
higuy[w2] = tmp;
} while (w2 > 0);
}
else
{
int tmp;
tmp = *(int*)(&lo[w2-4]);
*(int*)(&lo[w2-4]) = *(int*)(&higuy[w2-4]);
*(int*)(&higuy[w2-4]) = tmp;
w2--;
w2 &= -4;
while (w2 >= 4)
{
w2 -= 4;
int tmp = *(int*)(&lo[w2]);
*(int*)(&lo[w2]) = *(int*)(&higuy[w2]);
*(int*)(&higuy[w2]) = tmp;
}
}
}
if ( higuy - 1 - lo >= hi - loguy )
{
if (lo + width < higuy)
{
lostk[stkptr] = lo;
histk[stkptr] = higuy - width;
++stkptr;
}
if (loguy < hi)
{
lo = loguy;
goto recurse;
}
}
else
{
if (loguy < hi)
{
lostk[stkptr] = loguy;
histk[stkptr] = hi;
++stkptr;
}
if (lo + width < higuy)
{
hi = higuy - width;
goto recurse;
}
}
}
else
{
char *p, *max;
while (hi > lo)
{
max = lo;
for (p = lo+width; p <= hi; p += width)
{
if (comp(p, max) > 0)
{
max = p;
}
}
if ( max != hi )
{
register int w2 = width;
if (w2 < 4)
{
do {
char tmp;
w2 --;
tmp = max[w2];
max[w2] = hi[w2];
hi[w2] = tmp;
} while (w2 > 0);
}
else
{
int tmp;
tmp = *(int*)(&max[w2-4]);
*(int*)(&max[w2-4]) = *(int*)(&hi[w2-4]);
*(int*)(&hi[w2-4]) = tmp;
w2--;
w2 &= -4;
while (w2 >= 4)
{
w2 -= 4;
tmp = *(int*)(&max[w2]);
*(int*)(&max[w2]) = *(int*)(&hi[w2]);
*(int*)(&hi[w2]) = tmp;
}
}
}
hi -= width;
}
}
--stkptr;
if (stkptr >= 0)
{
lo = lostk[stkptr];
hi = histk[stkptr];
goto recurse;
}
return;
}