|
-
July 25th, 2002, 10:01 AM
#1
Qsort easily beats std::sort() 100+ times!
I keep hearing from folks saying that it is unsafe to call Qsort() or qsort() because it has no idea about the constructors or assignment operators of none-POD data types.
One has to realize that in sorting object, you don't need to create new ones, you don't need to destroy old ones. All you need to do is moving objects around in memory. Is there any operation safer to do, than merely moving an object from one memory location to another? Qsort does NOT need to know the constructors etc., because it doesn't need to create or destroy objects.
That's exactly where Qsort beats std::sort() by big time. By "big" I mean more than 100 times faster.
Just try it with sorting an array of 100000 objects of a very simple class where the data member is simply one pointer pointing to 4KB memory allocated upon object creation, and deleted upon object destruction, and memcpy()'ed when object is copied. Try to see how std::sort handles it and how qsort() does it, you will see a big difference.
Especially when memory usage is near full, std::sort() will be literally brought down to craw on the ground, desperately trying to create new objects, allocate memory, memcpy() memory, then destroy object, all in an effort that is un-necessary and wasted.
While as qsort() is flying, sorting objects by just swapping the 4 bytes pointer that represent the class object.
No wonder Qsort() can easily defeat std::sort() by 100+ times or even more in such case.
-
July 25th, 2002, 10:05 AM
#2
Here is my test code
Code:
#include <vector>
#include <algorithm>
#include <functional>
#include <iostream>
#include <cstdlib>
#include <ctime>
#include <string>
#define ELEMENT_COUNT 100000
#define BUFF_SIZE 4096
#define CUTOFF 8 /* testing shows that this is good value */
using namespace std;
class SomeClass
{
public:
SomeClass();
~SomeClass();
SomeClass(const SomeClass& src);
SomeClass& operator=(const SomeClass& src);
std::string fld1;
char* m_pBuff;
};
SomeClass::SomeClass()
{
m_pBuff = new char[BUFF_SIZE];
}
SomeClass::~SomeClass()
{
delete [] m_pBuff;
}
SomeClass::SomeClass(const SomeClass& src) :
fld1(src.fld1)
{
m_pBuff = new char[BUFF_SIZE];
memcpy(m_pBuff, src.m_pBuff, BUFF_SIZE);
}
SomeClass& SomeClass::operator=(const SomeClass& src)
{
fld1 = src.fld1;
memcpy(m_pBuff, src.m_pBuff, BUFF_SIZE);
return *this;
}
struct SortFld
{
inline bool operator() (const SomeClass& first, const SomeClass& second)
{
return strcmp(first.fld1.c_str(), second.fld1.c_str())<0?true:false;
}
};
typedef vector<SomeClass> intvec;
void randomise_vector(intvec& v)
{
srand(time(0));
char szBuf[15];
for (intvec::iterator i = v.begin(); i != v.end(); ++i)
{
sprintf(szBuf, "%d", 100 + (((unsigned int)rand())%999));
(*i).fld1 = szBuf;
strcpy((*i).m_pBuff, szBuf);
}
}
inline bool stllessfunc(const SomeClass& lhs, const SomeClass& rhs)
{
return strcmp(lhs.fld1.c_str(), rhs.fld1.c_str())<0?true:false;
}
inline int qscompare(const void* lhs, const void* rhs)
{
return strcmp(((SomeClass *)lhs)->fld1.c_str(), ((SomeClass *)rhs)->fld1.c_str());
}
void __cdecl Qsort (
void *base,
unsigned num,
unsigned width,
int (__cdecl *comp)(const void *, const void *)
);
int main()
{
srand(clock());
intvec v(ELEMENT_COUNT);
randomise_vector(v);
cout << "std::sort using functor: ";
clock_t start = clock();
sort(v.begin(), v.end(), SortFld());
clock_t stop = clock();
cout << stop - start << " ticks" << endl;
randomise_vector(v);
cout << "std::sort using function: ";
start = clock();
sort(v.begin(), v.end(), stllessfunc);
stop = clock();
cout << stop - start << " ticks" << endl;
randomise_vector(v);
cout << "Qsort: ";
start = clock();
//qsort(static_cast<void*>(&v[0]), ELEMENT_COUNT, sizeof(SomeClass), qscompare);
Qsort(static_cast<void*>(&v[0]), ELEMENT_COUNT, sizeof(SomeClass), qscompare);
stop = clock();
cout << stop - start << " ticks" << endl;
char c;
cin >> c;
return 0;
}
void __cdecl Qsort (
void *base,
unsigned num,
unsigned width,
int (__cdecl *comp)(const void *, const void *)
)
{
char *lo, *hi;
char *mid;
char *loguy, *higuy;
unsigned size;
char *lostk[30], *histk[30];
int stkptr;
/* Note: the number of stack entries required is no more than
1 + log2(size), so 30 is sufficient for any array */
if (num < 2 || width == 0)
{
return;
}
stkptr = 0;
lo = (char*)base;
hi = (char *)base + width * (num-1);
recurse:
size = (hi - lo) / width + 1;
if (size <= CUTOFF)
{
char *p, *max;
while (hi > lo)
{
max = lo;
for (p = lo+width; p <= hi; p += width)
{
if (comp(p, max) > 0)
{
max = p;
}
}
if ( max != hi )
{
register int w2 = width;
if (w2 < 4)
{
do {
char tmp = max[w2];
max[w2] = hi[w2];
hi[w2] = tmp;
} while ((w2--) & 3);
}
else
{
while (w2 >= 4)
{
w2 -= 4;
int tmp = *(int*)(&max[w2]);
*(int*)(&max[w2]) = *(int*)(&hi[w2]);
*(int*)(&hi[w2]) = tmp;
}
}
}
hi -= width;
}
}
else
{
mid = lo + (size / 2) * width;
if ( mid != lo )
{
register int w2 = width;
while ((w2--) & 3)
{
char tmp = mid[w2];
mid[w2] = lo[w2];
lo[w2] = tmp;
}
while (w2 >= 4)
{
w2 -= 4;
int tmp = *(int*)(&mid[w2]);
*(int*)(&mid[w2]) = *(int*)(&lo[w2]);
*(int*)(&lo[w2]) = tmp;
}
}
loguy = lo;
higuy = hi + width;
for (;;)
{
do {
loguy += width;
} while (loguy <= hi && comp(loguy, lo) < 0);
do {
higuy -= width;
} while (higuy > lo && comp(higuy, lo) > 0);
if (higuy < loguy)
{
break;
}
if ( loguy != higuy )
{
register int w2 = width;
while ((w2--) & 3)
{
char tmp = loguy[w2];
loguy[w2] = higuy[w2];
higuy[w2] = tmp;
}
while (w2 >= 4)
{
w2 -= 4;
int tmp = *(int*)(&loguy[w2]);
*(int*)(&loguy[w2]) = *(int*)(&higuy[w2]);
*(int*)(&higuy[w2]) = tmp;
}
}
}
if ( lo != higuy )
{
register int w2 = width;
while ((w2--) & 3)
{
char tmp = lo[w2];
lo[w2] = higuy[w2];
higuy[w2] = tmp;
}
while (w2 >= 4)
{
w2 -= 4;
int tmp = *(int*)(&lo[w2]);
*(int*)(&lo[w2]) = *(int*)(&higuy[w2]);
*(int*)(&higuy[w2]) = tmp;
}
}
if ( higuy - 1 - lo >= hi - loguy )
{
if (lo + width < higuy)
{
lostk[stkptr] = lo;
histk[stkptr] = higuy - width;
++stkptr;
}
if (loguy < hi)
{
lo = loguy;
goto recurse;
}
}
else
{
if (loguy < hi)
{
lostk[stkptr] = loguy;
histk[stkptr] = hi;
++stkptr;
}
if (lo + width < higuy)
{
hi = higuy - width;
goto recurse;
}
}
}
--stkptr;
if (stkptr >= 0)
{
lo = lostk[stkptr];
hi = histk[stkptr];
goto recurse;
}
return;
}
-
July 25th, 2002, 11:08 AM
#3
Actual Qsort beats std::sort by 1000 times faster!
Well I just finished the testing. Here is the result:
number of objects: 100000
BUFF_SIZE: 4096 (bytes)
std::sort using functor: 1989240 ticks
std::sort using function: 222950 ticks
Qsort: 2035 ticks
See the big difference?
I am running the test on a PII 500MHz machine, 256 MB memory. With 4 IE windows and VisualStudio running. No other applications running.
-
July 25th, 2002, 11:09 AM
#4
You might want to rethink your class design in this case. Of course qsort() is faster since your semantics for using std::sort and qsort() are different. For qsort you just copy the ptr not the data inside whereas your assignment operator for SomeClass copies the actual data (in addition to allocating space for it), that is very time consuming. I recommend that you use std::auto_ptr or boost::shared_ptr instead of copying the data.
Your qsort must really suck if it's just 100 times faster than std::sort in that case.
-
July 25th, 2002, 11:12 AM
#5
Sorting just 10000 elements:
std::sort using functor: 6870ticks
std::sort using function: 6640 ticks
Qsort: 50 ticks
See the difference?
-
July 25th, 2002, 11:36 AM
#6
I have read both this and the other thread (" The fear of the misterious "). I'm not quite sure I understand what is what you want to prove -- i.e. what exactly is your statement ?
-
July 25th, 2002, 11:42 AM
#7
Anyway I rewrote your code so the semantics would be a bit closer to each other and on my machine it's more than 4:1 rather than 100+:1
std::sort using functor: 531 ticks
std::sort using function: 530 ticks
Qsort: 120 ticks
I haven't even tried to optimise any code and thinking of it this may be a fair price to pay to have nice-behaving code rather than code that may or may not work, depending on compiler vendor/version etc..
-
July 25th, 2002, 12:47 PM
#8
Does anyone else get an access violation with this code? I think I copied it incorrectly [this webpage puts a lot of weird symbols inside embedded code if you use ; : ) and other characters too close together]. With the code I have, Qsort produces an access violation, whereas qsort does not.
-
July 25th, 2002, 01:04 PM
#9
Originally posted by amag
Anyway I rewrote your code so the semantics would be a bit closer to each other and on my machine it's more than 4:1 rather than 100+:1
std::sort using functor: 531 ticks
std::sort using function: 530 ticks
Qsort: 120 ticks
I haven't even tried to optimise any code and thinking of it this may be a fair price to pay to have nice-behaving code rather than code that may or may not work, depending on compiler vendor/version etc..
amag, can you post your version? I'd like to take a look at it.
Regards,
Paul McKenzie
-
July 25th, 2002, 01:10 PM
#10
1) I ran your code on pgCC, for the Qsort() case I got the folowing
error message : "Segmentation fault (core dumped)"
2) I changed your main as given below. I took out the std::sort()
section, and put {} around the Qsort() section ...
On Visual C++ version 5, I got the following error window :
The instruction at "0x77164d8a" referenced memory at "0x002147dd". The
memory could not be "read".
Maybe version 6 will not have this problem ? Also, if I set BUFF_SIZE
smaller, I do not get the error.
Code:
int main()
{
srand(clock());
clock_t start,stop;
{
intvec v(ELEMENT_COUNT);
randomise_vector(v);
cout << "Qsort: ";
start = clock();
//qsort(static_cast<void*>(&v[0]), ELEMENT_COUNT, sizeof(SomeClass), qscompare);
Qsort(static_cast<void*>(&v[0]), ELEMENT_COUNT, sizeof(SomeClass), qscompare);
stop = clock();
cout << stop - start << " ticks" << endl;
}
return 0;
}
-
July 25th, 2002, 01:20 PM
#11
Philip said:
>>>>>>>>
On Visual C++ version 5, I got the following error window :
The instruction at "0x77164d8a" referenced memory at "0x002147dd". The
memory could not be "read".
Maybe version 6 will not have this problem ? Also, if I set BUFF_SIZE
smaller, I do not get the error
<<<<<<<<
I have VC 6.0 SP5 and I get that window popping up as well. I noticed that he has cin >> c prompting for input at the end; that delays the program. I thought he was doing that for unix platforms, but then I realized that with the __cdecl he's probably not moving it to other platforms anyway. The problem only occurs during the delete statement; he's clearly screwing up memory somewhere in his Qsort(). qsort(), on the other hand, isn't screwing anything up [at least that I've seen so far] ... and it is producing some good results. Unfortunately, I'm not willing to depend on it; I'd rather use std::sort and another method of sorting as another poster suggested [like keeping a list who internally sorts its members as they're inserted].
In case anyone else cares, the Qsort() function core dumps on hp-ux using the gcc-3.1 compiler [whereas qsort() works fine].
--Paul
-
July 25th, 2002, 01:22 PM
#12
Also, I ran the following on a somewhat high-end computer.
(I don't know the exact specs, but it is about a year old
and was pretty much top of the line at that time and had lots
of memory (maybe a gigabyte)) :
1) std::sort() using indirect sorting : 233 ticks
2) std::sort() using direct sorting : 13625 ticks
2) Qsort using direct sorting : 265 ticks (but get the error
message from the previous post).
On a much slower computer, Qsort() did outperform even the
indirect std:sort by about 3 to 1, but again, I got the
error window.
-
July 25th, 2002, 01:31 PM
#13
On VC 6.0, the fault occurs in the destructor of SomeClass right before the termination of the program.
Regards,
Paul McKenzie
Last edited by Paul McKenzie; July 25th, 2002 at 01:34 PM.
-
July 25th, 2002, 04:52 PM
#14
Funny. Paul claimed my code doesn't work on VC++ 6.0. I tested it (both release and debug) on VC++ 6.0 and it works just fine. And now I copy and paste my own posted code and tested it on a different machine, and it still works fine.
So far I tested it on both VC++6.0 enterprise edition and standard edition. Both works just fine.
Maybe Paul has a defective copy of VC++ 6.0?
The rest of reader groups can test for themselves on VC++ 6.0.
My test on a PIII 1700 MHz machine with 512 MB memory, using VC++ 6.0 standard edition:
std::sort using functor: 8656 ticks
std::sort using function: 8719 ticks
Qsort: 282 ticks
Try to increase the element count until your machine is using nearly all the memory and see what happens?
std::sort() would be crawing and Qsort() will still fly. Because the later doesn't need to allocate any memory.
-
July 25th, 2002, 05:00 PM
#15
Actually, I got the same result with VC++ 6.0. I thought maybe something it was only happening in a debug build and that's why you didn't see it, but I checked, and it's happening in both.
Posting Permissions
- You may not post new threads
- You may not post replies
- You may not post attachments
- You may not edit your posts
-
Forum Rules
|
Click Here to Expand Forum to Full Width
|