Click to See Complete Forum and Search --> : A more Qsort() function


Anthony Mai
July 18th, 2002, 06:14 PM
Here is a demo with a more efficient Qsort() function that I re-write in plain C. As shown by this example code. It easily beats the most efficient std::sort() based code.

#include <vector>
#include <algorithm>
#include <functional>
#include <iostream>
#include <cstdlib>
#include <ctime>
#include <string>

#define CUTOFF 8

using namespace std;

// This is Anthony's improved version of qsort
void __cdecl Qsort (
void *base,
unsigned num,
unsigned width,
int (__cdecl *comp)(const void *, const void *)
);

class SomeClass
{
public:
std::string fld1;
std::string fld2;
std::string fld3;
std::string fld4;
};

struct SortFld
{
bool operator() (const SomeClass& first, const SomeClass& second)
{
return strcmp(first.fld1.c_str(), second.fld1.c_str())<0?true:false;
}
};


typedef vector<SomeClass> intvec;

void randomise_vector(intvec& v)
{
srand(time(0));
char szBuf[15];
for (intvec::iterator i = v.begin(); i != v.end(); ++i)
{
sprintf(szBuf, "%d", rand());
(*i).fld1 = szBuf;
}
}

inline bool stllessfunc(const SomeClass& lhs, const SomeClass& rhs)
{
return strcmp(lhs.fld1.c_str(), rhs.fld1.c_str())<0?true:false;
}

inline int qscompare(const void* lhs, const void* rhs)
{
return strcmp(((SomeClass *)lhs)->fld1.c_str(), ((SomeClass*)rhs)->fld1.c_str());
}


int main()
{
int i, total = 999999;
intvec v(total+1);

randomise_vector(v);

cout << "std::sort using functor: ";
clock_t start = clock();
sort(v.begin(), v.end(), SortFld());
clock_t stop = clock();
cout << stop - start << " ticks" << endl;

randomise_vector(v);

cout << "std::sort using function: ";
start = clock();
sort(v.begin(), v.end(), stllessfunc);
stop = clock();
cout << stop - start << " ticks" << endl;

randomise_vector(v);
cout << "qsort: ";
start = clock();
Qsort(static_cast<void*>(&v[0]), 1000000, sizeof(SomeClass), qscompare);
stop = clock();
cout << stop - start << " ticks" << endl;

for (i=0; i<999998; i++)
{
if (strcmp(v[i].fld1.c_str(), v[i+1].fld1.c_str()) > 0)
{
break;
}
}
if (i < 999998)
{
cout << "There is an error in Qsort" << endl;
}
else
{
cout << "There is NO error in Qsort" << endl;
}

cout << "Test done" << endl;

return 0;
}


// This is Anthony's improved version of qsort -A.Mai 7/18/2002
void __cdecl Qsort (
void *base,
unsigned num,
unsigned width,
int (__cdecl *comp)(const void *, const void *)
)
{
char *lo, *hi;
char *mid;
char *loguy, *higuy;
unsigned size;
char *lostk[30], *histk[30];
int stkptr;

if (num < 2 || width == 0)
{
return;
}

stkptr = 0;

lo = (char*)base;
hi = (char *)base + width * (num-1);

recurse:

size = (hi - lo) / width + 1;

if (size > CUTOFF)
{
mid = lo + (size / 2) * width;

{
register int w2 = width;

if (w2 < 4)
{
do
{
char tmp;

w2 --;
tmp = mid[w2];
mid[w2] = lo[w2];
lo[w2] = tmp;
} while (w2 > 0);
}
else
{
int tmp;

tmp = *(int*)(&mid[w2-4]);
*(int*)(&mid[w2-4]) = *(int*)(&lo[w2-4]);
*(int*)(&lo[w2-4]) = tmp;
w2--;
w2 &= -4;
while (w2 >= 4)
{
w2 -= 4;
tmp = *(int*)(&mid[w2]);
*(int*)(&mid[w2]) = *(int*)(&lo[w2]);
*(int*)(&lo[w2]) = tmp;
}
}
}

loguy = lo;
higuy = hi + width;

for (;;)
{
do {
loguy += width;
} while (loguy <= hi && comp(loguy, lo) <= 0);

do {
higuy -= width;
} while (higuy > lo && comp(higuy, lo) >= 0);

if (higuy < loguy)
{
break;
}

{
register int w2 = width;

if (w2 < 4)
{
do
{
char tmp;

w2 --;
tmp = loguy[w2];
loguy[w2] = higuy[w2];
higuy[w2] = tmp;
} while (w2 > 0);
}
else
{
int tmp;

tmp = *(int*)(&loguy[w2-4]);
*(int*)(&loguy[w2-4]) = *(int*)(&higuy[w2-4]);
*(int*)(&higuy[w2-4]) = tmp;
w2--;
w2 &= -4;
while (w2 >= 4)
{
w2 -= 4;
tmp = *(int*)(&loguy[w2]);
*(int*)(&loguy[w2]) = *(int*)(&higuy[w2]);
*(int*)(&higuy[w2]) = tmp;
}
}
}
}

{
register int w2 = width;

if (w2 < 4)
{
do
{
char tmp;
w2 --;
tmp = lo[w2];
lo[w2] = higuy[w2];
higuy[w2] = tmp;
} while (w2 > 0);
}
else
{
int tmp;

tmp = *(int*)(&lo[w2-4]);
*(int*)(&lo[w2-4]) = *(int*)(&higuy[w2-4]);
*(int*)(&higuy[w2-4]) = tmp;
w2--;
w2 &= -4;
while (w2 >= 4)
{
w2 -= 4;
int tmp = *(int*)(&lo[w2]);
*(int*)(&lo[w2]) = *(int*)(&higuy[w2]);
*(int*)(&higuy[w2]) = tmp;
}
}
}

if ( higuy - 1 - lo >= hi - loguy )
{
if (lo + width < higuy)
{
lostk[stkptr] = lo;
histk[stkptr] = higuy - width;
++stkptr;
}

if (loguy < hi)
{
lo = loguy;
goto recurse;
}
}
else
{
if (loguy < hi)
{
lostk[stkptr] = loguy;
histk[stkptr] = hi;
++stkptr;
}

if (lo + width < higuy)
{
hi = higuy - width;
goto recurse;
}
}
}
else
{
char *p, *max;

while (hi > lo)
{
max = lo;
for (p = lo+width; p <= hi; p += width)
{
if (comp(p, max) > 0)
{
max = p;
}
}

if ( max != hi )
{
register int w2 = width;

if (w2 < 4)
{
do {
char tmp;

w2 --;
tmp = max[w2];
max[w2] = hi[w2];
hi[w2] = tmp;
} while (w2 > 0);
}
else
{
int tmp;

tmp = *(int*)(&max[w2-4]);
*(int*)(&max[w2-4]) = *(int*)(&hi[w2-4]);
*(int*)(&hi[w2-4]) = tmp;
w2--;
w2 &= -4;
while (w2 >= 4)
{
w2 -= 4;
tmp = *(int*)(&max[w2]);
*(int*)(&max[w2]) = *(int*)(&hi[w2]);
*(int*)(&hi[w2]) = tmp;
}
}
}

hi -= width;
}
}


--stkptr;
if (stkptr >= 0)
{
lo = lostk[stkptr];
hi = histk[stkptr];
goto recurse;
}

return;
}

Philip Nicoletti
July 19th, 2002, 09:43 AM
I have kept out of these discussions because I think
peformance issues are much more complicated than running
a simple test and saying "see A is better than B" or
"B is better than A".

But, since the issue doesn't seem to want to go away ...

I always thought that these type of tests, although
interesting, don't prove much. But I guess they can give
you ideas to consider. If performance is an issue, then
tests must be done on actual data on the actual
target system(s).

What sorting algorithm is better, a quicksort type or
a heapsort type ? Quicksort right ? Probably, but not
in all cases. Quick sort has an AVERAGE CASE complexity
of O(n*log(n)) but a WORST CASE complexity of O(n*n).
Heapsort is also O(n*log(n)), but generally slower.
Or maybe you have info on what your data looks like
so you can write your own algorithm that runs faster than
std::sort(), qsort(), and Qsort(). (fyi: sgi's implementation
of std:sort() uses introsort(). See "Introspective Sorting
amd Selection Algorithms", Software Practice and Experience,
27(8),: 983, 1997)

At any rate, here are my results for std::sort vs Qsort() ...

system : Compaq Pressario 1200 laptop, AMD @ 450 MHz , 192 MB RAM
compiler : VC++ version 5 (Release build)


results for original code as posted by Anthony :

std::sort using functor: 19340 ticks
std::sort using function: 20540 ticks
qsort: 16150 ticks
There is NO error in Qsort
Test done
Press any key to continue

Modify the randomise_vector() with initialization supplied
by Anthony to test Paul's index array sort :


string RandomNumberString()
{
int i;
char tmp[4];
string result;

const char char_tbl2[] = "0123456789";

for (i=0; i<3; i++)
{
tmp[i] = char_tbl2[((unsigned int)rand())%(sizeof(char_tbl2)-1)];
}
tmp[sizeof(tmp)-1] = 0x00;

result = tmp;
return result;
}


void randomise_vector(intvec& v)
{
srand(time(0));
//char szBuf[15];
for (intvec::iterator i = v.begin(); i != v.end(); ++i)
{
// sprintf(szBuf, "%d", rand());
(*i).fld1 = RandomNumberString(); //szBuf;
}
}


Results :

std::sort using functor: 20260 ticks
std::sort using function: 21320 ticks
qsort: 100840 ticks
There is NO error in Qsort
Test done
Press any key to continue

(data characteristics are important).

vector<int>

std::sort using functor: 720 ticks
std::sort using function: 1160 ticks
Qsort: 1770 ticks


vector<double>

std::sort using functor: 1430 ticks
std::sort using function: 2360 ticks
Qsort: 11970 ticks


inline int qscompare(const void* lhs, const void* rhs)
{
return *(static_cast<const double*>(lhs)) < *(static_cast<const double*>(rhs)) ? -1 :
(*(static_cast<const double*>(rhs)) < *(static_cast<const double*>(lhs)) ? 1 : 0);
}


vector<double>

std::sort using functor: 1420 ticks
std::sort using function: 2360 ticks
Qsort: 6100 ticks


inline int qscompare(const void* lhs, const void* rhs)
{
double *d1 = (double*)lhs;
double *d2 = (double*)rhs;

if (*d1 < *d2) return -1;
if (*d1 > *d2) return 1;
return 0;

}


using an indirect sort instead of direct sort

std::sort using indirect sort: 13020 ticks
There is NO error in std::sort (indirect)
qsort: 15760 ticks
There is NO error in Qsort (indirect)
Test done
Press any key to continue



At any rate, I do not see any reason to put down
the performance std::sort() when someone posts
a question concerning sorting. Maybe qsort()
will do better, maybe std::sort() will do better.
Maybe the difference is so small it doesn't matter.

What is going to be done once the data is sorted ?
Remove records ? Add records, Change records ?
Will the data have to be resorted after that ?
Maybe a different container would be better than
vector. Many issues to consider before a reasonable
answer can be given concerning performance.

Paul McKenzie
July 19th, 2002, 11:51 AM
Hello Phillip,

The structure being sorted is just as important. qsort() will only work on a data structure where the elements are stored in contiguous memory. std::sort will work for any data structure that provides random access iteration. This is why if you change the vector<T> to a deque<T> in the code, qsort() or QSort() fail with memory access violation errors.

Since deque<> is used in place of vector<> for many performance reasons, you would then need to maintain two "qsort" functions to sort a deque and a vector. If a C++ programmer came up with another data structure that had random access iterators, std::sort plugs right into it, while qsort() doesn't. A typical 'C' runtime qsort() is not written this way, therefore it is not suitable for sorting anything else but a vector of simple types. If you rewrote a qsort() with the idea that random access iteration is the key, and wrapped it in a function that accepted two random access iterators, then you are comparing apples to apples.

Just these facts alone justifies improving std::sort (if it needs to be improved), not creating another array-based qsort(). The "introsort" that some implementors use is an example of the improvements made to std::sort.

Regards,

Paul McKenzie

Anthony Mai
July 19th, 2002, 02:15 PM
Philips:
Thanks for helping me to discover a bug in my original Qsort code!!!!
By correctly that bug (which is to remove the two un-needed = from ">=" near where the comp function is called) Qsort now works several times faster. BTW that bug was also in the orignal qsort source code.
Now Qsort not only beats both the functor and function version of std::sort by a big marging. Now it beats them but at least 3 times faster.
The corrected Qsort() function looks like this:

// This is Anthony's improved version of qsort -A.Mai 7/18/2002
void __cdecl Qsort (
void *base,
unsigned num,
unsigned width,
int (__cdecl *comp)(const void *, const void *)
)
{
char *lo, *hi;
char *mid;
char *loguy, *higuy;
unsigned size;
char *lostk[30], *histk[30];
int stkptr;

if (num < 2 || width == 0)
{
return;
}

stkptr = 0;

lo = (char*)base;
hi = (char *)base + width * (num-1);

recurse:

size = (hi - lo) / width + 1;

if (size > CUTOFF)
{
mid = lo + (size / 2) * width;

{
register int w2 = width;

if (w2 < 4)
{
do
{
char tmp;

w2 --;
tmp = mid[w2];
mid[w2] = lo[w2];
lo[w2] = tmp;
} while (w2 > 0);
}
else
{
int tmp;

tmp = *(int*)(&mid[w2-4]);
*(int*)(&mid[w2-4]) = *(int*)(&lo[w2-4]);
*(int*)(&lo[w2-4]) = tmp;
w2--;
w2 &= -4;
while (w2 >= 4)
{
w2 -= 4;
tmp = *(int*)(&mid[w2]);
*(int*)(&mid[w2]) = *(int*)(&lo[w2]);
*(int*)(&lo[w2]) = tmp;
}
}
}

loguy = lo;
higuy = hi + width;

for (;;)
{
do {
loguy += width;
} while (loguy <= hi && comp(loguy, lo) < 0); // <=

do {
higuy -= width;
} while (higuy > lo && comp(higuy, lo) > 0); // >=

if (higuy < loguy)
{
break;
}

{
register int w2 = width;

if (w2 < 4)
{
do
{
char tmp;

w2 --;
tmp = loguy[w2];
loguy[w2] = higuy[w2];
higuy[w2] = tmp;
} while (w2 > 0);
}
else
{
int tmp;

tmp = *(int*)(&loguy[w2-4]);
*(int*)(&loguy[w2-4]) = *(int*)(&higuy[w2-4]);
*(int*)(&higuy[w2-4]) = tmp;
w2--;
w2 &= -4;
while (w2 >= 4)
{
w2 -= 4;
tmp = *(int*)(&loguy[w2]);
*(int*)(&loguy[w2]) = *(int*)(&higuy[w2]);
*(int*)(&higuy[w2]) = tmp;
}
}
}
}

{
register int w2 = width;

if (w2 < 4)
{
do
{
char tmp;
w2 --;
tmp = lo[w2];
lo[w2] = higuy[w2];
higuy[w2] = tmp;
} while (w2 > 0);
}
else
{
int tmp;

tmp = *(int*)(&lo[w2-4]);
*(int*)(&lo[w2-4]) = *(int*)(&higuy[w2-4]);
*(int*)(&higuy[w2-4]) = tmp;
w2--;
w2 &= -4;
while (w2 >= 4)
{
w2 -= 4;
int tmp = *(int*)(&lo[w2]);
*(int*)(&lo[w2]) = *(int*)(&higuy[w2]);
*(int*)(&higuy[w2]) = tmp;
}
}
}

if ( higuy - 1 - lo >= hi - loguy )
{
if (lo + width < higuy)
{
lostk[stkptr] = lo;
histk[stkptr] = higuy - width;
++stkptr;
}

if (loguy < hi)
{
lo = loguy;
goto recurse;
}
}
else
{
if (loguy < hi)
{
lostk[stkptr] = loguy;
histk[stkptr] = hi;
++stkptr;
}

if (lo + width < higuy)
{
hi = higuy - width;
goto recurse;
}
}
}
else
{
char *p, *max;

while (hi > lo)
{
max = lo;
for (p = lo+width; p <= hi; p += width)
{
if (comp(p, max) > 0)
{
max = p;
}
}

if ( max != hi )
{
register int w2 = width;

if (w2 < 4)
{
do {
char tmp;

w2 --;
tmp = max[w2];
max[w2] = hi[w2];
hi[w2] = tmp;
} while (w2 > 0);
}
else
{
int tmp;

tmp = *(int*)(&max[w2-4]);
*(int*)(&max[w2-4]) = *(int*)(&hi[w2-4]);
*(int*)(&hi[w2-4]) = tmp;
w2--;
w2 &= -4;
while (w2 >= 4)
{
w2 -= 4;
tmp = *(int*)(&max[w2]);
*(int*)(&max[w2]) = *(int*)(&hi[w2]);
*(int*)(&hi[w2]) = tmp;
}
}
}

hi -= width;
}
}


--stkptr;
if (stkptr >= 0)
{
lo = lostk[stkptr];
hi = histk[stkptr];
goto recurse;
}

return;
}

Paul McKenzie
July 21st, 2002, 12:46 PM
Originally posted by Paul McKenzie
Hello Phillip,

The structure being sorted is just as important. qsort() will only work on a data structure where the elements are stored in contiguous memory. std::sort will work for any data structure that provides random access iteration. This is why if you change the vector<T> to a deque<T> in the code, qsort() or QSort() fail with memory access violation errors.

...

Regards,

Paul McKenzie For example, change to deque<>, here are the results:

std::sort using functor: 7000 ticks
std::sort using function: 8392 ticks
qsort: "The instruction at 0x004018e6" referenced memory at 0x0000001f". The memory could not be "read".

So OP, scramble and get your code to work for deque<> Then retest the QSort() code to see the results after your changes, both with vector and deque.

And when you get it working for deque<>, I'll try your new code on another container type (maybe one of the boost containers) and see the results. Oh, wait...you'll have to recode again, and you will need to know how the container works to write a sort for it. At this rate, it would never end.

What's my point? So far, I haven't changed a single line of code (except change the container) -- std::sort works flawlessly, QSort barfed. If I tested another container that take random access iterators, the OP will have to get on the ball and recode again, while std::sort stands up like a champ without changing anything.

If the OP or anyone else feels that this is good software design, to keep recoding, testing, recoding, testing, recoding, testing, just because the container type used by the class or application has changed, then go ahead and knock yourself out. You might as well not use virtual functions either.

From Phillip:

Maybe a different container would be better than
vector. Many issues to consider before a reasonable
answer can be given concerning performance.

I couldn't have said it any better.

Regards,

Paul McKenzie

Anthony Mai
July 22nd, 2002, 10:35 AM
Qsort is designed for sorting of objects arranged in continuous and sequential memory locations (i.e., object array or vector). No one ever said Qsort can be used on deque.

The same argument of using deque against Qsort is as valid as the argument using linked list against std:sort(). I don't believe std::sort() can be used to sort a linked list directly, can it? So must the original creator of std::sort() be blamed for writting something that can only sort something (vectors, arrays), but not some other thing (linked list)?

What's wrong with Qsort that it can not sorting something (deque) that is so rarely ever been used? Most of programmers have never used deque even once during their programming lifetime. A lot of them never even heard about deque. And there is absolutely nothing one can think about that must be done with deque.

And it remains that my improved Qsort() easily beats std::sort() by several times faster, with all sorts of objects I have tested so far. The original qsort() was written in a very poor way performance wise. I looked at the sorce code and immediately saw the problem, and I could came out with a few quick improvement which yields Qsort() you saw. Giving it some more thoughts I can easily make Qsort at least twice faster than what it is now.

When it comes to priority in programming. I do believe that most of the time the need to squeeze out a few more percentage of performance for a system that has millions of installments far more outweight the need of one lazy programmer to save his laziness by avoiding the need to re-write a piece of code, on the cost of several times lower performance.

Have you worked on a product that ended having 20+ million install bases? If you have, you've got to understand that the cost of your having to spend one hour exptra time to re-write a piece of critical code for better performance is absolutely no cost comparing with the reward that it now runs twice as fast on all 20 million machines in the whole world.

Philip Nicoletti
July 22nd, 2002, 12:01 PM
First : Very nice, I'm impressed.

However, I am still not seeing this Qsort() as being better
than std::sort. Here are the results that I got.

System 1 : NT , PIII @500 MHz (dual) , 384 MB RAM , VC++ version 5
System 2 : 98 , PIII @500 MHz , 512 MB RAM , VC++ version 6
System 3 : Irix , R10000 @ 250 MHz (dual) , 384 MB RAM, g++ (ver ?)

In the tables below, the first number is for the first
system, the second number for the second system, etc.

(the forum might mess up the spacing)

===========================================================
vector<int> :

std::sort using 2 function call : 484 , 860 , 1180
std::sort using functor : 590 , 470 , 1210
Qsort : 1062 , 1100 , 2320

===========================================================
vector<double> :

std::sort using 2 function call : 781 , 1040 , 1520
std::sort using functor : 1062 , 1040 , 1660
Qsort : 2031 , 2480 , 3150

===========================================================
vector<structure> , direct sort :

std::sort using 2 function call : 13960 , 14200 , 47080
std::sort using functor : 14248 , 14310 , 46750
Qsort : 10150 , 10120 , 21010
===========================================================
vector<structure> , indirect sort :

std::sort using functor : 5950 , 5910 , 25230
Qsort : 8422 , 8650 , 25420
===========================================================

So the fastest way to sort a structure ? As you suggested,
an indirect sort.

Paul McKenzie
July 22nd, 2002, 12:01 PM
Originally posted by Anthony Mai
Qsort is designed for sorting of objects arranged in continuous and sequential memory locations (i.e., object array or vector). No one ever said Qsort can be used on deque.
Then stop comparing apples to oranges. Your sort is a specialized version of the 'C' qsort() function. You are comparing a generalized algorithm function to one that is not generalized.
Here is a hypothetical question posted on CodeGuru:
-----------------------------------------------
Newbie: I have a deque<> that contains 1,000,000 objects, and I would like to sort on the age. How do I do that?
------------------------------------------------

Well, what is your answer? Use qsort() or QSort()? The same argument of using deque against Qsort is as valid as the argument using linked list against std:sort()[/B]. I don't believe std::sort() can be used to sort a linked list directly, can it? So must the original creator of std::sort() be blamed for writting something that can only sort something (vectors, arrays), but not some other thing (linked list)?That is because linked lists do not have random-access iterators, therefore a specialized version of sort is used for lists. And there is a reason why list doesn't use random access iterators. Do you know why?
What's wrong with Qsort that it can not sorting something (deque) that is so rarely ever been used? Most of programmers have never used deque even once during their programming lifetime. A lot of them never even heard about deque. And there is absolutely nothing one can think about that must be done with deque.All the C++ gurus on this board, take note of the above. So the only valid answers should be in terms of how many programmers Anthony Mai thinks uses a certain part of the library. I'm using deque<> right now. That goes to show you don't know a lot of programmers, and you don't know about the structure of a deque, but I leave that as a homework assignment for yourself (or maybe another CodeGuru can elighten you on what a deque is). You rail about "performance", but you can't think of any reason why someone would use deque over vector? This reveals a lot about your knowledge of C++.
And it remains that my improved Qsort() easily beats std::sort() by several times faster, with all sorts of objects I have tested so far. The original qsort() was written in a very poor way performance wise. I looked at the sorce code and immediately saw the problem, and I could came out with a few quick improvement which yields Qsort() you saw. Giving it some more thoughts I can easily make Qsort at least twice faster than what it is now.
Then give your code to PJ Plauger and have him take a look at it. He wrote the 'C' library also so maybe he can incorporate your "wonderful" code in his library (at least ping Plauger and let him know of his "bug"). Also, your code has only been tested for a certain compiler, for a certain OS, and you have not posted any numbers. Also, and more important, I guess you didn't see the link to the C++ User's Journal article I gave on the original thread (before you started this one) concerning qsort() and the dangers of using it on non-trivial classes. The guy who wrote the article is the chair of the ANSI C++ Standards committee. I know, I know -- he's wrong too.

But all of this is irrelevant. I have an application that uses deques -- your code is worthless to me and other programmers who use another data structure. I snipped the last irrelevant comments about working on applications to ask a simple question that I know that others would like to hear an answer:

Given the sentence above concerning usage of a deque do you have any C++ books, periodicals or reading material in your vicinity? If so, can you give me the titles?

Regards,

Paul McKenzie

Zeeshan
July 23rd, 2002, 08:28 AM
First Thanks Paul for introducing me good book of STL

The C++ Standard Library by Nicolai M. Josuttis


Most of programmers have never used deque even once during their programming lifetime. A lot of them never even heard about deque. And there is absolutely nothing one can think about that must be done with deque.


Hmm, sound interesting, but what about taking a look at C++ Standard. Sec 23.2 Stack use deque. Also take a look at Sec 23.2.3.3 of C++ Standard where the stack class is define.

What about Stack now? Isn't it widely used?

And from 23.2.3.1 queue also use deque bydefault.

I like the Paul question


Given the sentence above concerning usage of a deque do you have any C++ books, periodicals or reading material in your vicinity? If so, can you give me the titles?


I also like to know about those books which you are reading related to C++ and STL. At the moment i have three books of STL on my table and let me tell you something about deque from those books.

1. The C++ Standard Library by Nicolai M. Josuttis
2. Generic Programming and the STL by Matthew H Austern
3. Effective STL by Scott Meyers

What about take a look at item 1 From Effective STL


Item 1. Choose your containers with care.


The summary from these books are you should use deque when you want to insert item at both end, because you cant do it effeciently with vector. And when you dont want to waste extra memory. (In vector there may be some extra memory, take a look at capacity() and reserve() function of vector). Sec 6.3.1 from The C++ Standard Library.

And usually deque is implemented in the form of segmented array
to improve its performance. And use when want to insert item at both end. Sec 16.1.4 from Generic Programming and the STL.

Now come to sort. Take a look at standard again this time sec 25.3 "Sorting and related operations"

Correct me if i am wrong. As far as i have studied there isnt any algorithm define there for sort elements. So it is depend on implementation of STL. Means you are going to compare qsort with unknown sorting algorithm (b/c it is implementation specific). If you compare qsort with some sorting algorithm then it seems fine but here you compare qsort with unknown sorting algorithm.

And if you are very much concern about performance then i think take a look at


Art of computer programming volume 3
by Doneld E Knuth


Then you ll get the better idea, which algo is better then other and in which condition.


I don't believe std::sort() can be used to sort a linked list directly, can it


Hmm, interesting point, but again i have to go to the C++ Standard.

Now from Sec 25.3.1.1 "sort" take a look there sort need random access iterator.

Now from Sec 23.2.2.1 list support bidirectional iterator.

And what are bidirectional iterator and random access iterator take a look at Sec 24.1.4 and 24.1.5 from C++ Standard. Got ans why sort not apply on list?


Most of programmers have never used deque even once during their programming lifetime. A lot of them never even heard about deque.


No comments if you not use it before.

And i think this form is for C++, so here we are taking about (most of the time) standard C++. That's the reason i give reference of Standard of C++, to see what C++ Standard said about this.

And few words about qsort.

Correct me if i am wrong. As far as i know quick sort have O(n^2) in its worst case.

AnthonyMai
July 23rd, 2002, 11:05 AM
I do have a few C++ books on my bookshelf, including Plaugher's "draft" C++ standard, which is one of the very few book that I regretted buying. I have plenty of other books that I value much more. They deal with solving practical problems like audio video compression and processing technology, user interface design, cryptography, scientific emulation, neuronetwork, device driver programming etc.

Philip Nicoletti
July 23rd, 2002, 11:37 AM
I continued my reading of Stroustrup's book yesterday, and
came to the section on "specialization". The example he gave
was directly relevant to sorting : using specialization to swap
objects more efficiently.

So, for the direct structure, I specialized std::iter_swap().
Here are the results for the same 3 systems as my previous post:

vector<structure> , direct sort, specialized std::iter_swap()

std::sort : 8350 , 8320 , 23250
Qsort() : 10130 , 10010 , 23150

(here is the code if you are interested. "qsort.h" is Anthony's
code given previously (with #define CUTOFF 8)


#include <iostream>
#include <vector>
#include <ctime> // might need to use <time.h> instead
#include <functional>
#include <string>
#include <algorithm>


#include "qsort.h"

using namespace std;

string RandomNumberString()
{
char tmp[4];
int i;
string result;

const char char_tbl2[] = "0123456789";

for (i=0; i<3; i++)
{
tmp[i] = char_tbl2[((unsigned int)rand())%(sizeof(char_tbl2)-1)];
}
tmp[sizeof(tmp)-1] = 0x00;

result = tmp;
return result;
}

struct S
{
S(){}
~S(){}

inline bool operator < (const S& rhs) const
{
return strcmp(s1.c_str(), rhs.s1.c_str())<0?true:false;
}

inline void swap(S & rhs)
{
s1.swap(rhs.s1); // use std::string swap() function
s2.swap(rhs.s2);
s3.swap(rhs.s3);
s4.swap(rhs.s4);
}

string s1;
string s2;
string s3;
string s4;
};

struct SortFld
{
inline bool operator() (const S& first, const S& second)
{
return strcmp(first.s1.c_str(), second.s1.c_str())<0?true:false;
}
};


typedef vector<S> V;
typedef vector<S>::iterator IT;


////////////////////////////////////////////////////////////////////////
// //
// specialize std::iter_swap() for the vector<S>::iterator //

namespace std
{
template <> inline
void iter_swap<IT,IT>(IT a, IT b)
{
(*a).swap( (*b) ); // call member function to swap values
}
}
//
////////////////////////////////////////////////////////////////////////



void randomise_vector(V& v)
{
srand(11111);
for (IT i = v.begin(); i != v.end(); ++i)
{
(*i).s1 = RandomNumberString();
}
}


inline int qscompare(const void* lhs, const void* rhs)
{
return strcmp(((S*)lhs)->s1.c_str(), ((S*)rhs)->s1.c_str());
}


int main()
{
clock_t start,stop;

V v(1000000);

randomise_vector(v);
cout << "std::sort using 2 argument call: ";
start = clock();
sort(v.begin(), v.end());
stop = clock();
cout << stop - start << " ticks" << endl;

randomise_vector(v);
cout << "Qsort: ";
start = clock();
Qsort(static_cast<void*>(&v[0]), v.size(), sizeof(v[0]), qscompare);
stop = clock();
cout << stop - start << " ticks" << endl;


randomise_vector(v);
cout << "std::sort using functor call: ";
start = clock();
sort(v.begin(), v.end(),SortFld());
stop = clock();
cout << stop - start << " ticks" << endl;

return 0;
}

Paul McKenzie
July 23rd, 2002, 03:36 PM
Nice work Phillip. The iter_swap() is just one of those niceties of using std::sort. As of the use of string::swap, here is the section in Josuttis's book "The C++ Standard Library" on string::swap:

void string::swap (string& str)

void swap (string& str1, string& str2)

Both forms swap the value of the two strings.
- The member function swaps the contents of *this and str.
- The global function swaps the contents of str1 and str2.

You should prefer these functions over assignment if possible because they are faster. In fact, they are guaranteed to have constant complexity. See Section 11.2.8 page 490 for details.

So far it's diminishing returns for q(Q)sort. More code to maintain, possible bugs (QSort version), doesn't work on non-trivial classes, works only on vector or contiguous memory, not type-safe, and, at least running on VC++, not faster, or if faster, by a minimal amount.

Also (a minor point, but a point anyway), the QSort code is not valid ANSI C++ because of the "__cdecl". It didn't compile under Solaris. (remember, this is the non-VC++ forum -- oh I forgot -- it doesn't matter if it doesn't compile using a Small Potatoes (© 2002) compiler, so __cdecl is valid).

Here is the CUJ article that talks about std::sort and qsort(). It's good reading:

http://www.cuj.com/experts/1908/austern.htm?topic=experts

Here is a direct quote from the article (written by the chair of the ANSI C++ Standardization committee) with respect to the library qsort() function:

You can't use qsort to sort objects with constructors or virtual member functions, or to sort any data structure other than a C array. Although qsort isn't formally deprecated, its only real use is backward compatibility with C.

Regards,

Paul McKenzie

AnthonyMai
July 23rd, 2002, 04:34 PM
You can't use qsort to sort objects with constructors or virtual member functions, or to sort any data structure other than a C array. Although qsort isn't formally deprecated, its only real use is backward compatibility with C

With due respects, the above statement is wrong. I am going to show an example where qsort() or Qsort() is used to sort classes with a constructor that allocates memory. It works just fine without a problem or memory leak. I am also going to show a case where the class has virtual member functions, and it will also work just fine with (Q/q)sort().

Theoretically the quotes statement has some merit in expressing the worry that qsort() would not know how to call proper constructor/destructor to allocate/deallocate objects.

Practically, such worry is un-warranted, during sort, there is no need to create or destroy any object, you are merely moving objects from one memory location to another memory location. So, as I stated in one of my previous messages, unless the object contains data that correlates to the exactly memory location where the object is stored, qsort() works just fine sorting vectors or arrays of ANY complicated class objects

And that says practically about almost all of the class objects we use on daily basis, because it is rare that a class object's state depends on its own location.

AnthonyMai
July 23rd, 2002, 04:34 PM
#include <vector>
#include <algorithm>
#include <functional>
#include <iostream>
#include <cstdlib>
#include <ctime>
#include <string>

#define ELEMENTS 1000000

#define CUTOFF 8

using namespace std;

// This is Anthony's improved version of qsort
void Qsort (
void *base,
unsigned num,
unsigned width,
int (*comp)(const void *, const void *)
);

inline int qscompare(const void* lhs, const void* rhs);


class SomeClass
{
public:
SomeClass();
SomeClass(const SomeClass& src);
~SomeClass();

SomeClass& operator= (const SomeClass& src);

int m_len;
std::string fld1;
char* m_pBuffer;

std::string fld2;
std::string fld3;
std::string fld4;
};

SomeClass::SomeClass()
{
m_len = 16 + (((unsigned int)rand()) % 64);
m_pBuffer = new char[m_len];
}

SomeClass::~SomeClass()
{
if (m_pBuffer)
{
delete [] m_pBuffer;
m_pBuffer = NULL;
}
m_len = 0;
}

SomeClass::SomeClass
(
const SomeClass& src
) :
m_pBuffer(NULL),
fld1(src.fld1),
fld2(src.fld2),
fld3(src.fld3),
fld4(src.fld4)
{
m_len = src.m_len;
if (src.m_pBuffer)
{
m_pBuffer = new char[m_len];
memcpy(m_pBuffer, src.m_pBuffer, m_len);
}
}


SomeClass& SomeClass::operator= (const SomeClass& src)
{
fld1 = src.fld1;
fld2 = src.fld2;
fld3 = src.fld3;
fld4 = src.fld4;

if (m_len != src.m_len)
{
if (m_len > 0)
{
delete [] m_pBuffer;
m_pBuffer = NULL;
}
m_len = src.m_len;
if (m_len > 0)
{
m_pBuffer = new char[m_len];
}
}

if (m_len && src.m_pBuffer)
{
memcpy(m_pBuffer, src.m_pBuffer, m_len);
}

return *this;
}


typedef vector<SomeClass> intvec;


string RandomNumberString()
{
int i;
char tmp[4];
string result;

const char char_tbl2[] = "0123456789";

for (i=0; i<3; i++)
{
tmp[i] = char_tbl2[((unsigned int)rand())%(sizeof(char_tbl2)-1)];
}
tmp[sizeof(tmp)-1] = 0x00;

result = tmp;
return result;
}


void randomise_vector(intvec& v)
{
srand(time(0));
char szBuf[15];
for (intvec::iterator i = v.begin(); i != v.end(); ++i)
{
sprintf(szBuf, "%d", (unsigned int)rand());
(*i).fld1 = szBuf;
//(*i).fld1 = RandomNumberString(); //szBuf;
}
}


int main()
{
int i, total = ELEMENTS-1;
clock_t start;
clock_t stop;
intvec v(total+1);

randomise_vector(v);
cout << "qsort: ";
start = clock();
Qsort(static_cast<void*>(&v[0]), ELEMENTS, sizeof(SomeClass), qscompare);
stop = clock();
cout << stop - start << " ticks" << endl;

for (i=0; i<ELEMENTS-2; i++)
{
if (strcmp(v[i].fld1.c_str(), v[i+1].fld1.c_str()) > 0)
{
break;
}
}
if (i < ELEMENTS-2)
{
cout << "There is an error in Qsort" << endl;
}
else
{
cout << "There is NO error in Qsort" << endl;
}

cout << "Test done" << endl;

return 0;
}


inline int qscompare(const void* lhs, const void* rhs)
{
return strcmp(((SomeClass *)lhs)->fld1.c_str(), ((SomeClass*)rhs)->fld1.c_str());
}

// This is Anthony's improved version of qsort -A.Mai 7/18/2002
void Qsort (
void *base,
unsigned num,
unsigned width,
int (*comp)(const void *, const void *)
)
{
char *lo, *hi;
char *mid;
char *loguy, *higuy;
unsigned size;
char *lostk[30], *histk[30];
int stkptr;

if (num < 2 || width == 0)
{
return;
}

stkptr = 0;

lo = (char*)base;
hi = (char *)base + width * (num-1);

recurse:

size = (hi - lo) / width + 1;

if (size > CUTOFF)
{
mid = lo + (size / 2) * width;

{
register int w2 = width;

if (w2 < 4)
{
do
{
char tmp;

w2 --;
tmp = mid[w2];
mid[w2] = lo[w2];
lo[w2] = tmp;
} while (w2 > 0);
}
else
{
int tmp;

tmp = *(int*)(&mid[w2-4]);
*(int*)(&mid[w2-4]) = *(int*)(&lo[w2-4]);
*(int*)(&lo[w2-4]) = tmp;
w2--;
w2 &= -4;
while (w2 >= 4)
{
w2 -= 4;
tmp = *(int*)(&mid[w2]);
*(int*)(&mid[w2]) = *(int*)(&lo[w2]);
*(int*)(&lo[w2]) = tmp;
}
}
}

loguy = lo;
higuy = hi + width;

for (;;)
{
do {
loguy += width;
} while (loguy <= hi && comp(loguy, lo) < 0); // <=

do {
higuy -= width;
} while (higuy > lo && comp(higuy, lo) > 0); // >=

if (higuy < loguy)
{
break;
}

{
register int w2 = width;

if (w2 < 4)
{
do
{
char tmp;

w2 --;
tmp = loguy[w2];
loguy[w2] = higuy[w2];
higuy[w2] = tmp;
} while (w2 > 0);
}
else
{
int tmp;

tmp = *(int*)(&loguy[w2-4]);
*(int*)(&loguy[w2-4]) = *(int*)(&higuy[w2-4]);
*(int*)(&higuy[w2-4]) = tmp;
w2--;
w2 &= -4;
while (w2 >= 4)
{
w2 -= 4;
tmp = *(int*)(&loguy[w2]);
*(int*)(&loguy[w2]) = *(int*)(&higuy[w2]);
*(int*)(&higuy[w2]) = tmp;
}
}
}
}

{
register int w2 = width;

if (w2 < 4)
{
do
{
char tmp;
w2 --;
tmp = lo[w2];
lo[w2] = higuy[w2];
higuy[w2] = tmp;
} while (w2 > 0);
}
else
{
int tmp;

tmp = *(int*)(&lo[w2-4]);
*(int*)(&lo[w2-4]) = *(int*)(&higuy[w2-4]);
*(int*)(&higuy[w2-4]) = tmp;
w2--;
w2 &= -4;
while (w2 >= 4)
{
w2 -= 4;
int tmp = *(int*)(&lo[w2]);
*(int*)(&lo[w2]) = *(int*)(&higuy[w2]);
*(int*)(&higuy[w2]) = tmp;
}
}
}

if ( higuy - 1 - lo >= hi - loguy )
{
if (lo + width < higuy)
{
lostk[stkptr] = lo;
histk[stkptr] = higuy - width;
++stkptr;
}

if (loguy < hi)
{
lo = loguy;
goto recurse;
}
}
else
{
if (loguy < hi)
{
lostk[stkptr] = loguy;
histk[stkptr] = hi;
++stkptr;
}

if (lo + width < higuy)
{
hi = higuy - width;
goto recurse;
}
}
}
else
{
char *p, *max;

while (hi > lo)
{
max = lo;
for (p = lo+width; p <= hi; p += width)
{
if (comp(p, max) > 0)
{
max = p;
}
}

if ( max != hi )
{
register int w2 = width;

if (w2 < 4)
{
do {
char tmp;

w2 --;
tmp = max[w2];
max[w2] = hi[w2];
hi[w2] = tmp;
} while (w2 > 0);
}
else
{
int tmp;

tmp = *(int*)(&max[w2-4]);
*(int*)(&max[w2-4]) = *(int*)(&hi[w2-4]);
*(int*)(&hi[w2-4]) = tmp;
w2--;
w2 &= -4;
while (w2 >= 4)
{
w2 -= 4;
tmp = *(int*)(&max[w2]);
*(int*)(&max[w2]) = *(int*)(&hi[w2]);
*(int*)(&hi[w2]) = tmp;
}
}
}

hi -= width;
}
}


--stkptr;
if (stkptr >= 0)
{
lo = lostk[stkptr];
hi = histk[stkptr];
goto recurse;
}

return;
}

Paul McKenzie
July 23rd, 2002, 05:19 PM
Anthony,

What's up with the new name? It shows you only posted 6 times! Is there something that you're not telling us here? Or is this another Anthony? Hmm.. Curious.

Anyway, the concern is that if you took your code to another compiler, the code could break down if you use non-trivial classes. This is the problem of only testing cases on a certain compiler and a certain set of criteria. Unless you are positively certain that v-tables, for example, have the same layout and are implemented the same across compilers (which they're not), then you have to go by what the standard says on these things (which I don't have in front of me at the moment).

Also (going back to the library version of qsort), it is very possible that the implementation of qsort() on a certain compiler uses memcpy() or memset(). Again, this is dangerous on non-trivial classes, and is probably what the author is referring to.

What you need to do is admit that the code may be fast (however according to Phillip's numbers, it doesn't beat std::sort), but it has only been tested for brand-x compiler, is limited to what types of sequences or containers it can sort, and may possibly break if used on another compiler using non-POD types.

In other words, Caveot Emptor.

Regards,

Paul McKenzie

AnthonyMai
July 23rd, 2002, 07:51 PM
Paul:
No actually I do not have a story to tell. Obviously some one simply do not want me to have a name with a space in between. So I removed the space, and now I am able to post again. Unfortunately that means I lose all my post counts.
Doesn't that make you feel more satisfied that now your post count is 1000 times more than mine? OK with me.
Rumor has it that Paul gets the board moderator to ban me from accessing this board again. That's not true. I don't get paid for posting here. So I don't get banned either. And this is a free country. So I don't get jailed for speaking our my opinion.
Happy computing, Paul.
P.S. If some one still doesn't like my name. I can change again. Heck it doesn't cost me a penny to give myself a new name.

Zeeshan
July 23rd, 2002, 11:02 PM
Paul i go through the Standard of C++. Virtual function is define in Sec 10.3 and i havent found any thing related to implementation of virtual function. Standard doesnt say anything about implementation of virtual function, so it means it can be diff from compiler to compiler.