Click to See Complete Forum and Search --> : The Qsort challenge


AnthonyMai
November 26th, 2002, 07:55 PM
A while ago, I posted my Qsort code and explained why it can be used to sort any object arrays or vectors, just like any POD data types. The code works on all compilers I can lay my hands on. And it is much faster than std::sort when the object is none trivial.

There was one, unsubstantiate claim that when a specific compiler was used, and when sorting a specific type of objects, Qsort does not give the right result. That claim was un-substantiated because it was not verified by any one else that uses that specific compiler, nor did the original poster provide any file to back it up.

So here is the challenge, I am posting my Qsort code again. And if the original challenge do believe it doesn't work for a specific object type on a specific compiler, please compile the stuff with assembly output enabled, and zip all the file up, and post it here. So every one can verify the result.

Here is my Qsort code:

#include <vector>
#include <algorithm>
#include <functional>
#include <iostream>
#include <cstdlib>
#include <ctime>
#include <string>

#define ELEMENT_COUNT 10000 //00

#define BUFF_SIZE 4096

#define CUTOFF 8 /* testing shows that this is good value */

using namespace std;

class SomeClass
{
public:
SomeClass();
~SomeClass();

SomeClass(const SomeClass& src);
SomeClass& operator=(const SomeClass& src);

std::string fld1;
char* m_pBuff;
};


SomeClass::SomeClass()
{
m_pBuff = new char[BUFF_SIZE];
}

SomeClass::~SomeClass()
{
delete [] m_pBuff;
}


SomeClass::SomeClass(const SomeClass& src) :
fld1(src.fld1)
{
m_pBuff = new char[BUFF_SIZE];
memcpy(m_pBuff, src.m_pBuff, BUFF_SIZE);
}

SomeClass& SomeClass::operator=(const SomeClass& src)
{
fld1 = src.fld1;

memcpy(m_pBuff, src.m_pBuff, BUFF_SIZE);

return *this;
}


struct SortFld
{
inline bool operator() (const SomeClass& first, const SomeClass& second)
{
return strcmp(first.fld1.c_str(), second.fld1.c_str())<0?true:false;
}
};


typedef vector<SomeClass> intvec;

void randomise_vector(intvec& v)
{
srand(time(0));
char szBuf[15];
for (intvec::iterator i = v.begin(); i != v.end(); ++i)
{
sprintf(szBuf, "%d", 100 + (((unsigned int)rand())%999));
(*i).fld1 = szBuf;
strcpy((*i).m_pBuff, szBuf);
}
}

inline bool stllessfunc(const SomeClass& lhs, const SomeClass& rhs)
{
return strcmp(lhs.fld1.c_str(), rhs.fld1.c_str())<0?true:false;
}

inline int qscompare(const void* lhs, const void* rhs)
{
return strcmp(((SomeClass *)lhs)->fld1.c_str(), ((SomeClass *)rhs)->fld1.c_str());
}


void __cdecl Qsort (
void *base,
unsigned num,
unsigned width,
int (__cdecl *comp)(const void *, const void *)
);

int main()
{
srand(clock());
intvec v(ELEMENT_COUNT);

randomise_vector(v);

cout << "std::sort using functor: ";
clock_t start = clock();
sort(v.begin(), v.end(), SortFld());
clock_t stop = clock();
cout << stop - start << " ticks" << endl;

randomise_vector(v);

cout << "std::sort using function: ";
start = clock();
sort(v.begin(), v.end(), stllessfunc);
stop = clock();
cout << stop - start << " ticks" << endl;

randomise_vector(v);
cout << "Qsort: ";
start = clock();
Qsort(static_cast<void*>(&v[0]), ELEMENT_COUNT, sizeof(SomeClass), qscompare);
stop = clock();
cout << stop - start << " ticks" << endl;

char c;
cin >> c;
return 0;
}


// This is Anthony's improved version of qsort -A.Mai 7/18/2002
void __cdecl Qsort (
void *base,
unsigned num,
unsigned width,
int (__cdecl *comp)(const void *, const void *)
)
{
char *lo, *hi;
char *mid;
char *loguy, *higuy;
unsigned size;
char *lostk[30], *histk[30];
int stkptr;

if (num < 2 || width == 0)
{
return;
}

stkptr = 0;

lo = (char*)base;
hi = (char *)base + width * (num-1);

recurse:

size = (hi - lo) / width + 1;

if (size > CUTOFF)
{
mid = lo + (size / 2) * width;

{
register int w2 = width;

if (w2 < 4)
{
do
{
char tmp;

w2 --;
tmp = mid[w2];
mid[w2] = lo[w2];
lo[w2] = tmp;
} while (w2 > 0);
}
else
{
int tmp;

tmp = *(int*)(&mid[w2-4]);
*(int*)(&mid[w2-4]) = *(int*)(&lo[w2-4]);
*(int*)(&lo[w2-4]) = tmp;
w2--;
w2 &= -4;
while (w2 >= 4)
{
w2 -= 4;
tmp = *(int*)(&mid[w2]);
*(int*)(&mid[w2]) = *(int*)(&lo[w2]);
*(int*)(&lo[w2]) = tmp;
}
}
}

loguy = lo;
higuy = hi + width;

for (;;)
{
do {
loguy += width;
} while (loguy <= hi && comp(loguy, lo) < 0); // <=

do {
higuy -= width;
} while (higuy > lo && comp(higuy, lo) > 0); // >=

if (higuy < loguy)
{
break;
}

{
register int w2 = width;

if (w2 < 4)
{
do
{
char tmp;

w2 --;
tmp = loguy[w2];
loguy[w2] = higuy[w2];
higuy[w2] = tmp;
} while (w2 > 0);
}
else
{
int tmp;

tmp = *(int*)(&loguy[w2-4]);
*(int*)(&loguy[w2-4]) = *(int*)(&higuy[w2-4]);
*(int*)(&higuy[w2-4]) = tmp;
w2--;
w2 &= -4;
while (w2 >= 4)
{
w2 -= 4;
tmp = *(int*)(&loguy[w2]);
*(int*)(&loguy[w2]) = *(int*)(&higuy[w2]);
*(int*)(&higuy[w2]) = tmp;
}
}
}
}

{
register int w2 = width;

if (w2 < 4)
{
do
{
char tmp;
w2 --;
tmp = lo[w2];
lo[w2] = higuy[w2];
higuy[w2] = tmp;
} while (w2 > 0);
}
else
{
int tmp;

tmp = *(int*)(&lo[w2-4]);
*(int*)(&lo[w2-4]) = *(int*)(&higuy[w2-4]);
*(int*)(&higuy[w2-4]) = tmp;
w2--;
w2 &= -4;
while (w2 >= 4)
{
w2 -= 4;
int tmp = *(int*)(&lo[w2]);
*(int*)(&lo[w2]) = *(int*)(&higuy[w2]);
*(int*)(&higuy[w2]) = tmp;
}
}
}

if ( higuy - 1 - lo >= hi - loguy )
{
if (lo + width < higuy)
{
lostk[stkptr] = lo;
histk[stkptr] = higuy - width;
++stkptr;
}

if (loguy < hi)
{
lo = loguy;
goto recurse;
}
}
else
{
if (loguy < hi)
{
lostk[stkptr] = loguy;
histk[stkptr] = hi;
++stkptr;
}

if (lo + width < higuy)
{
hi = higuy - width;
goto recurse;
}
}
}
else
{
char *p, *max;

while (hi > lo)
{
max = lo;
for (p = lo+width; p <= hi; p += width)
{
if (comp(p, max) > 0)
{
max = p;
}
}

if ( max != hi )
{
register int w2 = width;

if (w2 < 4)
{
do {
char tmp;

w2 --;
tmp = max[w2];
max[w2] = hi[w2];
hi[w2] = tmp;
} while (w2 > 0);
}
else
{
int tmp;

tmp = *(int*)(&max[w2-4]);
*(int*)(&max[w2-4]) = *(int*)(&hi[w2-4]);
*(int*)(&hi[w2-4]) = tmp;
w2--;
w2 &= -4;
while (w2 >= 4)
{
w2 -= 4;
tmp = *(int*)(&max[w2]);
*(int*)(&max[w2]) = *(int*)(&hi[w2]);
*(int*)(&hi[w2]) = tmp;
}
}
}

hi -= width;
}
}


--stkptr;
if (stkptr >= 0)
{
lo = lostk[stkptr];
hi = histk[stkptr];
goto recurse;
}

return;
}

Yves M
November 26th, 2002, 08:17 PM
Well, try this ;)


#define ELEMENT_COUNT 10000000

typedef vector<int> intvec;

void randomise_vector(intvec& v)
{
srand(time(0));
char szBuf[15];
for (intvec::iterator i = v.begin(); i != v.end(); ++i)
{
*i = rand();
}
}

inline int qscompareint(const void* lhs, const void* rhs)
{
return (*((int *) lhs) < (*(int *)rhs));
}

int main()
{
srand(clock());
vecor<int> v(ELEMENT_COUNT);

randomise_vector(v);

cout << "std::sort";
clock_t start = clock();
sort(v.begin(), v.end());
clock_t stop = clock();
cout << stop - start << " ticks" << endl;

randomise_vector(v);
cout << "Qsort: ";
start = clock();
Qsort(static_cast<void*>(&v[0] ), ELEMENT_COUNT, sizeof(int), qscompareint);
stop = clock();
cout << stop - start << " ticks" << endl;

char c;
cin >> c;
return 0;
}

It's not sorting classes, only sorting integers, but I can see which version is faster ;)

PaulWendt
November 26th, 2002, 08:47 PM
I eagerly wait for Philip's response, but in the meantime, there
have already been ways people mentioned to get beyond the
copy constructor. It's not that std::sort is any slower; it's that it
calls the copy constructor, which your method doesn't do. You say
that you can use a memcpy() [or the equivalent] to swap
elements ... and that behavior is undefined in the standard.
Maybe your code will work on every compiler known to man; then
again, maybe it won't.

Like many people said in the last thread, instead of sorting items
that require costly copy constructors to be called, make the
container hold pointers instead of objects by value. If I recall
correctly, tests run in those conditions had std::sort performing
at around the same speed as your QSort() [I can't recall, but
std::sort might have been negligably faster].

The point is that you're using non-standard behavior and a
flawed methodology to "prove" that your sort is superior. It
doesn't take a PhD to realize that calling copy constructors will
be costly; most C++ programmers who've been at things a while
will recognize this and make changes appropriately. Of course,
a container of objects by value is the rare case anyway. In many
C++ applications, we want to deal with objects as a base class
so that we can utilize polymorphism effectively.

--Paul

Paul McKenzie
November 26th, 2002, 08:57 PM
The code works on all compilers I can lay my hands onFamous last words for those who stray from the standard, and find out that their compiler lets them get away with it. Just like calling "delete" on a "new[]" object works on many compilers.And it is much faster than std::sort when the object is none trivial.And if it is non-trivial, then what? Cross your fingers and hope the program doesn't core dump or GPF?There was one, unsubstantiate claim that when a specific compiler was used, and when sorting a specific type of objects, Qsort does not give the right result. That claim was un-substantiated because it was not verified by any one else that uses that specific compiler, nor did the original poster provide any file to back it up.That's because no one wants to waste their time. Also, all of this is your job, not ours. You are the one making the wild claims that your sort is so great, you do the research. You're kidding if you think that a failure in your code is going to make the person zip up assembly language to you. What for? It failed, just as what we stated could happen.

We boldly stated that what you were doing is undefined as per ANSI specifications. Everyone from people here at CodeGuru to comp.lang.c++ to the head of the ANSI committee. All stated that qsort or your QSort is not guaranteed to work for non-POD objects types, most likely objects derived from virtual base classes. You didn't listen and you got burned. Do you need more convincing? Post your code to comp.std.c++. Let them have a laugh. Or are all of them wrong too?

If it's so great and you're so sure of the soundness of your code, why haven't you submitted any of it to any C++ magazine, expert, periodical, whatever to have them replace the std::sort algorithm with your sort? What are you afraid of? www.boost.org takes submittals, why not submit it there?

Listen, most are really tired of debating this with you and we know that you lost this debate a long time ago. Instead, you should be spending your efforts coming up with a faster std::sort, and forget about this q/QSort debacle.

Regards,

Paul McKenzie

Zeeshan
November 26th, 2002, 09:52 PM
Again useless discuss started by AnthonyMai.

Regardless of all the technical details which are given to AnthonyMai i want to add one more claim in this thread.

BMW is better than CAR.

Well what is the significance of this claim? Nothing i m comparing totally two different things. If i said BMW is batter than FARARI regardless claim is right or wrong (Although Formula 1 just prove opposite:) ) it at least make scence. But in the above claim i compare apples with oranges.

Now AnthonyMai can you tell me what is std::sort? What algo does it follow? Where in standard it is written that library vendor has to use this algo to implement std::sort?

std::sort is just an specification now it is upto library vendor to implement it with suitable algorithm. So you are comparing qsort with just specification.

If you said qsort is better than this implementation of std::sort then at least this claim made sence (it doesnt mean claim is right claim is still wrong on POD)

If you still claim you are right and all of us are wrong then dont waste you time on us and publish your ideas on some magzine rather than start comparing this useless stuff after every few weeks.

Paul McKenzie
November 26th, 2002, 10:27 PM
Originally posted by Zeeshan
Again useless discuss started by AnthonyMai.

Regardless of all the technical details which are given to AnthonyMai i want to add one more claim in this thread.

BMW is better than CAR.Believe it or not I got a chuckle when you stated that :) It is exactly what this is -- comparing a sorting method with a specification.Now AnthonyMai can you tell me what is std::sort? What algo does it follow? Where in standard it is written that library vendor has to use this algo to implement std::sort?Which is why it makes more sense to improve std::sort, but I've stated this over and over but it goes in one ear and out the other.If you still claim you are right and all of us are wrong then dont waste you time on us and publish your ideas on some magzine rather than start comparing this useless stuff after every few weeks. You cannot get away with posting or submitting this stuff anywhere else without being humiliated. I posted Mai's qsort claim on comp.lang.c++, and it was a good thing I posted just a few of the responses. Yves (he wasn't the moderator back then, but if he was) would have been "snipping" away and placing asterisks on many of the words. One guy got so frustrated with trying to explain everything we are trying to explain, he just let loose with some choice words.

Regards,

Paul McKenzie

Gabriel Fleseriu
November 27th, 2002, 07:32 AM
It is trivial to write a class that, without having any practical use, will break Qsort.

AnthonyMai
November 27th, 2002, 09:22 AM
It is trivial to write a class that, without having any practical use, will break Qsort.


Talking is cheap. Oh I am sure you can write a bizillion different classes that will break Qsort. Can you actually come up with one and post the source code as well as the compilation result here? Without seeing that, and without seeing the original challenger who claimed that Qsort didn't work on his compiler posting his compiler output to back up his claim, it's all cheap talks.

BTW, std::sort doesn't work on a lot of things, too. Try throw in a couple of private constructors or private destructors. Or try to throw in some auto pointers. Or try to put in some pointers that point to other instances of the object.

Basically std::sort is broken whenever you have some class where appropriate copy constructors can not be written.

There is one case where std::sort would not work, and Qsort would work perfectly. One concept that is some people's favorable is the so called Int2Type concept, You instantiate a different object type, based on different value of an integer or enum passed in as template parameter. std::sort would not be able to operate on a continuous array of such objects, since each instance of the object is all of different type, based on the different integers used to instantiate them. However, since all these objects have the same size and data structure internally, Qsort would work on them without a problem!!!

Gabriel Fleseriu
November 27th, 2002, 09:30 AM
class SomeClass
{
public:
SomeClass():val_(0){self_=this;}
~SomeClass(){};
SomeClass(const SomeClass& src){self_=this; val_=src.val_;}
SomeClass& operator=(const SomeClass& src){ val_=src.val_; self_=this; return *this;};
int get_val()const{return self_->val_;}
void set_val(int i){self_->val_=i;}
private:
SomeClass *self_;
int val_;

};

Yves M
November 27th, 2002, 09:46 AM
Originally posted by Paul McKenzie
Which is why it makes more sense to improve std::sort
I've been thinking about this. The basic problem in this case is that copying and swapping "values" is expensive and I guess introsort doesn't handle this well. Element comparison is not expensive though. What other sorting algorithm would do less swaps ?

Then again, I wonder whether this problem is worth solving at all, since the case where you would put large chunks directly into a container as opposed to pointers is probably rare.

Maybe there is a need for specific "indirect" containers ? Like a indirect_vector which would only store reference counted pointers to objects ?

jfaust
November 27th, 2002, 09:58 AM
Maybe there is a need for specific "indirect" containers ? Like a indirect_vector which would only store reference counted pointers to objects ?


That's generally what I do for such containers:


typedef boost::shared_ptr<MyClass> MyPtr;
typedef std::vector<MyPtr> MyClassVector;

MyClassVector v;

// memory released automatically after use
v.push_back(MyPtr(new MyClass));


Now, when templated typedefs are added to the standard, you could do something like:


// Not C++ yet, but fingers are crossed:
template<typename T>
typedef std::vector<boost::shared_ptr<T> > SharedVector;

typedef SharedVector<MyClass> MyClassVector;


You could even templatize the container...

Jeff

Paul McKenzie
November 27th, 2002, 10:00 AM
Originally posted by AnthonyMai


Talking is cheap. Again, post this nonsense in a peer group of C++ experts and see how far you get. You keep saddling us with this junk, it's time for you to stop the "cheap talk" and show your code and explain it to experts.

Regards,

Paul McKenzie

jfaust
November 27th, 2002, 10:06 AM
Again, post this nonsense in a peer group of C++ experts and see how far you get.


Yes, Anthony, why don't you take the gloves off and post this where you can be properly ridiculed?

Jeff

Yves M
November 27th, 2002, 10:09 AM
Originally posted by jfaust
Now, when templated typedefs are added to the standard, you could do something like:

Arg, I would love templated typedefs :) The workarounds are not always pretty ;)

But well, vector_indirect would not satisfy the property of vector that all elements are stored contigously :/

P.S. Please don't get into flame-wars ;) This thread (and the last one on qsort) is interesting, so it's best not to get too personal. I would hate to have to close this thread :/

jfaust
November 27th, 2002, 10:12 AM
But well, vector_indirect would not satisfy the property of vector that all elements are stored contigously :/


Insofar as you can access the elements as an array, the elements (pointers) are contiguous. I think this is what really matters--constant time access to elements.

Jeff

Paul McKenzie
November 27th, 2002, 11:35 AM
Originally posted by Yves M P.S. Please don't get into flame-wars ;) This thread (and the last one on qsort) is interesting, so it's best not to get too personal. I would hate to have to close this thread :/ Yves, Mai's QSort shenanigans are starting to become troll bait.

If I started a thread called "it is safe to use delete on operator new[]" would you keep allowing me to post code after code showing that "compiler x accepts it", knowing full well that doing this is undefined by ANSI standards? What would be my goal of continuosly posting it? What if I started another thread, "returning references to local variables is OK", and I showed that for "compiler x it is safe", how long would I get away with it? In general, how long can someone post saying "I've beaten the standard -- I got it to work for such-and-such a compiler" without people who know better turning it into a personal discussion? If you had a chance to take a look, you saw what happened at comp.lang.c++. It turned into polite explanation, to hostility after a few rounds with Mai (who never really ventured in there -- it took myself to post his findings to comp.lang.c++).

Copying non-POD objects without invoking the proper constructor is not defined by the ANSI standard, therefore not guaranteed to work (as is reported by Phillip Nicoletti when run on g++ and PG compilers). Is Mai claiming that it is guaranteed to work? If he is stating this, he should say "yes it is guaranteed to work" or "no, it is not guaranteed to work". At least we can somewhat pin down what his goal is here of posting this stuff. If he is saying that it is guaranteed to work, the experts will disagree, point to line and verse in the standard, and have a good laugh. If he is saying it is not guaranteed to work, at least state where it is not guaranteed to work (i.e. under what conditions, etc.).

As Paul Wendt mentions, every compiler known to man may work with QSort code, but maybe not. All the compilers Mai has may work if you call "delete" on a "new char[]". Does this make it correct code? Mai would think so, the experts know better.

Regards,

Paul McKenzie

Yves M
November 27th, 2002, 11:49 AM
Paul,

For all the good and bad things, we're not on comp.lang.c++ or comp.std.c++ here, so I assume many of the people who post here don't necessarily have a copy of the standard. This is one of the reasons why I think this discussion is interesting, as it shows what you can't do with simple memcpy.

Is Mai claiming that it is guaranteed to work? If he is stating this, he should say "yes it is guaranteed to work" or "no, it is not guaranteed to work". At least we can somewhat pin down what his goal is here of posting this stuff.
True, it would be interesting to know. I guess the answer is that it is faster and works in "many" cases. Of course it might be interesting to know what "many" means. But ultimately it is a "useless" discussion because it's not guaranteed by the standard.

What this discussion exemplifies though is that std::sort has pitfalls in its speed.

Nevertheless, I don't like people shouting at each other ;)

AnthonyMai
November 27th, 2002, 03:14 PM
Gabriel posted:


class SomeClass
{
public:
SomeClass():val_(0){self_=this;}
~SomeClass(){};
SomeClass(const SomeClass& src){self_=this; val_=src.val_;}
SomeClass& operator=(const SomeClass& src){ val_=src.val_; self_=this; return *this;};
int get_val()const{return self_->val_;}
void set_val(int i){self_->val_=i;}
private:
SomeClass *self_;
int val_;

};


Gabriel: Nothing works for every thing, not even the STL. std::sort would not work objects that is not copyable. The problem with your sample code is that your class contains a pointer which points to certain instance of the class, which are being moved around, voiding the pointer.

So this is the sort of object that is NONE-copyable. Yes you have written a copy assignment operator. But the truth is, the object you copied using your copy assignment operator is NOT identical to the original!!!

How come? Try make a copy from an invalid object whose self pointer was corrupted, you will get a valid object nevertheless. An invalid object and a valid object certainly are NOT equivalent. Your copy operator copies an invalid object and turn it into a valid one, so it is NOT making an authentic copy!!!

General speaking, when your class contains a pointer that points to data of the same type, such objects are not exactly copyable, and std::sort may fail on such objects when no meaningful copying can be carried out.

For an example, replace your
SomeClass *self_;
with
SomeClass * pPrevious;

pPrevious would point to the previous element in the vector, or NULL, if it is the first element in the vector, or if it is a stand along object. Try sort the vector with std::sort, then you will find that pPrevious no longer makes sense any more.

In previous discussions I do point out that Qsort is suitable as long as it does not contain data who depend on the location of the object itself. And I point out that when it does, then proper copying of the object itself is impossible and std::sort would not work either.

In the case of Gabriel's example, if we allow the self pointer to be NULL and use that to represent special meanings, for example NULL means the object has not been initialized or has been temporary nullified, etc., then std::sort would NOT sort it properly either.

galathaea
November 27th, 2002, 04:38 PM
I didn't know that the an object whose copy constructor or assignment operators didn't make identical copies were non-copyable (or none-copyable?!?). I thought that was the difference between shallow and deep copy?

Also, just a point, but I thought the register keyword was always ignored by visual c++, but it looks like that is the compiler that this was built for...

If I'm wrong, I know I'll find out...

Bassman
November 27th, 2002, 05:04 PM
Hi, Anthony:

It is up to me as the author of my class to determine what behaviour the class should manifest when copying.

std::sort respects my design decisions by calling my operators on my class to do what I want. If it can't (because my class is copy-protected) then it will inform me at compile time.

QSort blindly assumes that when I say copy that I meant bitwise copy. If it is wrong, it doesn't tell me at compile time. It doesn't fail gracefully at runtime. It tells me by corrupting my data. Thanks, QSort.

Personally, I don't think that using QSort is worth the potential maintenance nightmares for a minimal (if any at all) performance increase.

Regards,
Bassman

Yves M
November 27th, 2002, 05:17 PM
Originally posted by AnthonyMai
So this is the sort of object that is NONE-copyable. Yes you have written a copy assignment operator. But the truth is, the object you copied using your copy assignment operator is NOT identical to the original!!!

Hum, I get it ;) an object is only copyable if it performs a bit-wise copy of itself when the copy constructor is called. Hence Qsort works on all copyable objects and there is no need for std::sort (or even a copy constructor).

Is this what you mean ?

AnthonyMai
November 27th, 2002, 07:36 PM
Hum, I get it an object is only copyable if it performs a bit-wise copy of itself when the copy constructor is called. Hence Qsort works on all copyable objects and there is no need for std::sort (or even a copy constructor).

Is this what you mean ?


No, that is not what I mean. I never said that copy must be done bit by bit. ALSO I never said Qsort makes copies bit by bit. Qsort never copies objects, it merely swap objects, in a bit by bit fashion. Because Qsort does not need to make object copies, it is much faster than std::sort when the object is none-trivial to copy.

How you copy an object depends on what you need to copy it for. If you copy it because you need a duplicated object for external use, probably a shallow copy is good enough.

But the copying in std::sort is meant to be copying in order to swap the object. As such, you want to preserve as much as possible how the object looks like, before and after copy. If as the result of std::sort copying and swapping, the objects are change some how, then the next effect is more than just sorting them, you have altered them, which is not acceptable.

Look at Gabriel's copy operator below again, you will see that only one member from the source object is copied, the other member, the self pointer, no matter what it was in the source object, is totally ignored. So at best it is only a partial copy, and the object have been altered during copying, and the copied object and the original does not look the same.

But then again, Gabriel's case is a case where the object can not be appropriately copied. One really should be surprised at the concept that some objects can not be copied. Singleton, for example, can not be copied by its very definition.

And by definition, std::sort can not sort objects that can not be exactly copied. The advantage of Qsort is that even if an object is none-copyable, Qsort may still sort it, since Qsort swap them without making copies.

But just like std::sort can not work on all situations, neither can Qsort. If there is a kind of object that can neither be copied, nor can it even be swapped at all, then Qsort would not work. Certainly if you can not swap the order of two objects, you can not hope to sort them at all, no matter what you use.

Such objects do exists.


class SomeClass
{
public:
SomeClass():val_(0){self_=this;}
~SomeClass(){};
SomeClass(const SomeClass& src){self_=this; val_=src.val_;}
SomeClass& operator=(const SomeClass& src){ val_=src.val_; self_=this; return *this;};
int get_val()const{return self_->val_;}
void set_val(int i){self_->val_=i;}
private:
SomeClass *self_;
int val_;

};

AnthonyMai
November 27th, 2002, 07:44 PM
std::sort respects my design decisions by calling my operators on my class to do what I want. If it can't (because my class is copy-protected) then it will inform me at compile time.

QSort blindly assumes that when I say copy that I meant bitwise copy. If it is wrong, it doesn't tell me at compile time. It doesn't fail gracefully at runtime. It tells me by corrupting my data. Thanks, QSort.


People still don't get the idea. How many times do I need to repeat that Qsort sorts NOT by making copies, but by simply swapping objects, without making copies of the object.

Never at any moment during Qsort operation that any new object is created or any old one is destroyed. However many objects you have at the beginning of the Qsort, you still have that many during and after Qsort. Nothing is created, nothing is destroyed, nothing is copied. Objects are just swapping positions with each other. So copying is really irrelevant.

On the contrary, std::sort() constantly creates new objects, makes copies, and destroys old objects, in order to sort. Just as you said, if un-fortunately your object is copy protected, std::sort() would not work.

And Qsort would still work, even if the object is copy protected.

Now, how are you supposed to sort an object array who are copy protected?

Paul McKenzie
November 27th, 2002, 08:56 PM
Objects are just swapping positions with each other. So copying is really irrelevant. On the contrary, std::sort() constantly creates new objects, makes copies, and destroys old objects, in order to sortSo here is the question: Why do you think std::sort does this? Is it because the persons who coded the std::sort are inferior programmers or don't know what they're doing? Why didn't they "just swap objects in memory" as you're doing? Are they dunderheaded junior level coders? Are you doing all of this to show that you're better than all of the C++ programmers and experts who crafted std::sort? What is it? An answer to this question from you will reveal much more than this crud you're posting as your holy grail.

This is really getting ridiculous folks. I'm sorry to say this but it really is. We know what the standard says on non-POD objects, but Mai thinks the standard is something to throw away if something "seems to work". Objects are not required to have their storage in contiguous memory, and experts have shown that objects derived from virtual base classes are in some cases not implemented in contiguous memory.

But again the (rhetorical) question still remains Why has no implementation of std::sort done what Mai's done?

Regards,

Paul McKenzie

Bassman
November 27th, 2002, 09:02 PM
People still don't get the idea. How many times do I need to repeat that Qsort sorts NOT by making copies, but by simply swapping objects, without making copies of the object.

Ridiculous. You ARE copying the object. Bit by bit. One object's data ends up in a different memory location from where it was instantiated. That data is a copy of the original object. It is NOT the original object.

And Qsort would still work, even if the object is copy protected.

That's the point. Why would I want to ignore a clear design decision that made an object copy protected? If an object is not initially designed to have copies, you SHOULDN'T MAKE COPIES. std::sort acknowledges this. QSort doesn't.

If you need to revisit the design, revisit the design. Don't try to hack your way around it by using an unstandardized, flawed method.

Now, how are you supposed to sort an object array who are copy protected?

You don't. Sort an array of indices. Sort an array of pointers.

Regards.
Bassman

Paul McKenzie
November 27th, 2002, 09:28 PM
Originally posted by Bassman
That's the point. Why would I want to ignore a clear design decision that made an object copy protected? If an object is not initially designed to have copies, you SHOULDN'T MAKE COPIES. std::sort acknowledges this. QSort doesn't.Check up on Phillip Nicoletti's findings on how QSort fails on the g++ and Pacific Group compilers. Mai is trying to dismiss this and look the other way, but I certainly will not let this little tidbit of information get lost in this debate.If you need to revisit the design, revisit the design. Don't try to hack your way around it by using an unstandardized, flawed method.Which is the basic flaw of many 'C' coders: If they can't find or don't know a C++ way of doing things, they resort to using something their familiar with, and that is hacking with 'C', regardless if the hack is deemed to be undefined by the language.

This is why it is emphasized that C and C++ are two separate languages. There is the C++ mind, and the 'C' mind. Approaching the concept of classes, algorithms, containers, etc. with a 'C' mind is a dangerous mix. A 'C' programmer is trained that a struct is just some member variables that are copyable. A 'C' programmer uses sizeof(struct whatever) throughout their code, they copy structures all over the place with memcpy's etc. All of this is valid in a 'C' world. For C++, structs (and classes) that are not 'C' structs (i.e. non-POD) take on a different persona. They just can't merely be copied all over the place with memcpy(), or any of the other things that a 'C' coder usually does with a 'C' type structs. The only kind of C++ struct/class that you can use for 'C' type copying are POD structs. This is emphasized in the standard, something that Mai throws away like a used paper towel. Some 'C' programmers understand this and know that C++ has subtelties that 'C' does not come close to having. Others are stubborn and can't grasp this concept. We know which camp Mai falls into.

Regards,

Paul McKenzie

Gabriel Fleseriu
November 28th, 2002, 02:21 AM
Originally posted by AnthonyMai:
So this is the sort of object that is NONE-copyable.

A non-copyable class is spelled like this in C++:
class non_copyable
{
/*.....*/
private:
non_copyable(const non_copyable&);
non_copyable& operator=(const non_copyable&);
};

If 'valid copy' and 'bit-by-bit copy' were synonyms, one would never need to write a copy-ctor or an operator =.

Originally posted by AnthonyMai:
...if the original challenge do believe it doesn't work for a specific object type on a specific compiler...

I have given you an example of a class that breaks Qsort but is handled correctly by std::sort. Philip Nicoletti gave you an example of another class and a compiler where Qsort fails. At lest three or four people posted what the Standard states about copying non-POD types.

Now, how are you supposed to sort an object array who are copy protected?
If a class is copy protected because of a good reason, you will be able to create an array of objects only by using their default ctor. You won't be able to push_back (or equivalent) such objects. The result would in most cases be an array that contains identical objects -- thus no need to sort it.

Assuming that the default ctor creates different objects (by using a static variable, or whatever), you take an array of pointers to the objects and sort that.

A sorting function is of no use without a specification about the objects that can be sorted. std::sort has such a specification. What is the specification of Qsort, in your oppinion? Can you write it down and back it up with the Standard? If not, we're wasting our time.

AnthonyMai
November 28th, 2002, 09:53 AM
If a class is copy protected because of a good reason, you will be able to create an array of objects only by using their default ctor. You won't be able to push_back (or equivalent) such objects. The result would in most cases be an array that contains identical objects -- thus no need to sort it.


push_back is a concept in std::vector. An object array is not necessarily a std::vector, it can be a C like array, in which case there is no need to use push_back. And you can allow all kinds of public constructors for the object, except that you would NOT allow a copy constructor or a copy assignment operator.

There are plenty of examples where we can construct a type of object in a number of ways, but you want to prohibit the copying of the object. The reason is each instance of the object is unique and you never ever want to duplicate it.

For example you may have a thread object, each has a unique thread ID, priority and all other related information. Each thread object is certainly unique so you never ever want to duplicate it at any moment. Now, the operating system need to sort these thread objects by priority. How do you do it? It must be sorted without making duplications at any given moment.

Another example is objects which each has a unique sequence number and the sequence number must be unique and continuous. The object can be constructed at different spots, at different times, using a number of different constructor, but each of these constructors would all call a base class constructor which calls a global sequence number generator to fetch and assign the sequence number properly.

Now I want to sort these objects, according to their individual sequence number, or according to other sorting criterials. Mean while I do NOT want to scramble their sequence numbers or disturb the global sequence number generator. How would you do it? Obviously you can NOT call any constructor at all if you can not disturb the sequence number generator, so you really can't sort using std::sort.

And under this situation Qsort would work better than std::sort, because it swaps objects without actually making any copies.



Assuming that the default ctor creates different objects (by using a static variable, or whatever), you take an array of pointers to the objects and sort that.


There will be cases you are not satisfied with an indirect index array, but you want to sort the objects themselves directly.


A sorting function is of no use without a specification about the objects that can be sorted. std::sort has such a specification. What is the specification of Qsort, in your oppinion? Can you write it down and back it up with the Standard? If not, we're wasting our time.


std::sort can only sort objects that an appropriate copy operation can be carried out, and is not prohibited to carry out. Qsort does not have such limitation. But Qsort has its own limitation, an object must not have pointers (explicit or hidden) that directly points to itself or members within itself. By having such pointers once the object moves in the memory it is no longer valid.

jfaust
November 28th, 2002, 10:04 AM
There will be cases you are not satisfied with an indirect index array, but you want to sort the objects themselves directly.


I can't think of any such example. Pointers to objects avoid slicing. Arrays and polymorphic objects just don't mix.


But Qsort has its own limitation, an object must not have pointers (explicit or hidden) that directly points to itself or members within itself.


... and must not be used on non-POD types.

Jeff

Kdr Kane
November 28th, 2002, 10:15 AM
Nice conversation.

Although some of you are completely frustrated, it really helps me to understand the C++ concepts.

In the end, dinosaurs that can't accept change die a horrible death.

Thanks for these boards. ;)

Bassman
November 28th, 2002, 10:18 AM
For example you may have a thread object, each has a unique thread ID, priority and all other related information. Each thread object is certainly unique so you never ever want to duplicate it at any moment. Now, the operating system need to sort these thread objects by priority. How do you do it? It must be sorted without making duplications at any given moment.

Sort an array of indices. Sort an array of pointers.

And under this situation Qsort would work better than std::sort, because it swaps objects without actually making any copies.

Ridiculous. You ARE copying the object. Bit by bit. One object's data ends up in a different memory location from where it was instantiated. That data is a copy of the original object. It is NOT the original object.

Wow, deja-vu!

There will be cases you are not satisfied with an indirect index array, but you want to sort the objects themselves directly.

If you need to make copies of the class, then the class should not be designed with copying prohibited, in which case, std::sort will work perfectly.

std::sort can only sort objects that an appropriate copy operation can be carried out, and is not prohibited to carry out. Qsort does not have such limitation.

It is not a limitation. QSort is a poor tool because it does not respect my class design decisions.

Regards,
Bassman

Paul McKenzie
November 28th, 2002, 11:03 AM
Originally posted by jfaust ... and must not be used on non-POD types. Which is where the focus of this should be. If Mai could, at the very least, admit this, there is progress. If he fails to admit this, this is clearly evident that he believes that moving objects without calling the appropriate copy constructor is valid. We have the ANSI standards, failed QSort runs on various compilers, and the C++ experts to back us up. There is no need for further debate on this aspect -- QSort is not guaranteed to work on non-POD objects. Period.

Having said this. I'm trying to pin down exactly what is the point of QSort. Others may want to take the technical approach and debate with Mai, but this really has gone beyond technical reasons for QSort and into another realm.

Is QSort posted to one-up the ANSI standard and state that the standard is invalid? Is it to prove that Mai is better than C++ programmers who developed std::sort? Is it to show that 'C' programmers are superior to C++ programmers including Josuttis, Stroustrup, Plauger, et. al.? Is it just to plain old show off and have everyone raise Mai up on their shoulders as a "hero" or a programming "god"? Is it a combination of these things? One really has to question the possibility of psychological reasons as to why this QSort debate continues.

A reasonable man would have acknowledged that QSort didn't hold up against the standard, and would state that this can only be used for 'C' structs and not for non-trivial C++ objects. There is nothing wrong with developing 'C' code for 'C' issues. There is a big problem when you develop 'C' code for C++ issues that violate ANSI standards.

Regards,

Paul McKenzie

PaulWendt
November 28th, 2002, 11:28 AM
Hey, if nothing else, I think he improved on the times for
Microsoft's original qsort(). Granted, those were with his tests
and more comprehensive tests might find a weakness in QSort(),
but from what I've seen so far, QSort() seems to perform as well
as qsort() and sometimes better. That's great and all and I think
that was his main point with QSort() ... and then he realized that
his QSort() was faster than std::sort() too! That's where the
trouble started.

It's just like when you define a new and delete for your own
class; because you're using it for a specific example, it's going to
be faster so by all means, do it. His QSort() probably works
faster for all the times he needs it, but I'm guessing that there
will be some instances where it's slower than qsort(). Sometimes
you have to pick the generic approach to make things easier,
even if there are some tradeoffs. That's why it's good to have the
skills to know where and how to make performance increases if
you need to. Kudos to you for that, Mai ... but I have to agree
with the rest and say that QSort() probably isn't as useful for
sorting non-POD data types....

--Paul

Yves M
November 28th, 2002, 11:44 AM
Paul Wendt, you are right, his Qsort is much better than the qsort that comes with MSVC. But std::sort uses introsort, which is even better, as I've "shown" in Philip's thread for sorting doubles.

I think the important point of this discussion is that copy constructors (or assignments) can be costly, so avoid them. It's not an "inherent flaw" in std::sort that it uses copy constructors, but it's a decision.

The point that using std::sort for sorting the types of classes that Mai used in his example is extremely slow is entirely valid. I don't think that qsort or Qsort is the answer though, but rather sorting an array of pointers. This is a very important point that many STL beginners (including me ;) ) get wrong. If your classes are non-trivial than it's much better to put pointers to them inside the container rather than the full class. As an added benefit you will be able to use polymorphism on them.

jfaust
November 28th, 2002, 12:06 PM
Originally posted by Paul McKenzie
I'm trying to pin down exactly what is the point of QSort.


Well, I think it's just a starting point for argument. I give Anthony enough credit that I don't think even he'd use the thing.

Jeff

galathaea
November 28th, 2002, 08:47 PM
Yves, I think your summary is the most reasoned I've heard yet on this issue. I know how easy it is to get lost in the frustration (I've already been warned once about that myself, and I've only been around a few months), but you have made the most important point about this whole discussion. Time-wise, the program does not compare with std::sort, which is the point of it (as I follow the conversation), as an order of complexity issue. And as to the point used for creating it in the first place (which I take to be avoiding the copy calls -- assignment and ctor), the whole c++ world would agree that pointers (or some smarter indirection, as one's tastes go) is the proper idiom that they even teach in many college courses and standard books on data structure management!

Not that I disagree with the more passionate stands. I love a good fight. But I wanted to thank Yves for the clarity in the mist...

PaulWendt
November 29th, 2002, 08:38 AM
Look! It's a bird; it's a plane! It's Super Moderator!

Serhio
November 29th, 2002, 06:43 PM
Hi to All!
I want to add some notes to Yves reply.
Really, the estimation of any sorting algorithm on any concrete sequence always normally based on :
- count of made comparisons operations;
- count of made swapping operations.
So, time of working the some sorting algorithm on some sequence is based on these two counts and how effective you realize the compare and exchange operations :) (that's why, of course, if you want to sort the sequence of object with significant size - better build its index array and sort it :)).
By the way, why the most people so like quick-sort algorithm? Statistically, it's not the best algoritms, at least it lose to heap-sort, for example (under "better" or "worther" I mean the estimation as I said before - based on counts of operation made during working).

regards, SD

TheCPUWizard
December 1st, 2002, 10:32 PM
Two quick comments...

1) AnthonyMai's statement....

"Basically std::sort is broken whenever you have some class where appropriate copy constructors can not be written."

If you mean the compiler can not write them the point is meaningless. good design requires the 5 "auto" functions should ALWAYS be implemented for EVERY class. If the statement was meant that a programmer could not write a copy constructor for a class, I would surely like to see a sample.

2)Gabriel's Sample...

It has been stated that this implementation is "poor" because an invalid object can be copied and become value. A quick tweak (also for the assignment operator)

SomeClass(const SomeClass& src)
{
if (src.self_ != src)
throw "someException"
}
self_=this; val_=src.val_;
}

takes care of the problem. Of course Gabriel may have INTENDED to write a self repairing class. On a more serious note, I have found tht is is ALMOST always better to implement both the copy constructor and assignment operator in terms of a comment protected/private utility method, the same goes for (= and !=) (> and <).

jflegert
December 2nd, 2002, 07:59 AM
Originally posted by TheCPUWizard
...
good design requires the 5 "auto" functions should ALWAYS be implemented for EVERY class.
...

What are the 5 "auto" functions? contructor, destructor, copy, assignment, ???

Thanks,
John Flegert

Gabriel Fleseriu
December 2nd, 2002, 08:09 AM
Originally posted by TheCPUWizard
2)Gabriel's Sample...

It has been stated that this implementation is "poor" because an invalid object can be copied and become value. A quick tweak (also for the assignment operator)


takes care of the problem. Of course Gabriel may have INTENDED to write a self repairing class.

I was asked to write a class that breaks Qsort but works with std::sort. My point was to demonstrate the existence of such a class -- and not to demonstrate good style, good taste or the usefulness of such a class.

TheCPUWizard
December 2nd, 2002, 08:54 AM
John,

The last one is the equality operator [==].

TheCPUWizard
December 2nd, 2002, 08:56 AM
Gabriel,

I was NOT intending to knock your sample in ANY way. You provided exactly what was asked for (a class that would break qsort). My comments were directed more at those who were implying that there was something wrong with your class and that is WHY it would break QSort. The tweaks I added would enhance your class so that it would be beyond these criticisms.


David.

jflegert
December 2nd, 2002, 09:09 AM
Originally posted by TheCPUWizard
Two quick comments...

...
good design requires the 5 "auto" functions should ALWAYS be implemented for EVERY class.
...

I would disagree with this. Why implement any of the 5 "auto" functions if they are not meant to be used or if the default implementation is sufficient?

Thanks,
John Flegert

TheCPUWizard
December 2nd, 2002, 10:05 AM
John, My reasoning for implementing all 5 is simple. It is my experience [10 years C++ development] that it is easier in the long run. Why?

1) When setting policy to get consistent results from a team of programmers a fixed policy which does not involve "judgment calls" is easier to define, implement and maintain.

2) Any class which contains a pointer (almost) always need custom a copy constructor and assignment operator. Those which want to have the same pointer value should be explicitly documented in any case.

3) Any class which does internal reference counting, needs a copy constructor.

4) A class that today does not "need" a copy constructor will often be "upgraded" to contain a new data member that requires a copy constructor. As a (contrived example), consider a class with starts with 5 data members all built-in non-pointer types. This does not need a copy constructor or assignment operator to behave correctly. Over time additional members are added, they are all either build in types or well behaved classes, this goes on until there are 50 data members. THEN IT HAPPENS, a pointer data member is required. At this point the copy constructor and assignment operator are needed. The programmer now has to write the code for alll 51 members!!!!. Much better to amortize the cost (time) since day 1. It is hard to justify (to boss or client) why addding one "little pointer" requires a week of testing to insure that all 50 of the other members are being properly copied!!!.

5) Some classes which consist of just built in types and well behaved classes STILL require a (user written)copy constructor. Consider a simple object that counts the number of times a certain method has been called FOR THAT INSTANCE. This is often the type of item added for debugging and code coverage testing. Should a copy constructor be written (again having to implement and test ALL of the members) when this is added and then deleted when this is removed????

6) Many programmers never intend for their objects to be copied or assigned. They just dont think about it. If you REQUIRE them to write these functions, they must think. Either write a valid public method, or write an empty private method (explicitly stating that "you cant to this").


The above covered mainly the copy constructor and assignment operator....

Default Constructor : All members should ALWAYS be initialized so this is required.

Equality operator : A great many classes contain private data members that are not pertinent to equality testing [lazy evaluation patterns, item #5 above, etc.)

In short liefe is EASIER for everybody (in the long run) if these routines are ALWAYS implemented.


David.

jflegert
December 2nd, 2002, 11:42 AM
David,

Thank you for expanding on your statement. I just thought that your original statement was that it was a standard practice in the "Industry".

I myself just don't see the need to ALWAYS do it (from my 16 years of development and 10+ years of C++ programming).

1) When setting policy to get consistent results from a team of programmers a fixed policy which does not involve "judgment calls" is easier to define, implement and maintain.
I agree. But also,

1) have a policy that you will train your programmers so they know the language,

2) hire competent people who are capable of making intelligent decisions so that you don't need to impose unnecessary policies on programmers.

The above is with a smiley face, but should also be taken to heart.

3) Now you have a policy that gives your programmers a false sense of security. The programmer thinks: "I followed the policy, why think about the actual design".

4) I would tell the programmers to do more up front design to decide what is necessary.
2) Any class which contains a pointer (almost) always need custom a copy constructor and assignment operator. Those which want to have the same pointer value should be explicitly documented in any case.
Agreed. Since I can't quantify a word like "almost".
3) Any class which does internal reference counting, needs a copy constructor.
Don't forget the assignment operator.
4) A class that today does not "need" a copy constructor will often be "upgraded" to contain a new data member that requires a copy constructor. As a (contrived example), consider a class with starts with 5 data members all built-in non-pointer types. This does not need a copy constructor or assignment operator to behave correctly. Over time additional members are added, they are all either build in types or well behaved classes, this goes on until there are 50 data members. THEN IT HAPPENS, a pointer data member is required. At this point the copy constructor and assignment operator are needed. The programmer now has to write the code for alll 51 members!!!!. Much better to amortize the cost (time) since day 1. It is hard to justify (to boss or client) why addding one "little pointer" requires a week of testing to insure that all 50 of the other members are being properly copied!!!.
I quess I would look at the flip side. If there was no reason to have a copy constructor in the first place, and you require people to write one, you are lengthening your development cycle unnecessarily. Your clients would probably be happier if you could deliver quicker and more reliably.
5) Some classes which consist of just built in types and well behaved classes STILL require a (user written)copy constructor. Consider a simple object that counts the number of times a certain method has been called FOR THAT INSTANCE. This is often the type of item added for debugging and code coverage testing. Should a copy constructor be written (again having to implement and test ALL of the members) when this is added and then deleted when this is removed????
Well, if it was just for debugging, I would suggest putting the stuff in #if blocks.
6) Many programmers never intend for their objects to be copied or assigned. They just dont think about it. If you REQUIRE them to write these functions, they must think. Either write a valid public method, or write an empty private method (explicitly stating that "you cant to this").
Again, hire competent people. I would agree with your statement but instead of "empty", I would tell them to declare them as private and not implement them.
Default Constructor : All members should ALWAYS be initialized so this is required.
I disagree, if there is no reason to allow a default constructor to be called, declare the default constructor as private and don't implement it.
Equality operator : A great many classes contain private data members that are not pertinent to equality testing [lazy evaluation patterns, item #5 above, etc.)
I agree. But you don't mention the case where equality testing isn't even valid.
In short liefe is EASIER for everybody (in the long run) if these routines are ALWAYS implemented.
I disagree.

Regards,
John Flegert

TheCPUWizard
December 2nd, 2002, 12:33 PM
John, I respect your opinions. They are based on your experiences the same as my opinions are based on my experiences.

I would just like to relate some of the details of the project that caused the formulation of this (albiet strict) set of design "rules" [for use in our firm, NOT the entire industry].

About 6 years ago, I contraced for a large project. Since we are a small firm, almost all of the work was handled by sub-contractors. We created top level designs and left the internal design up to the various contractors. During the two years of development we needed to move the work between the different contractors (1 we fired, 1 went bankrupt) as well as balance the work load to keep everybody active.

We ran into significant problems when transfering code from one cmpany to another. There were steep learning curves since each firm had their own set of techniques and styles. The items that caused delays were often trivial in nature (e.g. do "simple" method implementations go int the hpp or cpp file?, how are inlines handled?, are accessors necessary for reading and/or writing an int data member). The code was rapidly becoming VERY difficult to maintain as the various styles all started to appear within a single class.

We decided on implementing a complete software design and style guide. All internal programmers and exernal firms are required to adhere to these standards and practices.

Since this was implemented 4 years ago, we have been able to expand. Most of our implementation is done by a number of small firms literally around the globe. A class written by one company can easily be maintained and updated by another company, and it is virtually impossible to determine (without looking at comments or other documentation) which company has performed the work.

I agree that hiring the brightest and the best is always a good idea. I feel that having this policy really DOES allow the designer to concentrate on the BEST designs techniques available (e.g. Selecting the best Collection Class, implementing a Design Pattern in a complete and robust fashion).

As a final note, we make heavy use of full-cycle design tools. Most of our code-base is generated (at least as frameworks) from tools that parse UML structures (as well as generate them from the final code. Code analysis tools are used to produce the documentation as well as validating that the final code is in compliance with out policies.

Did this cost time (therefore money)? Yes, but it has proved to be the best investment we could have possibly made. Even with the downturn of the past 12-18 months, we are estimating that our earnings will be up over 30% this year.

In cases such as this, the BEST SOLUTION is the one that works best for the PERSON WHO HAS TO LIVE WITH IT.

David.

Graham
December 2nd, 2002, 04:36 PM
Originally posted by TheCPUWizard
John,

The last one is the equality operator [==].
Nope. The fifth one is the address-of operator: operator&().

TheCPUWizard
December 2nd, 2002, 04:54 PM
Sorry Graham, you are of course correct. I was looking at our internal documents, not the standard. I need to double check the source that I am using more often. However, if I spend any more time on this (and other) boards, I may succeed in not performing any real work at all.


Mea Culpa
David.

Graham
December 2nd, 2002, 04:59 PM
Following on from jflegert's post, with which I (almost) entirely agree:

Originally posted by TheCPUWizard

Default Constructor : All members should ALWAYS be initialized so this is required.
Like jflegert, I disagree, but with a slightly different codacil: Explicitly declare ANY ctor and the compiler will not generate the default ctor. If you don't need a default ctor (a likely situation; that's why the rule is there), there is nothing to be gained by declaring it (private or otherwise). Better to simply leave it as a class that has no default ctor.

Conversely, if you don't want a default ctor, then you will need some other sort of ctor, and that will supress the default. So, there is never a need to explicitly supress the default ctor.

As for copy ctor/copy assign: personally I'd leave them unless the compiler-generated ones aren't sufficient.
A class that today does not "need" a copy constructor will often be "upgraded" to contain a new data member that requires a copy constructor.
Then add the copy ctor when it becomes necessary. In my view (and I know: it's just my view), the presence of an explicit copy ctor should document the fact that there is something special about this class. It takes a while to look at a copy ctor and note that it does no more than the compiler-generated one. What is the point of:

class foo
{
public:
foo(const foo&) {}
private:
// data members
};

jflegert's point about competent programmers is valid, though: we can't design systems on the assumption that people are all novices or less than fully competent. Sure, some will be (we've all got to learn sometime), but that's what training and colleagues are for.

TheCPUWizard
December 2nd, 2002, 07:14 PM
Graham, et. al.

As I stated. this is MY personal/professional OPINION. It is right FOR ME, because it works BEST for ME (and by extension those whom do work for my firm). I do not mean that this is what "everyone should do". At the very least other people should take a look at your, mine and John's optinions and make their own CONSISTENT (i.e. Always or only when required) decision.

Your point about auto generation of the parameterless constructor is 100% on the money.

As a side note your copy constructor is distinctly different than what the default copy constructor is (I am sure this was either unintentional or simply in the interest of brevity). As written, your copy constructor states that the target object should contain the same values as a default constructed object and should ignore the state of the source object. Again, I am sure this is not what you intended.

I think this discussion has been fruitful since it presents differing viewpoints without resorting to the "you are wrong" syndrome. Thanks to all for their ideas.

Graham
December 3rd, 2002, 04:28 AM
Yes, I should have put the full initialiser list on: which leads to the point that, if you leave the copy constructor to the compiler, then data member additions will be automatically handled correctly (apart from those, etc.......), whereas by declaring a redundant one, you've got to remember to update it for each new member. And what are the odds on someone forgetting? And doesn't that counterbalance the argument about when the new data member demands a copy ctor? Anyway, my principle reason is the documentary one, since ctors have to be maintained and considered with all changes the state list of a class - I just like to have an explicit ctor (or any of the compiler-generated funcs) mean that there is something special going on. Good grief, the number of times I create a class in MSVC++ and the first thing I do is to delete the ctor and the (really annoying) virtual dtor that it puts in. That really bugs me, actually: the virtual dtor that VC++ automatically gives you. It shows a fundamental wrong-headedness on the part of whoever put it in.

Yep, this is about personal opinions but, as you say, I think it's important to have these debates - someone may have an argument that you've not heard before that you find convincing (happens to me every time I read a Scott Meyers article). Maybe those who are learning can glean something useful and so improve their skills.

TheCPUWizard
December 3rd, 2002, 06:37 AM
COMPLETE agreement on the need for this type of discussion/debate. When I first read Scott Meyers "Effective C++" back in 1994 [when I started serious C++ development], I disagreed with about 30 (of his 50) points. After implementing a number of systems, talking with other (more experienced) people [the "expo's" were better back then, not just sales events], my thoughts on his points changed dramatically.

I no longer think any of his points are wrong, although I still prefer to use some alternative methods.

Your point on the maintance of constructors is well taken. The object framework (developed in house over the years) that we use dictates the explicit use of copy constructors (most classes have a "Tracker" object member that must NOT be copied as it needs to be in its initial state at the creation of the containing object). Also as I mentioned in a previous post, we use a number of tools to create/maintain/document/validate all of our code, this provides a good check for un-initialized members.

Question: You state you often delete the default constructor template. Assume you have a class that just has a single int data member. How do you handle its initialization???

Regarding default virtual dtors (MSVC++), [Scott Meyers #14 covers this rather well], I thing MS decided that since inheriting from a class without a virtual destructor can induce many headaches, whereas having a virtual destructor when a plain destructor will suffice [albiet at the potential increase in object weight], it was better to err in the direction that they did. [I am NOT saying they were right, just that there was come point behind it...]. "Add Class" is basically a wizard, there are other methods of adding a class to a project...

If we are going to discuss "When to make a destructor virtual", I think that would be a great topic for a new thread.

David.

AnthonyMai
December 3rd, 2002, 07:53 AM
1) AnthonyMai's statement....

"Basically std::sort is broken whenever you have some class where appropriate copy constructors can not be written."

If you mean the compiler can not write them the point is meaningless. good design requires the 5 "auto" functions should ALWAYS be implemented for EVERY class. If the statement was meant that a programmer could not write a copy constructor for a class, I would surely like to see a sample.


Was it you forgot or was it your ignorance? A guy with your kind experience should have seem plenty of examples of class objects that simple can not and should not be copied, or that once you make a copy something changes and it no longer looks the same.

Singleton is one example. By its very definition singletons can NOT be copied and a copy assignment operator should never be needed.

Auto pointers are another example. You can assign an auto pointer just like you assign a regular pointer. But unlike assigning regular pointers where no side effect happens, once an auto pointer is assigned, the reference count changes, so you don't get a duplicated instance of the auto pointer class, it is no longer the same as the original instance since the reference count is not the same, even the original instance has been altered during the copying.

So in the case of auto pointer, you simply can not make "true" copy of the objects. Once you attempt to copy it, it changes the original. This is why you would run into trouble trying to do std::sort() on objects where auto pointers are involved. Here is a case where nothing has broken the standard C++ rule, neither the auto pointers nor the std::sort() has broken the C++ rules. But put them together and it doesn't work. And in this case Qsort() would break C++ rules but it works.

The point I want to make is NOTHING works on every thing on every situations. You can find plenty of cases where Qsort doesn't work and std::sort() works (except not on VC++, BTW), but I can also find plenty of cases where std::sort() doesn't work but Qsort works. And also the bottom line is whether something complies with std C++ is absolutely NOT a golden rule for whether something would work or not.

I concede that in GCC when virtual data members are involved, Qsort() would have problem.

For the time being, I believe Qsort works on virtually every conceiveable type of classes on VC++. On some other compilers, Qsort may not work when there is virtual data member or virtual inheritance involved (virtual functions are OK), but works just fine when no virtual data member or virtual inheritance is involved. If some one can find an example where Qsort still doesn't work under this confinement, I would like to know.

Since virtual inheritances and virtual data members are seldom used, comparing with virtual functions which are much more common place, Qsort has a pretty broad domain of usefulness, probably even broader than std::sort(), which requires both the availability of STL and that the object must be appropriately copyable and the copy operator must be available and accessible.

jflegert
December 3rd, 2002, 08:04 AM
For the time being, I believe Qsort works on virtually every conceiveable type of classes on VC++. On some other compilers, Qsort may not work when there is virtual data member or virtual inheritance involved (virtual functions are OK), but works just fine when no virtual data member or virtual inheritance is involved. If some one can find an example where Qsort still doesn't work under this confinement, I would like to know.
What about the class Gabriel posted on the first page?

TheCPUWizard
December 3rd, 2002, 08:45 AM
Anthony, your two examples are both cases where a copy constructor needs to be written or at least declared. The problems arise when you DONT explicitly do it and allow the compiler to do it for you.

In the case of Singleton's the (usually) correct copy constructor is just a private declaration with no implementation. This prevents accidental copies.

In the case of "Smart" pointers the method varies. It can be either a private declaration as above (if the designer wishes to prevent copies) or a "proper" copy constructor that handles the required functions (ownership, reference counting, or what ever else makes the pointer "smart").

My position on the various sort implementations is that anything that RELIES on a feature that is not covered by the standard is WRONG.

Additionally, it is my opinion that anything that can be done to allow the compiler to check for errors is better than having to count on a human always getting it right or having runtime checks (I do agree that there are times this is not possible). The worst scenario is when a delivered program operates incorrectly (a crash or unhandled exception being the ultimate incorrect operation).

Graham
December 3rd, 2002, 08:48 AM
David: I often delete the default constructor because a large proportion of the classes I write are not default constructable - it's as simple as that. I suppose I'd like a wizard that has a set of checkboxes:

Default constructable?
Copyable?
Acts as base class?

My objection to the virtual destructor is, I suppose, more philosophical than anything. A class is part of a (polymorphic) hierarchy or it isn't. That decision really ought to be made very early in the design stage... if a class changes from one to the other at a late stage, I would suggest that that implies that the class is not well-defined and the design is suspect. It is possible that a useful stand-alone class could later profitably become the base for a polymorphic family, but I feel that that's the exception, not the rule. Therefore, I consider virtual destructors (like many other things) to be a matter of deliberate choice rather than "well, it does no harm". And also there's the documentary aspect: I consider a virtual destructor to imply strongly that there is a polymorphic inheritance hierarchy here.

AnthonyMai
December 3rd, 2002, 09:01 AM
For the time being, I believe Qsort works on virtually every conceiveable type of classes on VC++. On some other compilers, Qsort may not work when there is virtual data member or virtual inheritance involved (virtual functions are OK), but works just fine when no virtual data member or virtual inheritance is involved. If some one can find an example where Qsort still doesn't work under this confinement, I would like to know.



What about the class Gabriel posted on the first page?


In a broad sense Gabriel's case can be considered a case where a class data member (val_) is explicitly "virtualized", using the visible virtual pointer (the self_ pointer). You don't access val_ directly. In stead you access it through a pointer self_, by changing self_ you could be pointing to different values of val_. Actually virtual data members are usually implemented in pretty similar ways, except that real virtual pointers are invisible and they are only changed when inheriting from a base class.

TheCPUWizard
December 3rd, 2002, 09:24 AM
Graham,

That would be a better wizard...Remember any of us can always create an add-in that does exactly what we like.

Agree that a virtual destructor DOES imply that the class is intended to be polymorphic in nature, and a non-virtual implies the opposite. One of the things I like about other languages is the ability to EXPLICITLY state these items (e.g. keywords such as final, override). Alas C++ does not inforce these items and gives rise to unusual behaviour such as:

1) Base (CBase) class member is not virtual, first level derived class (CLevel1):public CBase) IS DECLARED virtual, second level derived class CLevel2:public CLevel1) also overrides the function.
Case a CLevel2 object to a CLevel1 object and the CLevel2 method still gets called, Case EITHER derived class to a CBase pointer and the base gets called. know someone will post a class where this is intended...]

2) A user is developing a class based on a deep and/or complex hierarchy and declares a "new" method (missing the fact this is already a method of one of the base classes). This gets really messy if the accidently overriden method was declared virtual.

Alas, ANY language (or specific compiler, debugger, IDE) has its limitations as well as its benefits.

Graham
December 3rd, 2002, 11:09 AM
Absolutely. That's why we're professionals and get paid a lot..... when we're in work, that is :(

jflegert
December 4th, 2002, 06:56 AM
David,
Originally posted by TheCPUWizard
...
1) Base (CBase) class member is not virtual, first level derived class (CLevel1):public CBase) IS DECLARED virtual, second level derived class CLevel2:public CLevel1) also overrides the function.
Case a CLevel2 object to a CLevel1 object and the CLevel2 method still gets called, Case EITHER derived class to a CBase pointer and the base gets called. know someone will post a class where this is intended...]
...
If I understand your example, it sounds like behaviour I would expect. What did you expect?

Thanks,
John

TheCPUWizard
December 4th, 2002, 08:08 AM
John,

It is EXACTLY the behaviour you SHOULD expect, IF you are looking carefully at the code. On the other hand if you are not the developer of the code, or have been away from it for a while, it is easy to make a mistake and get unexpected results.

As an example, if there was an existing collection of (CBase) objects that was performing a number of operations already, and you added a call to the aforementioned method, it would be easy to make an incorrect assumption.

This is a case that is well known to the experienced programmer, and is usually avoided. My original post on this sub-topic was that other languages DO have either warnings or errors that would at least alert the programmer that different (virtual/non-virtual) behaviours are occuring [e.g. "CDerived1::f() hides CBase::f()"]

I am NOT advocating a switch to these other languages, each language has it's merits for specific application. About 80& of our development is in C++ (most of the remaining is in C#/VB.NET for Web application). On the other hand, I AM pointing out that even experienced programmers and gurus are human (at leat I think so :) ), and DO make mistakes. Having tools to prevent or at least alert the person is a great boon when schedules are tight and people are overworked/tired.

David.

jflegert
December 4th, 2002, 08:45 AM
David,

I believe you are trying to make a valid point, I just think you are using a bad example. If someone has a collection of CBase objects, and calls a non virtual method of CBase, what could they be expecting?

Now, to me, the following code may be confusing (if one was looking at class C and not paying much attention to class B):

class B
{
public:
virtual void foo() { TRACE("B::foo()\n"); }
};

class C : public B
{
public:
void foo() { TRACE("C::foo()\n"); }
};

class D : public C
{
void foo() { TRACE("D::foo()\n"); }
};

void f()
{
D d;

C *c = &d;

c->foo();
}

"D::foo()" is printed.

John

TheCPUWizard
December 4th, 2002, 09:06 AM
John,

Agreed that my sample may not be the best. I used it because last year that exact problem bit one of our staff.

He was looking as some code that was commented as "Process Class Specific Operations". All of the existing functions WERE virtual in the base. The function that was to be added was INCORRECTLY not declared as virtual in the base. He looked at the header for "CLevel1" the function he needed to add was declared as virtual and all of the existing functions were in the same area of the header and also declared virtual. HE SIMPLY MADE A MISTAKE by not looking at the base class (which is what the pointer was declared as).

As a result, the code was not ready in time for our European test site (client also in Europe) to run the anticipated test during their day time shift. We ended up paying over $500 for them to sit idle that night.

If there had been a tool which would have produced some type of notification, this loss of time and (quantifiable) money would have been avoided.

David.

SuperKoko
November 12th, 2005, 12:46 PM
Arg, I would love templated typedefs The workarounds are not always pretty

Me too.
You probably noticed that typedef is particulary useful and even needed when dealing with templates, and that in some cases template typedefs could be needed.

However templated typedefs can be replaced by an alternative method which needs more typing, but has the same features.
Like that:

template<typename T>
class SharedVector
{
public:
typedef std::vector<boost::shared_ptr<T> > value_type;
};
// now, you can use SharedVector<T>::value_type

I found that particulary useful with specialization:

template <class T>
class FastConstReference
{
public:
typedef const T& value_type;
};
template <>
class FastConstReference<int>
{
public: typedef int value_type;
};
// similar code for all builtin types.
// now we can write a container using FastConstReference<T>::value_type at some points were a const reference is needed for efficiency with big objects, and values are still faster with builtin types.


I hope that C++0x will include typedefs templates...
:)

Siddhartha
November 12th, 2005, 01:32 PM
SuperKoko, I hope you realize that you have replied to a thread that's been sleeping for 3.5 years... ;)

SuperKoko
November 12th, 2005, 02:27 PM
I just realized that now...
:D