Click to See Complete Forum and Search --> : Help with debugging


AKRichard
October 21st, 2011, 03:06 AM
Hello again all,

I have been working on creating a base class that holds a number in an array of unsigned shorts, the algorithms for the basic math procedures appears to work correctly however, when I run the algorithm to convert the array into a string of the decimal representation of the number I am getting a strange behavior while debugging. If I let the procedure run without any break points, it gives an incorrect value right from the start (even though the string representation it spits out is good till about 3/4 the way through). If I set up 2 break points in specific places to check on the values and run it, the algorithm gives the correct results at every point including what it returns to the calling routine.

I am using microsoft visual c++ 2008 express. I have not read anything that would indicate that I have to watch out for how break points affect running code. The code listings that follow are the relevant ones to the algorithm.

The multiplication routine:


unsigned short* MyBase::Multiply(unsigned short* _val1offset, unsigned short* _val2offset){

unsigned short car=0;
unsigned int _v1=0;
unsigned int _v2=0;
unsigned int _r=0;
unsigned int _temp=0;
unsigned int _retpos=1;
unsigned int _v1pos=1;
unsigned int _v2pos=1;
unsigned int _two=2;

unsigned int _val1size=(*_val1offset);
unsigned int _val2size=(*_val2offset);
unsigned int _retvalsize=_val1size+_val2size;
_retvalsize+=2;

unsigned short* _retval=NULL;

_retval=new unsigned short[_retvalsize];
_retval[0]=_retvalsize-1;

_retvalsize-=1;

__asm{

mov edi, dword ptr _val1offset
mov esi, dword ptr _val2offset
mov ebx, dword ptr _retval
xor ecx, ecx
mov _v1pos, 0x0
mov _v2pos, 0x0
mov _retpos, 0x0

inc edi
inc edi
inc esi
inc esi
add ebx, 0x2
xor eax, eax

ClearLoop:

mov word ptr [ebx],ax
inc ecx
add ebx, 0x2
cmp ecx, _retvalsize
jle ClearLoop
mov cl, 0x10
mov ebx, dword ptr _retval
add ebx, 0x2

MultLoop:

movzx eax, word ptr [edi]
mov _temp, eax //mov v1 element into eax
movzx eax, word ptr [esi]
mul _temp //multiply v1 element by v2 element
mov _temp, eax
movzx eax, word ptr [ebx]
add eax, _temp //add retval in that element to value
mov _temp, eax
movzx eax, car
add eax, _temp // add the carry in
mov _temp, eax
and ax, 0xffff
mov word ptr [ebx], ax
xor eax, eax
mov car, ax
mov eax, _temp
shr eax, cl
mov car, ax
inc edi
inc edi
add ebx, 0x2
mov eax, _v1pos
add eax, 0x1
mov _v1pos, eax
cmp eax, _val1size
jb MultLoop
movzx eax, car
mov word ptr [ebx], ax
xor eax, eax
mov _v1pos, eax
mov car, ax
mov edi, dword ptr _val1offset
inc edi
inc edi
mov ebx, dword ptr _retval
add ebx, 0x2
mov eax, _retpos
add eax, 0x1
mov _retpos, eax
mul _two
add ebx, eax
//mov ebx, eax
mov eax, _v2pos
add eax, 0x1
mov _v2pos, eax
inc esi
inc esi
//mov _v2pos, eax
cmp eax, _val2size
jb MultLoop
mov eax, dword ptr _retval

mov ebx, dword ptr _retval
mov edi,_retvalsize
xor eax, eax

CheckForZero:

mov ax, word ptr [ebx + edi * 2]
cmp ax, 0x0
jne GoOut
dec edi
mov word ptr [ebx], di
cmp edi, 0x0
ja CheckForZero

GoOut:

mov eax, dword ptr _retval



}

return _retval;

}


The exponentiation routine:


unsigned short* MyBase::Pow(unsigned short* _baseoffset, unsigned short* _expoffset){//stuff needs fixed


unsigned int _b=0;//loc _base
unsigned int _e=0;//loc _exp
unsigned int _r=0;//loc _retval
unsigned int _t=0;//loc _tempoffset

unsigned int _sixteen=16;
unsigned int _two=2;
unsigned int _bit=16;
unsigned int _one[]={1};

unsigned int _retvalsize=0;
unsigned int _basesize=(*_baseoffset);
unsigned int _expsize=(*_expoffset);

unsigned int _mtemp=0;

unsigned short _bits[]={1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192, 16384, 32768};

unsigned short* _retval=NULL;
unsigned short* _tempoffset=NULL;
unsigned short _temp;
unsigned short _temp2;
unsigned short _shift=0;

unsigned short* _tpow=NULL;

_tpow=new unsigned short[2];
_tpow[0]=1;
_tpow[1]=0;

_retvalsize=_basesize*_expoffset[1];

_retval=new unsigned short[_retvalsize+1];
_retval[0]=_retvalsize;

_tempoffset=new unsigned short[_retvalsize+1];
_tempoffset[0]=_retvalsize;

unsigned short * _ttt=NULL;
_ttt=new unsigned short[2];

unsigned int _tttp=0;

__asm{

entry:

mov ebx, dword ptr _ttt
mov _tttp,ebx
mov ebx, dword ptr _baseoffset
mov _b,ebx
mov ecx, dword ptr _expoffset
mov _e,ecx
mov edx, dword ptr _retval
mov _r,edx
mov ebx, dword ptr _tempoffset
mov _t,ebx

start:

mov ebx, _r
mov edx, _t
mov edi, 0x1
xor eax, eax

ClearLoop:

mov word ptr [ebx + edi * 2], ax
mov word ptr [edx + edi * 2], ax
inc edi
cmp edi, _retvalsize
jle ClearLoop
mov edx, _b
xor edi, edi
mov ax, 0x1
mov word ptr [ebx + 2], ax

SetRet:

mov ax, word ptr [edx + edi * 2]
mov word ptr [ebx + edi * 2], ax
inc edi
cmp edi, _basesize
jle SetRet
mov ebx,_b
mov edi, _basesize

CheckBase:

mov ax, word ptr [ebx + edi * 2]
cmp ax, 0x0
jne CheckExponent
dec edi
cmp edi, 0x1
jge CheckBase
mov ax, word ptr [ebx + edi * 2]
cmp ax, 0x0
je ReturnZero
cmp ax, 0x1
je ReturnOne

CheckExponent:

mov ebx, _e
mov edi, _expsize
mov eax, _expsize
cmp eax, 0x1
jg ExpLoop
cmp eax, 0x0
je ReturnOne
mov ax, word ptr [ebx + edi * 2]
cmp ax, 0x0
je ReturnOne
cmp ax, 0x1
je ReturnBase

ExpLoop:

mov ax, word ptr [ebx + edi * 2]
cmp ax, 0x0
jne FindStart
dec edi
cmp edi, 0x1
jge ExpLoop
mov ax, word ptr [ebx + edi * 2]
cmp ax, 0x0
je ReturnOne
cmp ax, 0x1
je ReturnBase
mov ebx, _b
mov edx, _r
mov edi, 0x1

CopyBase:

//mov ax, word ptr [ebx + edi * 2]
//mov word ptr [edx + edi * 2], ax
//inc edi
//cmp edi, _basesize
//jle CopyBase
//mov ebx, _e
//mov edi, _expsize

FindStart:

mov esi, 0xf
mov ax, word ptr [ebx + edi * 2]
mov _temp, ax
mov cl, 0x1

FindOne:

mov ax, _temp
mov _temp2, ax
sal _temp, cl
jc BeginAlg
dec esi
cmp esi, 0x0
jge FindOne
dec edi
cmp edi, 0x0
jg FindStart
mov esi, 0xf
mov ax, word ptr [ebx + edi * 2]
mov _temp, ax

FindOne2:

//mov ax, _temp
//mov _temp2, ax
sal _temp, cl
jc BeginAlg
dec esi
cmp esi, 0x0
jge FindOne2
mov esi, 0xf
dec edi
cmp edi, 0x1
jl ReturnOne
mov ax, word ptr [ebx + edi * 2]
mov _temp, ax
jmp FindOne2

BeginAlg:

//mov ax, _temp2
//mov _temp, ax

StartAlg:
LoopAlg:

push eax
push ebx
push ecx
push edx
push edi
push esi
mov eax, _r
push eax
push eax
call Multiply
mov dword ptr _tpow, eax

Ret1:

mov _r, eax
mov _tttp,eax
mov _ttt,eax
pop eax
pop eax
pop esi
pop edi
pop edx
pop ecx
pop ebx
pop eax
sal _temp, cl
jc DoMult2

Ret2:

dec esi
cmp esi, 0x0
jg StartAlg
mov esi, 0x10
dec edi
cmp edi, 0x1
jl GoOut
mov ax, word ptr [ebx + edi * 2]
mov _temp, ax
jmp LoopAlg

DoMult2:

push eax
push ebx
push ecx
push edx
push edi
push esi
mov eax, _r
push eax
mov eax, _b
push eax
call Multiply
mov dword ptr _tpow, eax
mov _r, eax
mov _tttp,eax
mov _ttt,eax
pop eax
pop eax
pop esi
pop edi
pop edx
pop ecx
pop ebx
pop eax
jmp Ret2

ReturnOne:

mov ebx, _r
mov edi, _retvalsize

LoopItOne:

mov word ptr [ebx + edi * 2], 0x0
dec edi
cmp edi, 0x1
jg LoopItOne
mov word ptr [ebx], 0x1
mov word ptr [ebx + 2], 0x1
jmp GoOut

ReturnZero:

mov ebx, _r
xor edi, edi
inc edi

LoopItTwo:

mov word ptr [ebx + edi * 2], 0x0
inc edi
cmp edi, _retvalsize
jle LoopItTwo
jmp GoOut

ReturnBase:

mov ebx, _b
mov edx, _r
xor edi, edi

LoopItThree:

mov ax, word ptr [ebx + edi * 2]
mov word ptr [edx + edi * 2], ax
inc edi
cmp edi, _basesize
jle LoopItThree

GoOut:

mov eax, _r
mov _retval, eax

}

return _retval;

}


the subtraction routine:


unsigned short* MyBase::Subtract(unsigned short* _val1offset, unsigned short* _val2offset){

unsigned short car=0;
unsigned short _temp;
unsigned int _short=0;
unsigned int _val1size=(*_val1offset);
unsigned int _val2size=(*_val2offset);
unsigned int _retvalsize=_val1size;

if(_val1size>=_val2size){

_short=_val2size;

}

else{

_short=_val1size;

}

unsigned short* _retval=NULL;
unsigned int _two=2;

_retval=new unsigned short[_retvalsize+1];
_retval[0]=_retvalsize;

__asm{

mov ebx, dword ptr _val1offset
mov edi, dword ptr _val2offset
mov esi, dword ptr _retval
xor eax,eax
mov car,ax

ZeroOut:

mov ax, word ptr [ebx]
mov word ptr [esi], ax
inc esi
inc esi
add ebx, 0x2
movzx edx, car
add edx, 0x1
mov car, dx
cmp edx,_val1size
jbe ZeroOut
mov edi, dword ptr _val2offset
mov esi, dword ptr _retval
xor eax,eax
xor ebx, ebx
mov car,ax
inc edi
inc edi
inc esi
inc esi
add ebx, 0x1

sub1:

mov ax, word ptr [esi]
cmp ax, word ptr [edi]
ja DontDoIt
mov _temp,0x1

DontDoIt:

sub ax, word ptr [edi]
sub ax, car
mov cx, _temp
mov car, cx
mov _temp, 0x0
//cmp ax, 0x0
//ja DontAdd
//add ax, 0xffff
//mov car, 0x1

DontAdd:

mov word ptr [esi], ax
inc edi
inc edi
inc esi
inc esi
add ebx, 0x1
cmp ebx, _val2size
jbe Sub1

Val2Over:

cmp ebx, _val1size
ja GoOut
mov ax, car
cmp ax, 0x0
je GoOut
mov ax, word ptr [esi]
sub ax, 0x1
mov car, 0x0
cmp ax, 0x0
jae DontAdd2
add ax, 0xffff
mov car, 0x1

DontAdd2:

mov word ptr [esi], ax
inc esi
inc esi
add ebx, 0x1
jmp Val2Over

GoOut:

mov eax, _retvalsize
mul _two
mov esi, dword ptr _retval
add esi, eax
mov ecx, _retvalsize

CheckForZero:

mov ax, word ptr [esi]
cmp ax, 0x0
ja OutOfHere
dec esi
dec esi
dec ecx
cmp ecx, 0x0
ja CheckForZero

OutOfHere:

mov esi, dword ptr _retval
mov word ptr [esi],cx
mov eax, dword ptr _retval




}




return _retval;

}


and the tostring routine (notice where I put the break points marked bp1 and bp2:


unsigned short* MyBase::ConvertToString(unsigned short* _val1offset){

unsigned short* _retval=NULL;

unsigned short* _tn=NULL;
unsigned short* _exponent=NULL;
unsigned short* _pow=NULL;
unsigned short* _mult=NULL;
unsigned short* _mult2=NULL;
unsigned short* _b10=NULL;
unsigned short* _digit=NULL;

unsigned int _t=0;
unsigned int _e=0;
unsigned int _p=0;
unsigned int _m=0;
unsigned int _m2=0;
unsigned int _b=0;
unsigned int _d=0;
unsigned int _v1=0;
unsigned int _r=0;
unsigned int _ba=0;
unsigned int _da=0;

unsigned int _exponentsize=2;
unsigned int _powsize=2;
unsigned int _multsize=2;
unsigned int _mult2size=2;
unsigned int _b10size=2;
unsigned int _digitsize=2;
unsigned int _val1size=(*_val1offset);
unsigned int _retvalsize=_val1size*5;
unsigned int _tnsize=_val1size;

unsigned int _exp=_val1size*5;
unsigned int _base=10;

unsigned short _barray[]={0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39};
unsigned short _dec[]={'0', '1', '2', '3', '4', '5', '6', '7', '8', '9'};

unsigned int _docount=1;

_retval=new unsigned short[_exp];
_retval[0]=_exp;

_tn=new unsigned short[_val1size+1];
_tn[0]=_val1size;

_exponent=new unsigned short[2];
_exponent[0]=1;

_pow=new unsigned short[2];
_pow[0]=1;

_mult=new unsigned short[2];
_mult[0]=1;

_mult2=new unsigned short[2];
_mult2[0]=1;

_b10=new unsigned short[2];
_b10[0]=1;
_b10[1]=_base;

_digit=new unsigned short[2];
_digit[0]=1;
_digit[1]=0;

__asm{

mov eax, dword ptr _barray
mov _ba, eax
mov eax, dword ptr _dec
mov _da, eax
mov edi, dword ptr _retval
inc edi
mov eax, dword ptr _retval
mov _r, eax
mov eax, dword ptr _tn
mov _t, eax
mov eax, dword ptr _exponent
mov _e, eax
mov eax, dword ptr _pow
mov _p, eax
mov eax, dword ptr _mult
mov _m, eax
mov eax, dword ptr _mult2
mov _m2, eax
mov eax, dword ptr _b10
mov _b, eax
mov eax, dword ptr _digit
mov _d, eax
mov eax, dword ptr _val1offset
mov _v1, eax
xor eax, eax
mov ebx, _r
mov esi, eax
xor edi, edi
inc esi
inc edi

ClearRet: //String^ _retval="";

mov word ptr [ebx + esi * 2], ax
inc esi
cmp esi, _val1size
jle ClearRet
mov esi, _val1size
mov ebx, _t
mov edx, _v1

CopyVal:

mov ax, word ptr [edx + esi * 2]
mov word ptr [ebx + esi * 2], ax
dec esi
cmp esi, 0x0
jg CopyVal
mov ax, word ptr [edx]
mov word ptr [ebx], ax
mov ebx, _e
xor ecx, ecx
mov ecx, _exp

DecExp://while(_pow>_tn)

mov word ptr [ebx + 2], cx
push edi
push ecx
push _e
push _b
call MyBase::Pow //_pow=Pow(_b10,_exp);
mov dword ptr _pow, eax
pop edx
pop edx
push _t
push eax
call MyBase::GT //if(_pow>_tn){
pop edx
pop edx
pop ecx
pop edi
cmp eax, 0x0
je FoundExp
dec ecx
mov _exp, ecx
mov ebx, _e
jmp DecExp //_exp-=1

FoundExp://while(_exp>=0){

xor eax, eax
mov ebx, _e
mov word ptr [ebx +2], cx

DigitLoop:

mov ebx, _d
mov word ptr [ebx + 2], ax
push edi
push eax
push ecx
push _e
push _b
call MyBase::Pow // _pow=Pow(_base,_exp);
mov dword ptr _pow, eax
mov _p, eax
pop edx
pop edx
push _p
push _d
call MyBase::Multiply // _mult=_digit*_pow;
mov dword ptr _mult, eax
mov _m, eax
pop edx
pop edx
push _t
push eax
call MyBase::LT // while(_mult<_tn)
mov edx, eax
pop ecx
pop ecx
pop ecx
pop eax
pop edi
cmp edx, 0x1
jb StopInc
inc eax //_digit+=1;
cmp eax, _base //if(_digit<_base){
jl DigitLoop

StopInc:

cmp eax, _base //if(_digit<_base) ok
jl DontDecDigit
dec eax //otherwise _digit-=1
mov ebx, _d
mov word ptr [ebx + 2], ax
jmp DontDec

DontDecDigit:

push edi
push eax
push ecx
push _t
push _m
call MyBase::LTOE //if(_mult<=_tn)
pop edx
pop edx
mov edx, eax
pop ecx
pop eax
pop edi
cmp edx, 0x0
jne DontDec
dec eax //_digit-=1;
mov ebx, _d
mov word ptr [ebx + 2], ax
push edi
push eax
push ecx
push _p
push _d
call Multiply
mov _m, eax
mov dword ptr _mult, eax
pop eax
pop eax
pop ecx
pop eax
pop edi

DontDec:

push edi
push eax
push ecx
push _m
push _t
mov edx, _docount
cmp edx, 0x1
je DoCount

DoCountReturn:

mov eax, _m
mov dword ptr _mult, eax
call MyBase::Subtract// bp1 this gives correct results
mov dword ptr _tn, eax
mov _t, eax// bp1 this gives correct results
pop edx
pop edx
pop ecx
pop eax
pop edi
//xor edx, edx
//mov edx, eax
//xor eax, eax
//mov eax, edx
mov edx, _r
mov word ptr [edx + edi * 2],ax//bp2 this is the other place for a break point but this one doies not produce corrct results
inc edi
cmp ecx, 0x0
je GoOut
dec ecx
jmp FoundExp

DoCount:

xor edx,edx
mov _docount,edx
mov edx, _r
mov word ptr [edx], cx
jmp DoCountReturn

GoOut:

mov eax, dword ptr _retval



}

return _retval;

}




If I put the two break points marked bp1 in so that I can check the values contained in _tn and _mult before the subtraction and after, the program gives the correct result everytime (regardless of if I have the bp2 break point enables or not) but if I do not have the two bp1 enabled, it ALLWAYS gives an incorrect result. Am I doing something wrong here?

As a side note, I am just learning assembly so any other comments about my code are welcome.

Thanks in advance

Eri523
October 21st, 2011, 10:49 AM
Am I right to assume that this is from the managed project we already discussed? If so, could some of the arrays passed to your functions be managed arrays that have not been pinned (http://msdn.microsoft.com/en-us/library/1dz8byfh.aspx) properly? In that case the garbage collector may move them away in memory behind the scenes, thereby invalidating some of your native pointers. The behavior of the GC is supposed to be indeterministic from the perspective of your program, but perhaps the debugger influences that in some way.

At any rate, I think answering the following questions is probably helpful to narrow down the search for the bug: Is the wrong result you get always the same? And if not, is the position in the number where the wrong digits start always the same?

BTW, your functions Pow() and ConvertToString() seem to leak memory. (Also, I don't see any reason to dynamically allocate arrays of fixed size.)

AKRichard
October 21st, 2011, 01:52 PM
At any rate, I think answering the following questions is probably helpful to narrow down the search for the bug: Is the wrong result you get always the same? And if not, is the position in the number where the wrong digits start always the same?

BTW, your functions Pow() and ConvertToString() seem to leak memory. (Also, I don't see any reason to dynamically allocate arrays of fixed size.)


No the wrong result is NOT allways the same (and I have been using the same numbers being sent the algorithm every ntime) it does however give the same set of wrong numbers. What I mean by that is if I keep hitting the button to give me the string of the value, it will return a set of results that cycle every 10 or 15 times, then start from the same (incorrect) result again.

Yes, it does start giving incorrect results at the same digit every time.

the numbers in the array being sent to the alg are: 9, 39587, 12476, 61574, 14471, 16809, 7112, 34882, 5580, 1.

which in decimal form equates to:369258147036925814703692581470369258147.

Now dont laugh too much, I was having a problem with not knowing the size of the arrays after being passed to the native routines, so I sent the size of the arrays as the first element in the array and just have my algs ignore the first element (except to get the size). so in the above example the 9 would essentially get stripped out of it.

When I run the algorithm without break points the returned number would be correct up to the 3rd 4 (so that 3692581470369258147036925814xxxxxxxxxxx). It would allways give the same incorrect result the first time I hit the to string button on the form, and it allways cycles through the same series of incorrect results.

The calling routine uses the following to call into the native code:

String^ MyMath::MiniBI::CallToString(array<unsigned short,1>^ _val){

pin_ptr<unsigned short> _v1 = &_val[0];

unsigned short* _valn=MyBase::ConvertToString(_v1);


so the value should be pinned. and I know it is giving the correct results anyways if I have the breakpoints in the positions shown by bp1 in the other post. When I have the break points set up, after I check the values in the mult and tn arrays I just hit the run button again till it hits the next break point. But, if I have the single break point set up where it saves the result, I noticed that the very first calculation is off, specifically the second element in the tn array is off, the rest of the elements are correct, the tn array is the temp number of the value passed in, so that I can subtract from it to get the string. I just dont understand why it would give correct results with the break points set up and incorrect without them.

Oh and yes, this is the same managed project we have been talking about. I am still working on my native c++ , thats why you see some ugly looking code in there (like the dynamic arrays that dont need to be dynamic), and about the memory leaks, I have some VERY bad habits to overcome, I was strictly a vb programmer till a few years ago till teaching myself c++/cli, and now working on native c++, I keep forgetting about the fact I have to manually free my memory, I havent noticed it because I have only run the native code far enough to make sure that my functions are working correctly (or not), then I kill it, thats why I mentioned if you noticed anything else to feel free and point it out.

BTW, good to see you again. I told you Id be back :) Thanks for the help bud

AKRichard
October 22nd, 2011, 02:45 AM
Well I finally figured out what was wrong though I am still unclear as to why it would give the correct answer with the break points set. I had forgot to clear the _temp value at the beggining of the algorithm. I inserted one line and it fixed it. Any ideas as to why the break points made a difference though? Just to make sure it wasnt me, I commented out that line of code and tried it with andf without the break points again. Sure enough, it allways gave the incorrect result without the breakpoints, and allways gave the correct result with the break points.

I had to mess around with it before I found the problem, I took out the breakpoints in the ConvertToString routine, and put a breakpoint in the beggining of the subtract routine I immediately founbd the problem then.

I would still like to know why setting breakpoints made a difference so I could avoid that problem again. Any thoughts?

Eri523
October 22nd, 2011, 05:45 AM
I had forgot to clear the _temp value at the beggining of the algorithm.

:confused: There is no single _temp in all the four functions you posted!

However, if you post the context of that variable so that its relation to the posted functions becomes clear, I may try to find an explanation for your observations (which may or may not lead to any meaningful result).

At any rate, uninitialized variables are well known to lead to undefined behavior, as has been stated so many times all around CG, and that can be literally anything, including the seemingly "partially reasonable" results you got, which then may pretty much lead into the wrong direction when debuging.

AKRichard
October 23rd, 2011, 03:07 AM
I know, All of the algorithms have a _temp in them, the one in particular I am talking about though is in the subtract routine:



unsigned short* MyBase::Subtract(unsigned short* _val1offset, unsigned short* _val2offset){

unsigned short car=0;
unsigned short _temp;
unsigned int _short=0;
unsigned int _val1size=(*_val1offset);
unsigned int _val2size=(*_val2offset);
unsigned int _retvalsize=_val1size;

if(_val1size>=_val2size){

_short=_val2size;

}

else{

_short=_val1size;

}

unsigned short* _retval=NULL;
unsigned int _two=2;

_retval=new unsigned short[_retvalsize+1];
_retval[0]=_retvalsize;

__asm{

mov ebx, dword ptr _val1offset
mov edi, dword ptr _val2offset
mov esi, dword ptr _retval
xor eax,eax
mov car,ax

ZeroOut:

mov ax, word ptr [ebx]
mov word ptr [esi], ax
inc esi
inc esi
add ebx, 0x2
movzx edx, car
add edx, 0x1
mov car, dx
cmp edx,_val1size
jbe ZeroOut
mov edi, dword ptr _val2offset
mov esi, dword ptr _retval
xor eax,eax
xor ebx, ebx
mov _temp, ax
mov car,ax
inc edi
inc edi
inc esi
inc esi
add ebx, 0x1

sub1:

mov ax, word ptr [esi]
cmp ax, word ptr [edi]
ja DontDoIt
mov _temp,0x1

DontDoIt:

sub ax, word ptr [edi]
sub ax, car
mov cx, _temp
mov car, cx
mov _temp, 0x0 <----adding this line fixed the problem
//cmp ax, 0x0
//ja DontAdd
//add ax, 0xffff
//mov car, 0x1

DontAdd:

mov word ptr [esi], ax
inc edi
inc edi
inc esi
inc esi
add ebx, 0x1
cmp ebx, _val2size
jbe Sub1

Val2Over:

cmp ebx, _val1size
ja GoOut
mov ax, car
cmp ax, 0x0
je GoOut
mov ax, word ptr [esi]
sub ax, 0x1
mov car, 0x0
cmp ax, 0x0
jae DontAdd2
add ax, 0xffff
mov car, 0x1

DontAdd2:

mov word ptr [esi], ax
inc esi
inc esi
add ebx, 0x1
jmp Val2Over

GoOut:

mov eax, _retvalsize
mul _two
mov esi, dword ptr _retval
add esi, eax
mov ecx, _retvalsize

CheckForZero:

mov ax, word ptr [esi]
cmp ax, 0x0
ja OutOfHere
dec esi
dec esi
dec ecx
cmp ecx, 0x0
ja CheckForZero

OutOfHere:

mov esi, dword ptr _retval
mov word ptr [esi],cx
mov eax, dword ptr _retval




}




return _retval;

}


I couldve probably just added =0 to the declaration (like I did to all the other variables). I shouldve caught it earlier, but when it was giving correct results with the breakpoints set (in the ConvertToString routine) I wasnt sure how to proceed.

I have to admit, I am having a hard time with some of the native stuff, especially arrays. As you pointed out, I dont need dynamic arrays in quite a few places, but I could not get the algorithms to work correctly unless I did it this way. I have a feeling it has something to do with the way I am using pointers, I thought they were pretty much like reference handles in managed c++, but not quite.

On the other hand, the assembly code for the algorithms went a lot smoother than I thought it would. I have all the basic functions (Add, Subtract, Multiply, Divide, Pow, ModPow, and bit shifts) written and most of them have been debugged and tested now, though I cheated on the Mod algorithm, I used the division algorithm and just returned the remainder. I had the montgomery reduction working but, it does not give the result I am looking for (it gives TR(-1) mod N , T time the multiplicative inverse of R mod N that is), anyhow, I am working on making those algorithms more efficient by using other addressing modes, since, if you havent noticed, all my adressing of variables uses the approach you showed me last time I was here.

I know you must be pretty busy, so you dont have to waste time on the original post, though I am curious to see if you get the same results as I did using the breakpoints in the same places.

Thanks again bud.

Eri523
October 23rd, 2011, 08:30 PM
I know, All of the algorithms have a _temp in them, the one in particular I am talking about though is in the subtract routine:

D'oh! :eek: I really should have seen it there in the fourth code line. :blush: And after I didn't find it on my own, I even copy-pasted your code into Notepad++ which then confirmed my wrong belief. I vaguely recall this is not the first time its search function failed to find a variable name with an underscore. Looks like there's a bug in there (version 5.9.3)...

I couldve probably just added =0 to the declaration (like I did to all the other variables).

I think that would've done.

I shouldve caught it earlier, but when it was giving correct results with the breakpoints set (in the ConvertToString routine) I wasnt sure how to proceed.

That's the mean thing about undefined behavior: It can lead to pretty confusing results. This is a perfect example of that.

I have to admit, I am having a hard time with some of the native stuff, especially arrays. As you pointed out, I dont need dynamic arrays in quite a few places, but I could not get the algorithms to work correctly unless I did it this way. I have a feeling it has something to do with the way I am using pointers, [...].

Using local fixed-size arrays in inline assembly is a bit different from using arrays that have been passed as a parameter or dynamically allocated (which basically behave the same from the assembly language POV). Here's a modified version of my demo program from http://www.codeguru.com/forum/showthread.php?t=516212 which uses a local fixed-size array, with the relevant change highlighted in red:


// Test8a.cpp

#include <cstdio>

char achFormat[] = "%d\n";

void DoIt()
{
int an[10];
for (int i = 0; i < 10; ++i)
an[i] = 101 + i;

__asm {
mov esi, offset achFormat
lea ebx, an
xor edi, edi
loop000:
push dword ptr [ebx + edi * 4]
push esi
call dword ptr [printf]
add esp, 8
inc edi
cmp edi, 10
jb loop000
}
}

int main()
{
DoIt();
}


The reason why the LEA instruction must be used here instead of the MOV instruction in combination with the OFFSET operator like for the global array achFormat is that an actually expands to [ebp-30h] which of course doesn't have an offset in the strict sense. It evaluates to an offset, however, and that is what LEA loads into the destination register.

[...] I thought they were pretty much like reference handles in managed c++, but not quite.

They're similar in behavior to some extent but their technical implementation is way different. One of the most important differences is that the .NET reference type instances to which the tracking handles refer are reference counted, making them subject to garbage collection when their reference count reaches zero. In that sense, if you really want something to compare, the tracking handles are probably more similar to a native C++ shared_ptr than to a raw pointer.

I know you must be pretty busy, so you dont have to waste time on the original post, though I am curious to see if you get the same results as I did using the breakpoints in the same places.

My degree of busyness varies with time, but actually at the time I am rather busy. How did you know? :cool:

I'm probably not going to conduct any live tests with your code unless I have a readily compileable environment to test it in, but that doesn't seem to be necessary anymore anyway now. However, what I can tell you is that the "contents" of uninitialized local variables depends on what was on the stack earlier, used by functions on the same or lower (or perhaps even higher if they were called by same-or-lower-level functions) "call depth level" than the one that hosts the uninitialized variable. This stack space has already been released before the function in question is called but still holds the old (and now meaningless) data. This may, to some extent, explain the circular repetition of wrong results you observed.

The fact that your results varied depending on which breakpoints you set when debugging may be explained by the debugger using the stack of your app for its own purposes to some extent which is not unlikely and completely uncritical unless you "index down the stack" into the released area (which is strongly discouraged because it's hackish and extremely unsafe) or, like here, have uninitialized local variables, which essentially have a quite similar effect to down-indexing. Imagine the debugger happens, by chance, to zero out the place where your _temp sits: All of a sudden it would behave as if it was initialized. But beware of what happens when the debugger doesn't do that, which, of course, you have no real way to influence... ;)

Finally, a few words about coding style (which, as we know, of course isn't really mandatory unless it's a policy of your employing company): The meaning of the register names ESI and EDI is "source index" and "destination index". In your Subtract() function, however, you use EDI to point into _val2offset and ESI to point into _retval which obviously is the output array, thereby reversing that paradigm. As you have experienced, that does work (and in fact it will work for anything you can do with these registers except for the string operations), yet it confuses developers who are used to reading hand-written assembly language (like me), thus making the code harder to read.

Also, while I'm generally not quite a friend of excessive commenting of code, IMO assembly language programs of meaningful size (and I'd say the functions we're discussing here belong into that category) should be extensively commented since their degree of self-documentation tends to be unusually low, compared to most of the other languages. In the assembly language programs (of meaningful size) I used to write, practiacally every line containing actual assembly language instructions was commented, as well as many of the other lines like, e.g., variable declarations. Overall, I think at least 50% of the volume of my assembly language source code used to be comments.

AKRichard
October 25th, 2011, 04:34 AM
Using local fixed-size arrays in inline assembly is a bit different from using arrays that have been passed as a parameter or dynamically allocated (which basically behave the same from the assembly language POV). Here's a modified version of my demo program from http://www.codeguru.com/forum/showthread.php?t=516212 which uses a local fixed-size array, with the relevant change highlighted in red:



ya in my first post in this section I was trying to use the lea instruction to load pointers for the variables that were passed into the function.

finally, a few words about coding style (which, as we know, of course isn't really mandatory unless it's a policy of your employing company):


I am just a dumb a$$ carpenter that enjoys challenging my mind for some reason, so I dont have anyone to report to. But I would still like my code to somewhat comply with what real programmers use (it makes coming in here and asking questions easier too).

The meaning of the register names ESI and EDI is "source index" and "destination index". In your Subtract() function, however, you use EDI to point into _val2offset and ESI to point into _retval which obviously is the output array, thereby reversing that paradigm. As you have experienced, that does work (and in fact it will work for anything you can do with these registers except for the string operations), yet it confuses developers who are used to reading hand-written assembly language (like me), thus making the code harder to read.


The original idea in the Subtract routine was to use the _retval as one of the variables (which I did), but I was trying to use the form of the subtract instruction that would save the result into the variabe so that instead of having something like

mov ax, word ptr[ebx +esi * 2]
sub ax, word ptr[edx + edi * 2]
mov word ptr [edx + edi *2], ax

it was supposed to look more like

mov ax, word ptr[ebx + esi * 2]
sub word ptr [edx + edi * 2], ax

thereby shaving off one instruction, but ran into a problem with the subtract with carry instruction (sbc) There are all kinds of instructions that mess with the carry flag as I found out the hard way. I think once I implement the loop instruction Ill be able to set it up the way I pictured in my head. And I believe if I start using the registers as per the examples Ive seen, not only will it be easier to understand, but it will probably solve some of my problems with the registers.

Also, while I'm generally not quite a friend of excessive commenting of code, IMO assembly language programs of meaningful size (and I'd say the functions we're discussing here belong into that category) should be extensively commented since their degree of self-documentation tends to be unusually low, compared to most of the other languages. In the assembly language programs (of meaningful size) I used to write, practiacally every line containing actual assembly language instructions was commented, as well as many of the other lines like, e.g., variable declarations. Overall, I think at leat 50% of the volume of my assembly language source code used to be comments.

Ive actually started commenting the assembly code, not every line, but what Ive been doing is writing the function in C++ then I would copy the C++ code to the inline assembly and insert that as comments so that I could look at it and see what I had in mind for that section.

Eri523
October 25th, 2011, 09:11 AM
[...] There are all kinds of instructions that mess with the carry flag as I found out the hard way. I think once I implement the loop instruction Ill be able to set it up the way I pictured in my head. And I believe if I start using the registers as per the examples Ive seen, not only will it be easier to understand, but it will probably solve some of my problems with the registers.

Yeah, a lot of instructions interfere with the C flag, perhaps more than one might think in the first place. IMO it's one of the two most important flags, the other one being Z, which is even more "fragile". The LOOP instruction, however, doesn't touch any of the flags, which is one of the nicest properties of that instruction.

Ive actually started commenting the assembly code, not every line, but what Ive been doing is writing the function in C++ then I would copy the C++ code to the inline assembly and insert that as comments so that I could look at it and see what I had in mind for that section.

Well, yes, there's not always a need for a comment in every line. That's what I meant by the "practically" in my paragraph you quoted. For instance, the following would be rather goofy in most contexts:


inc esi ; Advance the pointer
inc esi ; Advance it some more


(This two-line construct was used earlier as a replacement for ADD ESI,2 on weaker CPUs to save a few clock cycles and code bytes, but in most scenarios it doesn't yield any relevant gain anymore nowadays.)

I'm not sure whether commenting assembly language with C++ source code necessarily is a good idea, though. Often there's no direct 1-to-1 relation between assembly language code and hypothetical C++ code, which perhaps is one of the reasons why you started writing assembly language code in the first place. Obviously, rearranging your assembly language code to match the C++ comments may sacrifice part of the efficiency gain you got from the assembly language code.

BTW, currently I'm facing my own debugging nightmare. :sick: Without assembly language, but instead with multithreading and IPC...

AKRichard
October 25th, 2011, 09:42 PM
I'm not sure whether commenting assembly language with C++ source code necessarily is a good idea, though. Often there's no direct 1-to-1 relation between assembly language code and hypothetical C++ code, which perhaps is one of the reasons why you started writing assembly language code in the first place. Obviously, rearranging your assembly language code to match the C++ comments may sacrifice part of the efficiency gain you got from the assembly language code.

I just used the c++ code as comments in places so that I would know what point in the algorithm I was. especially at the tags, like:



MultLoop://while(x>0){

.
.
.
jmp MultLoop



I started using this because in the longer routines it was difficult to keep track of where I was in the algorithm. I didnt try to make a 1 - 1 correlation with assembly, but with snippets of c++ code I knew what that section of assembly was supposed to resemble or replace.
BTW, currently I'm facing my


BTW, currently I'm facing my own debugging nightmare. :sick: Without assembly language, but instead with multithreading and IPC...


Oh what fun that is. Even with the new debugging tools out there multithreaded apps can still be quite the challenge at times. I spent some time making sure that my library was thread safe for multithreaded apps.

AKRichard
October 25th, 2011, 09:43 PM
BTW, thanks again for the help