I have used the following code to write and read wchar_t bytes from a disk file:
Code:
int WriteBytesW(wchar_t * wcp, int nsz, wchar_t * wcfilepath)
{
wfstream wf;
codecvt_utf16<wchar_t, 0x10ffff, little_endian> ccvt(1);
locale wloc(wf.getloc(), &ccvt);
wf.imbue(wloc);
wf.open(wcfilepath, ios::out | ios::binary);
if(!wf) { wprintf(_T("Unable to open file %s"), wcfilepath); return 0; }
wf.write((wchar_t *) wcp, (streamsize)(nsz));
wf.close();
return 1;
}// WriteBytesW(wchar_t * wcp, int nsz, wchar_t * wcfilepath)
/// reads raw bytes from a file all at once
/// see: http://www.cplusplus.com/reference/istream/istream/tellg/
int ReadBytesW(wchar_t * wcfilepath, wchar_t * pwbuf, long &lsz)
{
wfstream wf;
codecvt_utf16<wchar_t, 0x10ffff, little_endian> ccvt(1);
locale wloc(wf.getloc(), &ccvt);
wf.imbue(wloc);
// see: http://www.codeguru.com/forum/showthread.php?t=511113
wf.open(wcfilepath, ios::in|ios::binary);
if(!wf) { wprintf( _T("Unable to open file %s"), wcfilepath); return 0; }
// get length of file:
wf.seekg (0, wf.end);
int length = wf.tellg();
wf.seekg (0, wf.beg);
lsz = length;
pwbuf = new wchar_t [length+1];
wmemset(pwbuf, 0x0000, length+1);
wf.read(pwbuf, (streamsize) length);
wf.close();
// print content
for(int i = 0; i < length/2; i++)
{
printf("%0.4X ", pwbuf[i]);
}
printf("\n");
delete [] pwbuf; pwbuf = 0;
return 1;
}// ReadBytesW(wstring wsfilepath)
I ran a simple experiment in which the wide character 0xFFFF is either absent from or present in the data.
Code:
/// Round-trip experiment: writes ten 16-bit test words to "bravo.dat" with
/// WriteBytesW, then has ReadBytesW read the file back and print it, reporting
/// success or failure of each step on stdout.
int _tmain(int argc, _TCHAR* argv[])
{
    // Test pattern; element 8 is the value swapped for 0xffff in the second run.
    wchar_t samples[10] = {
        0x1234, 0x5678, 0x9abc, 0xef12, 0xabcd,
        0xfe21, 0xdcba, 0x1f2a, 0xefff, 0x02ff
    };

    const int saved = WriteBytesW(samples, 10, _T("bravo.dat"));
    if (saved)
    {
        printf("save bytes succeeded\n");
    }
    else
    {
        printf("save bytes failed\n");
    }

    // Output arguments for the read step.
    long nsz = 0;
    wchar_t * wbuf2 = 0;
    const int loaded = ReadBytesW(_T("bravo.dat"), wbuf2, nsz);
    if (loaded)
    {
        printf("read bytes succeeded\n");
    }
    else
    {
        printf("read bytes failed\n");
    }

    return 0;
}
Output:
save bytes succeeded
1234 5678 9ABC EF12 ABCD FE21 DCBA 1F2A EFFF 02FF
read bytes succeeded
nsz =: 20
Now, if wbuf[8] = 0xefff; is replaced by wbuf[8] = 0xffff;
Output:
save bytes succeeded
1234 5678 9ABC EF12 ABCD FE21 DCBA 1F2A 02FF
read bytes succeeded
nsz =: 18
Obviously, the 0xFFFF wide character is not read. Why?
This is a significant problem when attempting to read ALL wide characters from a file. Is there any workaround? Is this a VS problem?