00001
00002
00003
00004
00005 #include "textfile.h"
00006
00007 template <typename T> int TReadLine (TextFileReader *tf, T **line_buf, bool BE);
00008 template <typename T> int TReadChar (TextFileReader *tf, int strip_cr);
00009
00010
00011
00012
00013
00014 HMODULE ghResInst;
00015
00016
00017
00018 TCHAR * vFormatPString (const TCHAR *message, va_list ap)
00019 {
00020 TCHAR buf2 [T2B (4096)];
00021
00022
00023
00024 return copy_string (message);
00025 }
00026
00027
00028 TCHAR * _cdecl FormatPString (const TCHAR *message, ...)
00029 {
00030 va_list ap;
00031 va_start (ap, message);
00032 TCHAR *result = vFormatPString (message, ap);
00033 va_end (ap);
00034 return result;
00035 }
00036
00037
00038
00039 TCHAR * copy_string (const TCHAR *s)
00040 {
00041 if (s == NULL)
00042 {
00043 TCHAR *result = (TCHAR *) malloc (T2B (1));
00044 result [0] = 0;
00045 return result;
00046 }
00047
00048 TCHAR *result = (TCHAR *) malloc (T2B (_tcslen (s) + 1));
00049 return _tcscpy (result, s);
00050 }
00051
00052
00053
// Duplicate a narrow (char) string into malloc'd storage.
//
// a: string to copy; NULL is treated as the empty string, matching what
//    copy_string() does for TCHAR strings.
//
// Returns: a newly allocated, NUL-terminated copy, or NULL when the allocation
// fails.  The caller releases it with free().
char * AtoA (const char *a)
{
    if (a == NULL)
        a = "";

    const size_t nbytes = strlen (a) + 1;      // include the terminator
    char *result = (char *) malloc (nbytes);
    if (result == NULL)
        return NULL;

    memcpy (result, a, nbytes);
    return result;
}
00059
00060
00061
00062
00063
// Tag stored in every block header; the block helpers assert on it before
// trusting a header.
#define DB_SIG 'GSDB'

// Header that sits immediately in front of the bytes handed out by
// alloc_block().
struct DATA_BLOCK
{
    int signature;          // DB_SIG once the block has been set up
    int space_allocated;    // bytes available after the header
    int datalen;            // byte count passed to the most recent alloc_block()
};
00072
00073
00074
00075
00076 void * alloc_block (void *current_block, int datalen)
00077 {
00078 DATA_BLOCK *pb = (DATA_BLOCK *) current_block;
00079 int space_needed;
00080
00081 space_needed = ((datalen + 1023) / 1024) * 1024;
00082 if (pb)
00083 {
00084 --pb;
00085 assert (pb->signature == DB_SIG);
00086 if (pb->space_allocated >= space_needed)
00087 goto done;
00088 }
00089
00090 pb = (DATA_BLOCK *) realloc (pb, space_needed + sizeof (DATA_BLOCK));
00091 assert (pb);
00092 pb->signature = DB_SIG;
00093 pb->space_allocated = space_needed;
00094
00095 done:
00096 pb->datalen = datalen;
00097 return pb + 1;
00098 }
00099
00100
00101
00102 void free_block (void *block)
00103 {
00104 if (block)
00105 {
00106 DATA_BLOCK *pb = (DATA_BLOCK *) block;
00107
00108 --pb;
00109 assert (pb->signature == DB_SIG);
00110 free (pb);
00111 }
00112 }
00113
00114
00115
00116 void * copy_block (const void *block)
00117 {
00118 DATA_BLOCK *pb, *pnb;
00119 int blocksize;
00120
00121 assert (block);
00122
00123 pb = (DATA_BLOCK *) block;
00124 --pb;
00125 assert (pb->signature == DB_SIG);
00126 blocksize = pb->space_allocated + sizeof (DATA_BLOCK);
00127 pnb = (DATA_BLOCK *) malloc (blocksize);
00128 assert (pnb);
00129 memcpy (pnb, pb, blocksize);
00130 return pnb + 1;
00131 }
00132
00133
00134
00135 int get_blocklen (const void *block)
00136 {
00137 DATA_BLOCK *pb = (DATA_BLOCK *) block;
00138
00139 if (pb == NULL)
00140 return 0;
00141 --pb;
00142 assert (pb->signature == DB_SIG);
00143 return pb->datalen;
00144 }
00145
00146
00147
00148 int get_buflen (const void *block)
00149 {
00150 DATA_BLOCK *pb = (DATA_BLOCK *) block;
00151
00152 if (pb == NULL)
00153 return 0;
00154 --pb;
00155 assert (pb->signature == DB_SIG);
00156 return pb->space_allocated;
00157 }
00158
00159
00160
00161
00162
00163 static inline void flip_words (WCHAR *w, int nchars)
00164 {
00165 BYTE *b = (BYTE *) w;
00166 while (nchars--)
00167 {
00168 BYTE swap = b [0];
00169 b [0] = b [1];
00170 b [1] = swap;
00171 b += 2;
00172 }
00173 }
00174
00175
00176
00177 int linebuf_UtoA
00178 (const WCHAR *u, int len, char **a, int code_page, const char *unknown_char, BOOL *data_lost)
00179 {
00180 if (len < 0)
00181 len = (int) wcslen (u);
00182 int nbytes = len * 2;
00183 *a = (char *) alloc_block (*a, nbytes + 1);
00184 int bytes_out = WideCharToMultiByte
00185 (code_page, 0, u, len, *a, nbytes, unknown_char, data_lost);
00186 assert (bytes_out <= nbytes);
00187 (*a) [bytes_out] = 0;
00188 return bytes_out;
00189 }
00190
00191
00192
00193 int linebuf_AtoU
00194 (const char *a, int len, WCHAR **u, int code_page, const char *unknown_char, BOOL *data_lost)
00195 {
00196 if (len < 0)
00197 len = (int) base_strlen (a);
00198 *u = (WCHAR *) alloc_block (*u, (len + 1) * sizeof (WCHAR));
00199 int chars_out = MultiByteToWideChar (code_page, 0, a, len, *u, len);
00200 assert (chars_out <= len);
00201 (*u) [chars_out] = 0;
00202 return chars_out;
00203 }
00204
00205
00206
00207
00208
00209 class TextFileReader
00210 {
00211 friend int TReadLine <BYTE> (TextFileReader *tf, BYTE **line_buf, bool BE);
00212 friend int TReadChar <BYTE> (TextFileReader *tf, int strip_cr);
00213 friend int TReadLine <WCHAR> (TextFileReader *tf, WCHAR **line_buf, bool BE);
00214 friend int TReadChar <WCHAR> (TextFileReader *tf, int strip_cr);
00215
00216 public:
00217
00218
00219 TextFileReader::TextFileReader (const TCHAR *filename, HANDLE hFile, int bufsize)
00220 {
00221 this->filename = filename;
00222 this->hFile = hFile;
00223 if (bufsize >= 0)
00224 {
00225 this->bufsize = (bufsize) ? bufsize : 4096;
00226 this->buf = (BYTE *) malloc (this->bufsize);
00227 }
00228 else
00229 {
00230 this->bufsize = 0;
00231 this->buf = NULL;
00232 }
00233 this->bufptr = 0;
00234 this->bufbytes = 0;
00235 this->eof = false;
00236 }
00237
00238
00239
00240 virtual TextFileReader::~TextFileReader ()
00241 {
00242 free (this->buf);
00243 }
00244
00245
00246
00247 virtual int TextFileReader::ReadChar (void)
00248 {
00249 return TF_EOF;
00250 }
00251
00252
00253
00254 virtual int TextFileReader::ReadLine
00255 (TCHAR **line_buf, int code_page, const char *unknown_char, BOOL *data_lost)
00256 {
00257 return TF_EOF;
00258 }
00259
00260
00261
00262 int TextFileReader::FillBuffer (void)
00263 {
00264 if (this->bufptr < this->bufbytes)
00265 return 0;
00266 if (this->eof)
00267 return TF_EOF;
00268
00269 DWORD bytes_read;
00270 if (!ReadFile (this->hFile, this->buf, this->bufsize, &bytes_read, NULL))
00271 {
00272
00273
00274
00275
00276 return TF_ERROR;
00277 }
00278
00279 if (bytes_read == 0)
00280 this->eof = true;
00281 this->bufptr = 0;
00282 this->bufbytes = bytes_read;
00283 return (this->eof) ? TF_EOF : 0;
00284 }
00285
00286 protected:
00287 const TCHAR *filename;
00288 HANDLE hFile;
00289 int bufsize;
00290 BYTE *buf;
00291 int bufptr;
00292 int bufbytes;
00293 bool eof;
00294 };
00295
00296
00297
00298
00299
00300 inline BYTE * t_memchr (BYTE *p, int c, size_t n) { return (BYTE *) memchr (p, c, n); };
00301 inline WCHAR * t_memchr (WCHAR *p, int c, size_t n) { return (WCHAR *) wmemchr (p, c, n); };
00302
00303 template <typename T> int TReadLine (TextFileReader *tf, T **line_buf, bool BE)
00304 {
00305 T *tbuf = (T *) alloc_block (*line_buf, sizeof (T));
00306 tbuf [0] = 0;
00307 int result = TF_EOF;
00308 int nchars = 0;
00309
00310 WCHAR lf = '\n';
00311 WCHAR cr = '\r';
00312 if (BE)
00313 {
00314 flip_words (&lf, 1);
00315 flip_words (&cr, 1);
00316 }
00317
00318 for ( ; ; )
00319 {
00320 if (tf->bufptr >= tf->bufbytes)
00321 {
00322 int result = tf->FillBuffer ();
00323 if (result < 0)
00324 break;
00325 }
00326 result = 0;
00327
00328 T *bp = (T *) (tf->buf + tf->bufptr);
00329 int chars_available = (tf->bufbytes - tf->bufptr) / sizeof (T);
00330 T *nl = t_memchr (bp, lf, chars_available);
00331 if (nl)
00332 chars_available = (int) (nl - bp);
00333
00334 int chars_to_copy = chars_available;
00335 if (chars_to_copy > 0 && bp [chars_to_copy - 1] == cr)
00336 --chars_to_copy;
00337 tbuf = (T *) alloc_block (tbuf, (nchars + chars_to_copy + 1) * sizeof (T));
00338 memcpy (tbuf + nchars, bp, chars_to_copy * sizeof (T));
00339 nchars += chars_to_copy;
00340 tbuf [nchars] = 0;
00341
00342 tf->bufptr += chars_available * sizeof (T);
00343 if (nl)
00344 {
00345 tf->bufptr += sizeof (T);
00346 break;
00347 }
00348 }
00349
00350 *line_buf = tbuf;
00351 return result;
00352 }
00353
00354
00355
00356 template <typename T> int TReadChar (TextFileReader *tf, int strip_cr)
00357 {
00358 int result;
00359
00360 do
00361 {
00362 if (tf->bufptr >= tf->bufbytes)
00363 {
00364 result = tf->FillBuffer ();
00365 if (result < 0)
00366 return result;
00367 }
00368
00369 result = * (T *) (tf->buf + tf->bufptr);
00370 tf->bufptr += sizeof (T);
00371 }
00372 while (result == strip_cr);
00373
00374 return result;
00375 }
00376
00377
00378
00379
00380
00381 class TextFileWriter
00382 {
00383 public:
00384
00385
00386 TextFileWriter::TextFileWriter (const TCHAR *filename, HANDLE hFile, int bufsize)
00387 {
00388 this->filename = filename;
00389 this->hFile = hFile;
00390 this->bufsize = (bufsize) ? bufsize : 4096;
00391 this->buf = (BYTE *) malloc (this->bufsize);
00392 this->bufptr = 0;
00393 }
00394
00395
00396
00397 virtual TextFileWriter::~TextFileWriter ()
00398 {
00399 free (this->buf);
00400 }
00401
00402
00403
00404 virtual int TextFileWriter::WriteChar
00405 (int ch, int code_page, const char *unknown_char, BOOL *data_lost)
00406 {
00407 assert (0);
00408 return TF_ERROR;
00409 }
00410
00411
00412
00413 virtual int TextFileWriter::WriteString
00414 (const TCHAR *s, int len, int code_page, const char *unknown_char, BOOL *data_lost)
00415 {
00416 assert (0);
00417 return TF_ERROR;
00418 }
00419
00420
00421
00422 virtual int TextFileWriter::WriteBOM (void)
00423 {
00424 return 0;
00425 }
00426
00427
00428
00429 int TextFileWriter::WriteBytes (const BYTE *bytes, int nbytes)
00430 {
00431 for ( ; ; )
00432 {
00433 int bufspace = this->bufsize - this->bufptr;
00434 if (bufspace > nbytes)
00435 bufspace = nbytes;
00436 memcpy (this->buf + this->bufptr, bytes, bufspace);
00437 this->bufptr += bufspace;
00438 nbytes -= bufspace;
00439 if (nbytes == 0)
00440 return 0;
00441 int result = Flush ();
00442 if (result < 0)
00443 return result;
00444 bytes += bufspace;
00445 }
00446 }
00447
00448
00449
00450 int TextFileWriter::Flush (void)
00451 {
00452 assert (this->hFile != INVALID_HANDLE_VALUE);
00453 if (this->bufptr > 0)
00454 {
00455 DWORD bytes_written;
00456 if (!WriteFile (this->hFile, this->buf, this->bufptr, &bytes_written, NULL))
00457 {
00458
00459
00460
00461
00462 return TF_ERROR;
00463 }
00464 this->bufptr = 0;
00465 }
00466
00467 return 0;
00468 }
00469
00470 protected:
00471 const TCHAR *filename;
00472 HANDLE hFile;
00473 int bufsize;
00474 BYTE *buf;
00475 int bufptr;
00476 };
00477
00478
00479
00480
00481
00482 class ANSI_TextFileReader : public TextFileReader
00483 {
00484 public:
00485
00486
00487 ANSI_TextFileReader::ANSI_TextFileReader (TextFile *tf, const TCHAR *filename,
00488 HANDLE hFile, int bufsize) : TextFileReader (filename, hFile, bufsize)
00489 {
00490 this->tf = tf;
00491 #ifdef _UNICODE
00492 this->char_buf = NULL;
00493 #endif
00494 }
00495
00496
00497
00498 ANSI_TextFileReader::~ANSI_TextFileReader ()
00499 {
00500 #ifdef _UNICODE
00501 free_block (this->char_buf);
00502 #endif
00503 }
00504
00505
00506
00507 virtual int ANSI_TextFileReader::ReadLine
00508 (TCHAR **line_buf, int code_page, const char *unknown_char, BOOL *data_lost)
00509 {
00510 #ifndef _UNICODE
00511 return TReadLine <BYTE> (this, (BYTE **) line_buf, false);
00512 #else
00513 int result;
00514 if (this->tf->GetAnsiPassThru ())
00515 result = TReadLine <BYTE> (this, (BYTE **) line_buf, false);
00516 else
00517 {
00518 result = TReadLine <BYTE> (this, &this->char_buf, false);
00519 if (result >= 0)
00520 linebuf_AtoU ((char *) this->char_buf, -1, line_buf,
00521 code_page, unknown_char, data_lost);
00522 }
00523 return result;
00524 #endif
00525 }
00526
00527
00528
00529 int ANSI_TextFileReader::ReadChar (void)
00530 {
00531 return TReadChar <BYTE> (this, '\r');
00532 }
00533
00534 private:
00535 TextFile *tf;
00536 #ifdef _UNICODE
00537 BYTE *char_buf;
00538 #endif
00539 };
00540
00541
00542
00543
00544
00545 class UTF16LE_TextFileReader : public TextFileReader
00546 {
00547 public:
00548
00549
00550 UTF16LE_TextFileReader::UTF16LE_TextFileReader (const TCHAR *filename, HANDLE hFile, int bufsize) :
00551 TextFileReader (filename, hFile, bufsize)
00552 {
00553 assert ((bufsize & 1) == 0);
00554 this->utf16_buf = NULL;
00555 }
00556
00557
00558
00559 UTF16LE_TextFileReader::~UTF16LE_TextFileReader ()
00560 {
00561 free_block (this->utf16_buf);
00562 }
00563
00564
00565
00566 virtual int UTF16LE_TextFileReader::ReadChar (void)
00567 {
00568 return TReadChar <WCHAR> (this, '\r');
00569 }
00570
00571
00572
00573 virtual int UTF16LE_TextFileReader::ReadLine
00574 (TCHAR **line_buf, int code_page, const char *unknown_char, BOOL *data_lost)
00575 {
00576 #ifdef _UNICODE
00577 return TReadLine <WCHAR> (this, line_buf, false);
00578 #else
00579 int result = TReadLine <WCHAR> (this, &this->utf16_buf, false);
00580 if (result >= 0)
00581 linebuf_UtoA (this->utf16_buf, -1, line_buf, code_page, unknown_char, data_lost);
00582 return result;
00583 #endif
00584 }
00585
00586 private:
00587 WCHAR *utf16_buf;
00588 };
00589
00590
00591
00592
00593
00594 class UTF16BE_TextFileReader : public TextFileReader
00595 {
00596 public:
00597
00598
00599 UTF16BE_TextFileReader::UTF16BE_TextFileReader (const TCHAR *filename, HANDLE hFile, int bufsize) :
00600 TextFileReader (filename, hFile, bufsize)
00601 {
00602 assert ((bufsize & 1) == 0);
00603 #ifndef _UNICODE
00604 this->utf16_buf = NULL;
00605 #endif
00606 }
00607
00608
00609
00610 UTF16BE_TextFileReader::~UTF16BE_TextFileReader ()
00611 {
00612 #ifndef _UNICODE
00613 free_block (this->utf16_buf);
00614 #endif
00615 }
00616
00617
00618
00619 virtual int UTF16BE_TextFileReader::ReadChar (void)
00620 {
00621 int result = TReadChar <WCHAR> (this, 0x0d00);
00622 if (result < 0)
00623 return result;
00624
00625 WCHAR w = result;
00626 flip_words (&w, 1);
00627 return w;
00628 }
00629
00630
00631
00632 virtual int UTF16BE_TextFileReader::ReadLine
00633 (TCHAR **line_buf, int code_page, const char *unknown_char, BOOL *data_lost)
00634 {
00635 #ifdef _UNICODE
00636 int result = TReadLine <WCHAR> (this, line_buf, true);
00637 if (result < 0)
00638 return result;
00639 flip_words (*line_buf, (int) wcslen (*line_buf));
00640 return result;
00641 #else
00642 int result = TReadLine <WCHAR> (this, &this->utf16_buf, true);
00643 if (result < 0)
00644 return result;
00645 flip_words (this->utf16_buf, (int) wcslen (this->utf16_buf));
00646 linebuf_UtoA (this->utf16_buf, -1, line_buf, code_page, unknown_char, data_lost);
00647 return result;
00648 #endif
00649 }
00650
00651 private:
00652 #ifndef _UNICODE
00653 WCHAR *utf16_buf;
00654 #endif
00655 };
00656
00657
00658
00659
00660
00661 class UTF8_TextFileReader : public TextFileReader
00662 {
00663 public:
00664
00665
00666 UTF8_TextFileReader::UTF8_TextFileReader (const TCHAR *filename, HANDLE hFile, int bufsize) :
00667 TextFileReader (filename, hFile, bufsize)
00668 {
00669 this->utf8_buf = NULL;
00670 #ifndef _UNICODE
00671 this->utf16_buf = NULL;
00672 #endif
00673 }
00674
00675
00676
00677 UTF8_TextFileReader::~UTF8_TextFileReader ()
00678 {
00679 free_block (this->utf8_buf);
00680 #ifndef _UNICODE
00681 free_block (this->utf16_buf);
00682 #endif
00683 }
00684
00685
00686
00687 virtual int UTF8_TextFileReader::ReadLine
00688 (TCHAR **line_buf, int code_page, const char *unknown_char, BOOL *data_lost)
00689 {
00690 int result = TReadLine <BYTE> (this, &this->utf8_buf, false);
00691 if (result < 0)
00692 return result;
00693
00694 #ifdef _UNICODE
00695 WCHAR **p_utf16_buf = line_buf;
00696 #else
00697 WCHAR **p_utf16_buf = &this->utf16_buf;
00698 #endif
00699
00700 BYTE *pb = this->utf8_buf;
00701 WCHAR *pw = *p_utf16_buf;
00702 int nchars = 0;
00703 int ch = 0xFFFD;
00704
00705 while (*pb)
00706 {
00707 int b1 = *pb++;
00708 if (b1 < 0x80)
00709 ch = b1;
00710 else if (b1 < 0xe0)
00711 {
00712 int b2 = *pb;
00713 if (b2)
00714 {
00715 ch = ((b1 & 0x1F) << 6) | (b2 & 0x3F);
00716 pb++;
00717 }
00718 }
00719 else if (b1 < 0xf0)
00720 {
00721 int b2 = *pb;
00722 if (b2)
00723 {
00724 pb++;
00725 int b3 = *pb;
00726 if (b3)
00727 {
00728 ch = ((b1 & 0x0F) << 12) | ((b2 & 0x3F) << 6) | (b3 & 0x3F);
00729 pb++;
00730 }
00731 }
00732 }
00733 else if (b1 < 0xf8)
00734 {
00735 int b2 = *pb;
00736 if (b2)
00737 {
00738 pb++;
00739 int b3 = *pb;
00740 if (b3)
00741 {
00742 pb++;
00743 int b4 = *pb;
00744 if (b4)
00745 {
00746 ch = ((b1 & 0x07) << 18) | ((b2 & 0x3F) << 12) |
00747 ((b3 & 0x3F) << 6) | (b4 & 0x3f);
00748 pb++;
00749 }
00750 }
00751 }
00752 }
00753 else
00754 ch = 0xFFFD;
00755
00756 if (ch <= 0xFFFF)
00757 {
00758 pw = (WCHAR *) alloc_block (pw, (nchars + 1) * sizeof (WCHAR));
00759 pw [nchars++] = ch;
00760 }
00761 else
00762 {
00763 if (ch > 0x10FFFF)
00764 ch = 0x10FFFF;
00765 pw = (WCHAR *) alloc_block (pw, (nchars + 2) * sizeof (WCHAR));
00766 pw [nchars++] = C_TO_LEAD (ch);
00767 pw [nchars++] = C_TO_TRAIL (ch);
00768 }
00769 }
00770
00771 pw = (WCHAR *) alloc_block (pw, (nchars + 1) * sizeof (WCHAR));
00772 pw [nchars] = 0;
00773 *p_utf16_buf = pw;
00774
00775 #ifndef _UNICODE
00776 linebuf_UtoA (pw, nchars, line_buf, code_page, unknown_char, data_lost);
00777 #endif
00778
00779 return 0;
00780 }
00781
00782
00783
00784 int UTF8_TextFileReader::ReadChar (void)
00785 {
00786 #ifndef _UNICODE
00787 return TReadChar <BYTE> (this, '\r');
00788 #else
00789 int b1 = TReadChar <BYTE> (this, '\r');
00790 if (b1 < 0x80)
00791 return b1;
00792
00793 if (b1 < 0xe0)
00794 {
00795 int b2 = TReadChar <BYTE> (this, '\r');
00796 if (b2 < 0)
00797 return b2;
00798 return ((b1 & 0x1F) << 6) | (b2 & 0x3F);
00799 }
00800
00801 if (b1 < 0xf0)
00802 {
00803 int b2 = TReadChar <BYTE> (this, '\r');
00804 if (b2 < 0)
00805 return b2;
00806 int b3 = TReadChar <BYTE> (this, '\r');
00807 if (b3 < 0)
00808 return b3;
00809 return ((b1 & 0x0F) << 12) | ((b2 & 0x3F) << 6) | (b3 & 0x3F);
00810 }
00811
00812 if (b1 < 0xf8)
00813 {
00814 int b2 = TReadChar <BYTE> (this, '\r');
00815 if (b2 < 0)
00816 return b2;
00817 int b3 = TReadChar <BYTE> (this, '\r');
00818 if (b3 < 0)
00819 return b3;
00820 int b4 = TReadChar <BYTE> (this, '\r');
00821 if (b4 < 0)
00822 return b4;
00823 return ((b1 & 0x07) << 18) | ((b2 & 0x3F) << 12) |
00824 ((b3 & 0x3F) << 6) | (b4 & 0x3f);
00825 }
00826
00827 return 0xFFFD;
00828 #endif
00829 }
00830
00831 private:
00832 BYTE *utf8_buf;
00833 #ifndef _UNICODE
00834 WCHAR *utf16_buf;
00835 #endif
00836 };
00837
00838
00839
00840
00841
00842 class ANSI_TextFileWriter : public TextFileWriter
00843 {
00844 public:
00845
00846
00847 ANSI_TextFileWriter::ANSI_TextFileWriter (const TCHAR *filename, HANDLE hFile, int bufsize) :
00848 TextFileWriter (filename, hFile, bufsize)
00849 {
00850 #ifdef _UNICODE
00851 this->char_buf = NULL;
00852 #endif
00853 }
00854
00855
00856
00857 ANSI_TextFileWriter::~ANSI_TextFileWriter ()
00858 {
00859 #ifdef _UNICODE
00860 free_block (this->char_buf);
00861 #endif
00862 }
00863
00864
00865
00866 int ANSI_TextFileWriter::WriteChar
00867 (int ch, int code_page, const char *unknown_char, BOOL *data_lost)
00868 {
00869 #ifdef _UNICODE
00870 WCHAR u [2];
00871 u [0] = ch;
00872 u [1] = 0;
00873 return WriteString (u, 1, code_page, unknown_char, data_lost);
00874 #else
00875 BYTE b = ch;
00876 return WriteBytes (&b, 1);
00877 #endif
00878 }
00879
00880
00881
00882 int ANSI_TextFileWriter::WriteString
00883 (const TCHAR *s, int len, int code_page, const char *unknown_char, BOOL *data_lost)
00884 {
00885 #ifdef _UNICODE
00886 int nbytes = linebuf_UtoA (s, len, (char **) &this->char_buf,
00887 code_page, unknown_char, data_lost);
00888 return WriteBytes (this->char_buf, nbytes);
00889 #else
00890 return WriteBytes ((BYTE *) s, len);
00891 #endif
00892 }
00893
00894 private:
00895 #ifdef _UNICODE
00896 BYTE *char_buf;
00897 #endif
00898 };
00899
00900
00901
00902
00903
00904 class UTF16LE_TextFileWriter : public TextFileWriter
00905 {
00906 public:
00907
00908
00909 UTF16LE_TextFileWriter::UTF16LE_TextFileWriter (const TCHAR *filename, HANDLE hFile, int bufsize) :
00910 TextFileWriter (filename, hFile, bufsize)
00911 {
00912 #ifndef _UNICODE
00913 this->utf16_buf = NULL;
00914 #endif
00915 }
00916
00917
00918
00919 UTF16LE_TextFileWriter::~UTF16LE_TextFileWriter ()
00920 {
00921 #ifndef _UNICODE
00922 free_block (this->utf16_buf);
00923 #endif
00924 }
00925
00926
00927
00928 virtual int UTF16LE_TextFileWriter::WriteBOM (void)
00929 {
00930 BYTE bom [2] = { 0xFF, 0xFE };
00931 return WriteBytes (bom, sizeof (bom));
00932 }
00933
00934
00935
00936 int UTF16LE_TextFileWriter::WriteChar
00937 (int ch, int code_page, const char *unknown_char, BOOL *data_lost)
00938 {
00939 #ifdef _UNICODE
00940 return WriteBytes ((BYTE *) &ch, sizeof (WCHAR));
00941 #else
00942 char a [2];
00943 a [0] = ch;
00944 a [1] = 0;
00945 return WriteString (a, 1, code_page, unknown_char, data_lost);
00946 #endif
00947 }
00948
00949
00950
00951 int UTF16LE_TextFileWriter::WriteString
00952 (const TCHAR *s, int len, int code_page, const char *unknown_char, BOOL *data_lost)
00953 {
00954 #ifdef _UNICODE
00955 return WriteBytes ((BYTE *) s, len * sizeof (WCHAR));
00956 #else
00957 len = linebuf_AtoU (s, len, &this->utf16_buf, code_page, unknown_char, data_lost);
00958 return WriteBytes ((BYTE *) this->utf16_buf, len * sizeof (WCHAR));
00959 #endif
00960 }
00961
00962 private:
00963 #ifndef _UNICODE
00964 WCHAR *utf16_buf;
00965 #endif
00966 };
00967
00968
00969
00970
00971
00972 class UTF16BE_TextFileWriter : public TextFileWriter
00973 {
00974 public:
00975
00976
00977 UTF16BE_TextFileWriter::UTF16BE_TextFileWriter (const TCHAR *filename, HANDLE hFile, int bufsize) :
00978 TextFileWriter (filename, hFile, bufsize)
00979 {
00980 this->utf16_buf = NULL;
00981 }
00982
00983
00984
00985 UTF16BE_TextFileWriter::~UTF16BE_TextFileWriter ()
00986 {
00987 free_block (this->utf16_buf);
00988 }
00989
00990
00991
00992 virtual int UTF16BE_TextFileWriter::WriteBOM (void)
00993 {
00994 BYTE bom [2] = { 0xFE, 0xFF };
00995 return WriteBytes (bom, sizeof (bom));
00996 }
00997
00998
00999
01000 int UTF16BE_TextFileWriter::WriteChar
01001 (int ch, int code_page, const char *unknown_char, BOOL *data_lost)
01002 {
01003 #ifdef _UNICODE
01004 BYTE b [2];
01005 b [0] = ch >> 8;
01006 b [1] = ch & 0xFF;
01007 return WriteBytes (b, 2);
01008 #else
01009 char a [2];
01010 a [0] = ch;
01011 a [1] = 0;
01012 return WriteString (a, 1, code_page, unknown_char, data_lost);
01013 #endif
01014 }
01015
01016
01017
01018 int UTF16BE_TextFileWriter::WriteString
01019 (const TCHAR *s, int len, int code_page, const char *unknown_char, BOOL *data_lost)
01020 {
01021 #ifdef _UNICODE
01022 int nbytes = len * sizeof (WCHAR);
01023 this->utf16_buf = (WCHAR *) alloc_block (this->utf16_buf, nbytes);
01024 memcpy (this->utf16_buf, s, nbytes);
01025 #else
01026 len = linebuf_AtoU (s, len, &this->utf16_buf, code_page, unknown_char, data_lost);
01027 int nbytes = len * sizeof (WCHAR);
01028 #endif
01029
01030 flip_words (this->utf16_buf, len);
01031 return WriteBytes ((BYTE *) this->utf16_buf, nbytes);
01032 }
01033
01034 private:
01035 WCHAR *utf16_buf;
01036 };
01037
01038
01039
01040
01041
01042
01043
01044 static inline int UtoUTF8 (int ch, BYTE b [4])
01045 {
01046 if (ch < 0x80)
01047 {
01048 b [0] = (BYTE) ch;
01049 return 1;
01050 }
01051
01052 if (ch < 0x800)
01053 {
01054 b [0] = (ch >> 6) | 0xc0;
01055 b [1] = (ch & 0x3F) | 0x80;
01056 return 2;
01057 }
01058
01059 if (ch < 0xFFFF)
01060 {
01061 b [0] = (ch >> 12) | 0xe0;
01062 b [1] = ((ch >> 6) & 0x3F) | 0x80;
01063 b [2] = (ch & 0x3F) | 0x80;
01064 return 3;
01065 }
01066
01067
01068 b [0] = (ch >> 18) | 0xf0;
01069 b [1] = ((ch >> 12) & 0x3F) | 0x80;
01070 b [2] = ((ch >> 6) & 0x3F) | 0x80;
01071 b [3] = (ch & 0x3F) | 0x80;
01072 return 4;
01073 }
01074
01075
01076 class UTF8_TextFileWriter : public TextFileWriter
01077 {
01078 public:
01079
01080
01081 UTF8_TextFileWriter::UTF8_TextFileWriter (const TCHAR *filename, HANDLE hFile, int bufsize) :
01082 TextFileWriter (filename, hFile, bufsize) {}
01083
01084
01085
01086 virtual int UTF8_TextFileWriter::WriteBOM (void)
01087 {
01088 BYTE bom [3] = { 0xEF, 0xBB, 0xBF };
01089 return WriteBytes (bom, sizeof (bom));
01090 }
01091
01092
01093
01094 int UTF8_TextFileWriter::WriteChar
01095 (int ch, int code_page, const char *unknown_char, BOOL *data_lost)
01096 {
01097 BYTE b [4];
01098 int nbytes = UtoUTF8 (ch, b);
01099 return WriteBytes (b, nbytes);
01100 }
01101
01102
01103
01104
01105
01106
01107 int UTF8_TextFileWriter::WriteString
01108 (const TCHAR *s, int len, int code_page, const char *unknown_char, BOOL *data_lost)
01109 {
01110 while (len--)
01111 {
01112 #ifdef _UNICODE
01113 int ch = *s++;
01114 if (ch >= 0xD800 && ch <= 0xDBFF)
01115 {
01116 if (len > 0)
01117 {
01118 int trail = *s++;
01119 len--;
01120 ch = (trail >= 0xDC00 && trail <= 0xDFFF) ? SP_TO_C (ch, trail) : 0xFFFD;
01121 }
01122 else
01123 ch = 0xFFFD;
01124 }
01125 #else
01126 int ch = (BYTE) *s++;
01127 #endif
01128 BYTE b [4];
01129 int nbytes = UtoUTF8 (ch, b);
01130 int result = WriteBytes (b, nbytes);
01131 if (result < 0)
01132 return result;
01133 }
01134
01135 return 0;
01136 }
01137
01138 };
01139
01140
01141
01142
01143
01144
01145 TextFile::TextFile ()
01146 {
01147 this->filename = NULL;
01148 this->filemode = 0;
01149 this->encoding = 0;
01150 this->hFile = INVALID_HANDLE_VALUE;
01151 this->file_reader = NULL;
01152 this->file_writer = NULL;
01153 this->need_bom = false;
01154 this->code_page = CP_ACP;
01155 this->unknown_char = NULL;
01156 }
01157
01158
01159
01160 TextFile::~TextFile ()
01161 {
01162 Close ();
01163 free (this->unknown_char);
01164 }
01165
01166
01167
01168 int TextFile::Open (const TCHAR *filename, int filemode, int encoding, int bufsize)
01169 {
01170 DWORD dwDesiredAccess, dwCreationDisposition;
01171 int err_id;
01172 int encoding_flags = encoding;
01173 encoding &= 0xFF;
01174
01175 if (filemode == TF_READ)
01176 {
01177 dwDesiredAccess = GENERIC_READ;
01178 dwCreationDisposition = OPEN_EXISTING;
01179
01180 err_id = IDS_CANNOT_OPEN_FILE;
01181 }
01182 else
01183 {
01184 dwDesiredAccess = GENERIC_WRITE;
01185 dwCreationDisposition = (filemode == TF_WRITE) ? CREATE_ALWAYS : OPEN_ALWAYS;
01186 if (encoding == 0)
01187 encoding = TF_ANSI;
01188 err_id = IDS_CANNOT_OPEN_FILE_FOR_WRITING;
01189 }
01190
01191 HANDLE hFile = CreateFile (filename, dwDesiredAccess, FILE_SHARE_READ, NULL,
01192 dwCreationDisposition, FILE_ATTRIBUTE_NORMAL, NULL);
01193 if (hFile == INVALID_HANDLE_VALUE)
01194 {
01195
01196
01197
01198
01199 return TF_ERROR;
01200 }
01201
01202 this->filename = copy_string (filename);
01203 this->filemode = filemode;
01204 this->encoding = encoding;
01205 this->encoding_flags = encoding_flags;
01206 this->ansi_passthru = false;
01207 this->bufsize = bufsize;
01208 this->hFile = hFile;
01209
01210 if (filemode == TF_READ)
01211 return CreateFileReader ();
01212
01213 this->need_bom = true;
01214 if (filemode == TF_APPEND)
01215 {
01216 DWORD filesize = GetFileSize (hFile, NULL);
01217 if (filesize > 0)
01218 {
01219 SetFilePointer (hFile, 0, NULL, FILE_END);
01220 this->need_bom = false;
01221 }
01222 }
01223
01224 CreateFileWriter ();
01225 return 0;
01226 }
01227
01228
01229
01230 int TextFile::Close (void)
01231 {
01232 if (this->hFile == INVALID_HANDLE_VALUE)
01233 return 0;
01234 int result = Flush ();
01235 CloseHandle (this->hFile);
01236 this->hFile = INVALID_HANDLE_VALUE;
01237 this->filemode = 0;
01238 delete this->file_reader;
01239 delete this->file_writer;
01240 free (this->filename);
01241 return result;
01242 }
01243
01244
01245
01246 int TextFile::Flush (void)
01247 {
01248 if (this->filemode == TF_READ)
01249 return 0;
01250 return this->file_writer->Flush ();
01251 }
01252
01253
01254
01255
01256 int TextFile::ReadLine (BOOL *data_lost, TCHAR **line_buf)
01257 {
01258 assert (this->filemode == TF_READ);
01259 if (data_lost)
01260 *data_lost = FALSE;
01261
01262 return this->file_reader->ReadLine (line_buf, this->code_page, this->unknown_char, data_lost);
01263 }
01264
01265
01266
01267
01268
01269
01270 int TextFile::ReadChar (void)
01271 {
01272 assert (this->filemode == TF_READ);
01273 return this->file_reader->ReadChar ();
01274 }
01275
01276
01277
01278 int TextFile::WriteString (BOOL *data_lost, const TCHAR *s)
01279 {
01280 assert (this->filemode > TF_READ);
01281 if (data_lost)
01282 *data_lost = FALSE;
01283
01284 if (this->need_bom)
01285 {
01286 int result = file_writer->WriteBOM ();
01287 if (result < 0)
01288 return result;
01289 this->need_bom = false;
01290 }
01291
01292
01293 int code_page = this->code_page;
01294 char *unknown_char = this->unknown_char;
01295 bool unix = (this->encoding_flags & TF_UNIX) != 0;
01296
01297 while (*s)
01298 {
01299 const TCHAR *nl = (unix) ? NULL : _tcschr (s, '\n');
01300 if (nl)
01301 {
01302 int len = (int) (nl - s);
01303 int result = this->file_writer->WriteString
01304 (s, len, code_page, unknown_char, data_lost);
01305 if (result < 0)
01306 return result;
01307 result = this->file_writer->WriteString
01308 (__T ("\r\n"), 2, code_page, unknown_char, data_lost);
01309 if (result < 0)
01310 return result;
01311 s = nl + 1;
01312 }
01313 else
01314 return this->file_writer->WriteString
01315 (s, (int) _tcslen (s), code_page, unknown_char, data_lost);
01316 }
01317
01318 return 0;
01319 }
01320
01321
01322
01323 int TextFile::FormatString (BOOL *data_lost, const TCHAR *s, ...)
01324 {
01325 va_list ap;
01326 va_start (ap, s);
01327 TCHAR *p = vFormatPString (s, ap);
01328 int result = WriteString (data_lost, p);
01329 va_end (ap);
01330 free (p);
01331 return result;
01332 }
01333
01334
01335
01336 int TextFile::vFormatString (BOOL *data_lost, const TCHAR *s, va_list ap)
01337 {
01338 TCHAR *p = vFormatPString (s, ap);
01339 int result = WriteString (data_lost, p);
01340 free (p);
01341 return result;
01342 }
01343
01344
01345
01346
01347
01348 int TextFile::WriteChar (BOOL *data_lost, int ch)
01349 {
01350 assert (this->filemode > TF_READ);
01351 if (data_lost)
01352 *data_lost = FALSE;
01353
01354 if (this->need_bom)
01355 {
01356 int result = file_writer->WriteBOM ();
01357 if (result < 0)
01358 return result;
01359 this->need_bom = false;
01360 }
01361
01362 if (ch == '\n')
01363 return WriteString (data_lost, __T ("\n"));
01364
01365 return this->file_writer->WriteChar (ch, this->code_page, this->unknown_char, data_lost);
01366 }
01367
01368
01369
01370 void TextFile::SetUnknownChar (const char *unknown_char)
01371 {
01372 free (this->unknown_char);
01373 this->unknown_char = (unknown_char) ? AtoA (unknown_char) : NULL;
01374 }
01375
01376
01377
01378
01379
01380 void TextFile::SetAnsiPassThru (bool passthru)
01381 {
01382 assert (this->filemode == TF_READ);
01383 if (passthru)
01384 assert (this->encoding == TF_ANSI);
01385 this->ansi_passthru = passthru;
01386 }
01387
01388
01389
01390 static int read_1_byte (TCHAR *filename, HANDLE hFile)
01391 {
01392 BYTE b;
01393 DWORD bytes_read;
01394 if (!ReadFile (hFile, &b, 1, &bytes_read, NULL))
01395 {
01396
01397
01398
01399
01400 return TF_ERROR;
01401 }
01402
01403 return (bytes_read) ? b : TF_EOF;
01404 }
01405
01406
01407
01408 static bool check_bom (TCHAR *filename, HANDLE hFile, char *bom)
01409 {
01410 while (*bom)
01411 {
01412 int b = read_1_byte (filename, hFile);
01413 if (b != * (UCHAR *) bom++)
01414 {
01415 SetFilePointer (hFile, 0, NULL, FILE_BEGIN);
01416 return false;
01417 }
01418 }
01419
01420 return true;
01421 }
01422
01423
01424
01425 int TextFile::CreateFileReader (void)
01426 {
01427 TCHAR *filename = this->filename;
01428 HANDLE hFile = this->hFile;
01429 bool do_check_bom = (this->encoding_flags & TF_NO_BOM_CHECK) == 0;
01430
01431
01432 switch (this->encoding)
01433 {
01434 case TF_ANSI:
01435 this->file_reader = Hnew ANSI_TextFileReader (this, filename, hFile, this->bufsize);
01436 return 0;
01437
01438 case TF_UTF8:
01439 if (do_check_bom)
01440 check_bom (filename, hFile, "\xEF\xBB\xBF");
01441 this->file_reader = Hnew UTF8_TextFileReader (filename, hFile, this->bufsize);
01442 return 0;
01443
01444 case TF_UTF16LE:
01445 if (do_check_bom)
01446 check_bom (filename, hFile, "\xFF\xFE");
01447 this->file_reader = Hnew UTF16LE_TextFileReader (filename, hFile, this->bufsize);
01448 return 0;
01449
01450 case TF_UTF16BE:
01451 if (do_check_bom)
01452 check_bom (filename, hFile, "\xFE\xFF");
01453 this->file_reader = Hnew UTF16BE_TextFileReader (filename, hFile, this->bufsize);
01454 return 0;
01455 }
01456
01457
01458 int b1 = read_1_byte (filename, hFile);
01459 if (b1 == TF_ERROR)
01460 return TF_ERROR;
01461 if (b1 == TF_EOF)
01462 {
01463 this->file_reader = Hnew TextFileReader (filename, hFile, -1);
01464 return 0;
01465 }
01466
01467 if (b1 == 0xFF)
01468 {
01469 int b2 = read_1_byte (filename, hFile);
01470 if (b2 == TF_ERROR)
01471 return TF_ERROR;
01472 if (b2 == 0xFE)
01473 {
01474 this->encoding = TF_UTF16LE;
01475 this->file_reader = Hnew UTF16LE_TextFileReader (filename, hFile, this->bufsize);
01476 }
01477 else
01478 {
01479 SetFilePointer (hFile, 0, NULL, FILE_BEGIN);
01480 this->encoding = TF_ANSI;
01481 this->file_reader = Hnew ANSI_TextFileReader (this, filename, hFile, this->bufsize);
01482 }
01483 return 0;
01484 }
01485
01486 if (b1 == 0xFE)
01487 {
01488 int b2 = read_1_byte (filename, hFile);
01489 if (b2 == TF_ERROR)
01490 return TF_ERROR;
01491 if (b2 == 0xFF)
01492 {
01493 this->encoding = TF_UTF16BE;
01494 this->file_reader = Hnew UTF16BE_TextFileReader (filename, hFile, this->bufsize);
01495 return 0;
01496 }
01497 }
01498
01499 if (b1 == 0xEF)
01500 {
01501 int b2 = read_1_byte (filename, hFile);
01502 if (b2 == TF_ERROR)
01503 return TF_ERROR;
01504 if (b2 == 0xBB)
01505 {
01506 int b3 = read_1_byte (filename, hFile);
01507 if (b3 == TF_ERROR)
01508 return TF_ERROR;
01509 if (b3 == 0xBF)
01510 {
01511 this->encoding = TF_UTF8;
01512 this->file_reader = Hnew UTF8_TextFileReader (filename, hFile, this->bufsize);
01513 return 0;
01514 }
01515 }
01516 }
01517
01518 SetFilePointer (hFile, 0, NULL, FILE_BEGIN);
01519 this->encoding = TF_ANSI;
01520 this->file_reader = Hnew ANSI_TextFileReader (this, filename, hFile, this->bufsize);
01521 return 0;
01522 }
01523
01524
01525
01526 void TextFile::CreateFileWriter (void)
01527 {
01528 switch (this->encoding)
01529 {
01530 case TF_ANSI:
01531 this->file_writer = Hnew ANSI_TextFileWriter
01532 (this->filename, this->hFile, this->bufsize);
01533 break;
01534
01535 case TF_UTF16LE:
01536 this->file_writer = Hnew UTF16LE_TextFileWriter
01537 (this->filename, this->hFile, this->bufsize);
01538 break;
01539
01540 case TF_UTF16BE:
01541 this->file_writer = Hnew UTF16BE_TextFileWriter
01542 (this->filename, this->hFile, this->bufsize);
01543 break;
01544
01545 case TF_UTF8:
01546 this->file_writer = Hnew UTF8_TextFileWriter
01547 (this->filename, this->hFile, this->bufsize);
01548 break;
01549
01550 default:
01551 assert (0);
01552 break;
01553 }
01554 }