Revision: 7326
http://winmerge.svn.sourceforge.net/winmerge/?rev=7326&view=rev
Author: gerundt
Date: 2010-10-20 20:04:08 +0000 (Wed, 20 Oct 2010)
Log Message:
-----------
Fix whitespace in unicoder.[cpp|h] and UniFile.[cpp|h] from R2_14
Modified Paths:
--------------
branches/R2_14/Src/Common/UniFile.cpp
branches/R2_14/Src/Common/UniFile.h
branches/R2_14/Src/Common/unicoder.cpp
branches/R2_14/Src/Common/unicoder.h
Modified: branches/R2_14/Src/Common/UniFile.cpp
===================================================================
--- branches/R2_14/Src/Common/UniFile.cpp 2010-10-20 17:50:53 UTC (rev 7325)
+++ branches/R2_14/Src/Common/UniFile.cpp 2010-10-20 20:04:08 UTC (rev 7326)
@@ -52,7 +52,7 @@
*/
UniFile::UniError::UniError()
{
- ClearError();
+ ClearError();
}
/**
@@ -195,11 +195,11 @@
/////////////
UniMemFile::UniMemFile()
-: m_handle(INVALID_HANDLE_VALUE)
-, m_hMapping(INVALID_HANDLE_VALUE)
-, m_base(NULL)
-, m_data(NULL)
-, m_current(NULL)
+ : m_handle(INVALID_HANDLE_VALUE)
+ , m_hMapping(INVALID_HANDLE_VALUE)
+ , m_base(NULL)
+ , m_data(NULL)
+ , m_current(NULL)
{
}
@@ -354,7 +354,7 @@
m_charsize = 1;
bool unicode = false;
bool bom = false;
-
+
m_unicoding = ucr::DetermineEncoding(lpByte, m_filesize, &bom);
switch (m_unicoding)
{
@@ -463,9 +463,9 @@
line.erase();
eol.erase();
LPCTSTR pchLine = (LPCTSTR)m_current;
-
+
// shortcut methods in case file is in the same encoding as our Strings
-
+
#ifdef _UNICODE
if (m_unicoding == ucr::UCS2LE)
{
@@ -560,19 +560,19 @@
}
#endif
- if (m_current - m_base + (m_charsize-1) >= m_filesize)
+ if (m_current - m_base + (m_charsize - 1) >= m_filesize)
return false;
// Handle 8-bit strings in line chunks because of multibyte codings (eg, 936)
if (m_unicoding == ucr::NONE)
{
- bool eof=true;
- LPBYTE eolptr=0;
- for (eolptr = m_current; (eolptr - m_base + (m_charsize-1) < m_filesize); ++eolptr)
+ bool eof = true;
+ LPBYTE eolptr = 0;
+ for (eolptr = m_current; (eolptr - m_base + (m_charsize - 1) < m_filesize); ++eolptr)
{
if (*eolptr == '\n' || *eolptr == '\r')
{
- eof=false;
+ eof = false;
break;
}
if (*eolptr == 0)
@@ -581,16 +581,16 @@
RecordZero(m_txtstats, offset);
}
}
- line = ucr::maketstring((LPCSTR)m_current, eolptr-m_current, m_codepage, lossy);
+ line = ucr::maketstring((LPCSTR)m_current, eolptr - m_current, m_codepage, lossy);
if (lossy && *lossy)
++m_txtstats.nlosses;
if (!eof)
{
- eol += (TCHAR)*eolptr;
+ eol += (TCHAR) * eolptr;
++m_lineno;
if (*eolptr == '\r')
{
- if (eolptr - m_base + (m_charsize-1) < m_filesize && eolptr[1] == '\n')
+ if (eolptr - m_base + (m_charsize - 1) < m_filesize && eolptr[1] == '\n')
{
eol += '\n';
++m_txtstats.ncrlfs;
@@ -606,11 +606,11 @@
return !eof;
}
- while (m_current - m_base + (m_charsize-1) < m_filesize)
+ while (m_current - m_base + (m_charsize - 1) < m_filesize)
{
- UINT ch=0;
- UINT utf8len=0;
- bool doneline=false;
+ UINT ch = 0;
+ UINT utf8len = 0;
+ bool doneline = false;
if (m_unicoding == ucr::UTF8)
{
@@ -627,7 +627,7 @@
else if (utf8len < 1 || utf8len > 4)
{
ch = '?';
- utf8len=1;
+ utf8len = 1;
}
else
{
@@ -662,7 +662,7 @@
{
// For UTF-8, this ch will be wrong if character is non-ASCII
// but we only check it against \n here, so it doesn't matter
- UINT ch = ucr::get_unicode_char(m_current+m_charsize, (ucr::UNICODESET)m_unicoding);
+ UINT ch = ucr::get_unicode_char(m_current + m_charsize, (ucr::UNICODESET)m_unicoding);
if (ch == '\n')
{
crlf = true;
@@ -725,8 +725,8 @@
/////////////
UniStdioFile::UniStdioFile()
-: m_fp(0)
-, m_data(0)
+ : m_fp(0)
+ , m_data(0)
{
m_pucrbuff = new ucr::buffer(128);
}
@@ -734,7 +734,7 @@
UniStdioFile::~UniStdioFile()
{
Close();
- delete (ucr::buffer *)m_pucrbuff;
+ delete(ucr::buffer *)m_pucrbuff;
m_pucrbuff = 0;
}
@@ -781,11 +781,11 @@
}
bool UniStdioFile::OpenCreateUtf8(LPCTSTR filename)
{
- if (!OpenCreate(filename))
+ if (!OpenCreate(filename))
return false;
SetUnicoding(ucr::UTF8);
return true;
-
+
}
bool UniStdioFile::Open(LPCTSTR filename, LPCTSTR mode)
{
@@ -800,7 +800,7 @@
bool UniStdioFile::DoOpen(LPCTSTR filename, LPCTSTR mode)
{
Close();
-
+
m_filepath = filename;
m_filename = filename; // TODO: Make canonical ?
@@ -866,7 +866,7 @@
m_charsize = 1;
bool unicode = false;
bool bom = false;
-
+
m_unicoding = ucr::DetermineEncoding(buff, bytes, &bom);
switch (m_unicoding)
{
@@ -975,8 +975,8 @@
}
ucr::buffer * buff = (ucr::buffer *)m_pucrbuff;
- ucr::UNICODESET unicoding1=ucr::NONE;
- int codepage1=0;
+ ucr::UNICODESET unicoding1 = ucr::NONE;
+ int codepage1 = 0;
ucr::getInternalEncoding(&unicoding1, &codepage1); // What String & TCHARs represent
const unsigned char * src = (const UCHAR *)line.c_str();
int srcbytes = line.length() * sizeof(TCHAR);
Modified: branches/R2_14/Src/Common/UniFile.h
===================================================================
--- branches/R2_14/Src/Common/UniFile.h 2010-10-20 17:50:53 UTC (rev 7325)
+++ branches/R2_14/Src/Common/UniFile.h 2010-10-20 20:04:08 UTC (rev 7326)
@@ -1,4 +1,4 @@
-/**
+/**
* @file UniFile.h
* @author Perry Rapp, Creator, 2003-2006
* @date Created: 2003-10
@@ -20,7 +20,7 @@
class UniFile
{
public:
-
+
/**
* @brief A struct for error message or error code.
*/
Modified: branches/R2_14/Src/Common/unicoder.cpp
===================================================================
--- branches/R2_14/Src/Common/unicoder.cpp 2010-10-20 17:50:53 UTC (rev 7325)
+++ branches/R2_14/Src/Common/unicoder.cpp 2010-10-20 20:04:08 UTC (rev 7326)
@@ -32,10 +32,11 @@
#define WC_NO_BEST_FIT_CHARS 0x00000400
#endif
-namespace ucr {
+namespace ucr
+{
// current OS version
-static bool f_osvi_fetched=false;
+static bool f_osvi_fetched = false;
static OSVERSIONINFO f_osvi;
/**
@@ -132,7 +133,7 @@
}
/**
- * @brief return #bytes required to represent Unicode codepoint as UTF-8
+ * @brief return #bytes required to represent Unicode codepoint as UTF-8
*/
int Utf8len_fromCodepoint(unsigned int ch)
{
@@ -146,7 +147,7 @@
}
/**
- * @brief How many bytes will it take to write string as UTF-8 ?
+ * @brief How many bytes will it take to write string as UTF-8 ?
*
* @param size size argument as filemapping are not 0 terminated
*
@@ -154,8 +155,8 @@
*/
unsigned int Utf8len_of_string(const wchar_t* text, int size)
{
- unsigned int len=0;
- for (int i=0; i<size; ++i)
+ unsigned int len = 0;
+ for (int i = 0; i < size; ++i)
{
int chlen = Utf8len_fromCodepoint(text[i]);
if (chlen < 1) chlen = 1;
@@ -164,7 +165,7 @@
return len;
}
/**
- * @brief How many chars in this UTF-8 string ?
+ * @brief How many chars in this UTF-8 string ?
*
* @param size size argument as filemapping are not 0 terminated
*
@@ -172,8 +173,8 @@
*/
unsigned int stringlen_of_utf8(const char* text, int size)
{
- unsigned int len=0;
- for (int i=0; i<size; )
+ unsigned int len = 0;
+ for (int i = 0; i < size;)
{
int chlen = Utf8len_fromLeadByte(text[i]);
if (chlen < 1) chlen = 1;
@@ -189,41 +190,48 @@
unsigned int GetUtf8Char(unsigned char * str)
{
/* test short cases first, as probably much more common */
- if (!(*str & 0x80 && *str & 0x40)) {
+ if (!(*str & 0x80 && *str & 0x40))
+ {
return str[0];
}
- if (!(*str & 0x20)) {
+ if (!(*str & 0x20))
+ {
unsigned int ch = ((str[0] & 0x1F) << 6)
- + (str[1] & 0x3F);
+ + (str[1] & 0x3F);
return ch;
}
- if (!(*str & 0x10)) {
+ if (!(*str & 0x10))
+ {
unsigned int ch = ((str[0] & 0x0f) << 12)
- + ((str[1] & 0x3F) << 6)
- + (str[2] & 0x3F);
+ + ((str[1] & 0x3F) << 6)
+ + (str[2] & 0x3F);
return ch;
}
- if (!(*str & 0x08)) {
+ if (!(*str & 0x08))
+ {
unsigned int ch = ((str[0] & 0x0F) << 18)
- + ((str[1] & 0x3F) << 12)
- + ((str[2] & 0x3F) << 6)
- + (str[3] & 0x3F);
+ + ((str[1] & 0x3F) << 12)
+ + ((str[2] & 0x3F) << 6)
+ + (str[3] & 0x3F);
return ch;
}
- if (!(*str & 0x04)) {
+ if (!(*str & 0x04))
+ {
unsigned int ch = ((str[0] & 0x0F) << 24)
- + ((str[1] & 0x3F) << 18)
- + ((str[2] & 0x3F) << 12)
- + ((str[3] & 0x3F) << 6)
- + (str[4] & 0x3F);
+ + ((str[1] & 0x3F) << 18)
+ + ((str[2] & 0x3F) << 12)
+ + ((str[3] & 0x3F) << 6)
+ + (str[4] & 0x3F);
return ch;
- } else {
+ }
+ else
+ {
unsigned int ch = ((str[0] & 0x0F) << 30)
- + ((str[1] & 0x3F) << 24)
- + ((str[2] & 0x3F) << 18)
- + ((str[3] & 0x3F) << 12)
- + ((str[4] & 0x3F) << 6)
- + (str[5] & 0x3F);
+ + ((str[1] & 0x3F) << 24)
+ + ((str[2] & 0x3F) << 18)
+ + ((str[3] & 0x3F) << 12)
+ + ((str[4] & 0x3F) << 6)
+ + (str[5] & 0x3F);
return ch;
}
}
@@ -322,21 +330,21 @@
wchar_t wch = (wchar_t)unich;
if (!lossy)
{
- static bool vercheck=false;
- static bool has_no_best_fit=false;
+ static bool vercheck = false;
+ static bool has_no_best_fit = false;
if (!vercheck)
{
if (!f_osvi_fetched) fetch_verinfo();
// Need 2000 (5.x) or 98 (4.10)
- has_no_best_fit = f_osvi.dwMajorVersion>=5 || (f_osvi.dwMajorVersion==4 && f_osvi.dwMinorVersion>=10);
+ has_no_best_fit = f_osvi.dwMajorVersion >= 5 || (f_osvi.dwMajorVersion == 4 && f_osvi.dwMinorVersion >= 10);
vercheck = true;
}
// So far it isn't lossy, so try for lossless conversion
TCHAR outch;
- BOOL defaulted=FALSE;
+ BOOL defaulted = FALSE;
DWORD flags = has_no_best_fit ? WC_NO_BEST_FIT_CHARS : 0;
if (WideCharToMultiByte(codepage, flags, &wch, 1, &outch, 1, NULL, &defaulted)
- && !defaulted)
+ && !defaulted)
{
String s(1, outch);
return s;
@@ -344,12 +352,12 @@
lossy = TRUE;
}
// already lossy, so make our best shot
- DWORD flags = WC_COMPOSITECHECK+WC_DISCARDNS+WC_SEPCHARS+WC_DEFAULTCHAR;
+ DWORD flags = WC_COMPOSITECHECK + WC_DISCARDNS + WC_SEPCHARS + WC_DEFAULTCHAR;
TCHAR outbuff[16];
- int n = WideCharToMultiByte(codepage, flags, &wch, 1, outbuff, sizeof(outbuff)-1, NULL, NULL);
- if (n>0)
+ int n = WideCharToMultiByte(codepage, flags, &wch, 1, outbuff, sizeof(outbuff) - 1, NULL, NULL);
+ if (n > 0)
{
- outbuff[n] =0;
+ outbuff[n] = 0;
return outbuff;
}
return _T("?");
@@ -359,7 +367,7 @@
/**
* @brief convert 8-bit character input to Unicode codepoint and return it
*/
-unsigned int byteToUnicode (unsigned char ch)
+unsigned int byteToUnicode(unsigned char ch)
{
static unsigned int codepage = CP_ACP;
// NB: Windows always draws in CP_ACP, not CP_THREAD_ACP, so we must use CP_ACP as an internal codepage
@@ -370,7 +378,7 @@
/**
* @brief convert 8-bit character input to Unicode codepoint and return it
*/
-unsigned int byteToUnicode (unsigned char ch, unsigned int codepage)
+unsigned int byteToUnicode(unsigned char ch, unsigned int codepage)
{
if (ch < 0x80)
@@ -378,8 +386,8 @@
DWORD flags = 0;
wchar_t wbuff;
- int n = MultiByteToWideChar(codepage, flags, (const char*)&ch, 1, &wbuff, 1);
- if (n>0)
+ int n = MultiByteToWideChar(codepage, flags, (const char*) & ch, 1, &wbuff, 1);
+ if (n > 0)
return wbuff;
else
return '?';
@@ -482,12 +490,12 @@
LPWSTR wbuff = &*str.begin();
do
{
- int n = MultiByteToWideChar(codepage, flags, lpd, len, wbuff, wlen-1);
+ int n = MultiByteToWideChar(codepage, flags, lpd, len, wbuff, wlen - 1);
if (n)
{
/*
- NB: MultiByteToWideChar is documented as only zero-terminating
- if input was zero-terminated, but it appears that it can
+ NB: MultiByteToWideChar is documented as only zero-terminating
+ if input was zero-terminated, but it appears that it can
zero-terminate even if input wasn't.
So we check if it zero-terminated and adjust count accordingly.
*/
@@ -505,7 +513,8 @@
}
*lossy = true;
flags ^= MB_ERR_INVALID_CHARS;
- } while (flags == 0 && GetLastError() == ERROR_NO_UNICODE_TRANSLATION);
+ }
+ while (flags == 0 && GetLastError() == ERROR_NO_UNICODE_TRANSLATION);
str = _T('?');
return str;
@@ -528,7 +537,7 @@
#ifndef UNICODE
String CrossConvertToStringA(const char* src, unsigned int srclen, int cpin, int cpout, bool * lossy)
{
- int wlen = srclen*2+6;
+ int wlen = srclen * 2 + 6;
int clen = wlen * 2 + 6;
String str;
str.resize(clen);
@@ -550,9 +559,9 @@
// Convert input to Unicode, using specified codepage
DWORD flags = 0;
- int wlen = srclen*2+6;
+ int wlen = srclen * 2 + 6;
wchar_t * wbuff = new wchar_t[wlen];
- int n = MultiByteToWideChar(cpin, flags, (const char*)src, srclen, wbuff, wlen-1);
+ int n = MultiByteToWideChar(cpin, flags, (const char*)src, srclen, wbuff, wlen - 1);
if (!n)
{
int nsyserr = ::GetLastError();
@@ -562,8 +571,8 @@
return 1;
}
/*
- NB: MultiByteToWideChar is documented as only zero-terminating
- if input was zero-terminated, but it appears that it can
+ NB: MultiByteToWideChar is documented as only zero-terminating
+ if input was zero-terminated, but it appears that it can
zero-terminate even if input wasn't.
So we check if it zero-terminated and adjust count accordingly.
*/
@@ -574,14 +583,14 @@
// Now convert to TCHAR (which means defcodepage)
flags = WC_NO_BEST_FIT_CHARS; // TODO: Think about this
wlen = n;
- BOOL defaulted=FALSE;
+ BOOL defaulted = FALSE;
BOOL * pdefaulted = &defaulted;
if (cpout == CP_UTF8)
{
flags = 0;
pdefaulted = NULL;
}
- n = WideCharToMultiByte(cpout, flags, wbuff, n, dest, destsize-1, NULL, pdefaulted);
+ n = WideCharToMultiByte(cpout, flags, wbuff, n, dest, destsize - 1, NULL, pdefaulted);
if (!n)
{
int nsyserr = ::GetLastError();
@@ -642,11 +651,11 @@
return true;
}
if ((unicoding1 == UCS2LE && unicoding2 == UCS2BE)
- || (unicoding1 == UCS2BE && unicoding2 == UCS2LE))
+ || (unicoding1 == UCS2BE && unicoding2 == UCS2LE))
{
// simple byte swap
dest->resize(srcbytes);
- for (int i=0; i<srcbytes; i += 2)
+ for (int i = 0; i < srcbytes; i += 2)
{
// Byte-swap into destination
dest->ptr[i] = src[i+1];
@@ -671,12 +680,12 @@
int destcp = (unicoding2 == UTF8 ? CP_UTF8 : codepage2);
DWORD flags = 0;
- int bytes = WideCharToMultiByte(destcp, flags, (const wchar_t*)src, srcbytes/2, 0, 0, NULL, NULL);
+ int bytes = WideCharToMultiByte(destcp, flags, (const wchar_t*)src, srcbytes / 2, 0, 0, NULL, NULL);
dest->resize(bytes);
int losses = 0;
- bytes = WideCharToMultiByte(destcp, flags, (const wchar_t*)src, srcbytes/2, (char *)dest->ptr, dest->capacity, NULL, NULL);
+ bytes = WideCharToMultiByte(destcp, flags, (const wchar_t*)src, srcbytes / 2, (char *)dest->ptr, dest->capacity, NULL, NULL);
dest->size = bytes;
- return losses==0;
+ return losses == 0;
}
else
{
@@ -685,7 +694,7 @@
DWORD flags = 0;
int wchars = MultiByteToWideChar(srccp, flags, (const char*)src, srcbytes, 0, 0);
dest->resize(wchars*2);
- wchars = MultiByteToWideChar(srccp, flags, (const char*)src, srcbytes, (LPWSTR)dest->ptr, dest->capacity/2);
+ wchars = MultiByteToWideChar(srccp, flags, (const char*)src, srcbytes, (LPWSTR)dest->ptr, dest->capacity / 2);
dest->size = wchars * 2;
return true;
}
@@ -697,6 +706,11 @@
* @param [in] size Size of the buffer.
* @param [out] pBom Returns true if buffer had BOM bytes, false otherwise.
* @return One of UNICODESET values as encoding.
+ * EF BB BF UTF-8
+ * FF FE UTF-16, little endian
+ * FE FF UTF-16, big endian
+ * FF FE 00 00 UTF-32, little endian
+ * 00 00 FE FF UTF-32, big-endian
*/
UNICODESET DetermineEncoding(unsigned char* pBuffer, int size, bool * pBom)
{
@@ -707,12 +721,12 @@
{
if (pBuffer[0] == 0xFF && pBuffer[1] == 0xFE)
{
- unicoding = ucr::UCS2LE;
+ unicoding = ucr::UCS2LE; //UNI little endian
*pBom = true;
}
else if (pBuffer[0] == 0xFE && pBuffer[1] == 0xFF)
{
- unicoding = ucr::UCS2BE;
+ unicoding = ucr::UCS2BE; //UNI big endian
*pBom = true;
}
}
@@ -750,7 +764,7 @@
if (cp == CP_THREAD_ACP) // should only happen on Win2000+
{
TCHAR buff[32];
- if (GetLocaleInfo(GetThreadLocale(), LOCALE_IDEFAULTANSICODEPAGE, buff, sizeof(buff)/sizeof(buff[0])))
+ if (GetLocaleInfo(GetThreadLocale(), LOCALE_IDEFAULTANSICODEPAGE, buff, sizeof(buff) / sizeof(buff[0])))
cp = _ttol(buff);
else
// a valid codepage is better than no codepage
@@ -767,5 +781,5 @@
bool EqualCodepages(int cp1, int cp2)
{
return (cp1 == cp2)
- || (NormalizeCodepage(cp1) == NormalizeCodepage(cp2));
+ || (NormalizeCodepage(cp1) == NormalizeCodepage(cp2));
}
Modified: branches/R2_14/Src/Common/unicoder.h
===================================================================
--- branches/R2_14/Src/Common/unicoder.h 2010-10-20 17:50:53 UTC (rev 7325)
+++ branches/R2_14/Src/Common/unicoder.h 2010-10-20 20:04:08 UTC (rev 7326)
@@ -5,7 +5,7 @@
* @date Edited: 2006-02-20 (Perry Rapp)
*
* @brief Declaration of utility unicode conversion routines
- */
+ */
// ID line follows -- this is updated by SVN
// $Id$
@@ -14,12 +14,14 @@
#include "UnicodeString.h"
-namespace ucr {
+namespace ucr
+{
/**
* @brief A simple buffer struct.
*/
-struct buffer {
+struct buffer
+{
unsigned char * ptr; /**< Pointer to a buffer. */
unsigned int capacity; /**< Buffer's size in bytes. */
unsigned int size; /**< Size of the data in the buffer, <= capacity. */
@@ -47,7 +49,7 @@
int to_utf8_advance(unsigned int u, unsigned char * &lpd);
String maketchar(unsigned int ch, bool & lossy);
int writeBom(void* dest, UNICODESET unicoding);
-unsigned int get_unicode_char(unsigned char * ptr, UNICODESET unicoding, int codepage=0);
+unsigned int get_unicode_char(unsigned char * ptr, UNICODESET unicoding, int codepage = 0);
String maketstring(const char* lpd, unsigned int len, int codepage, bool * lossy);
String maketchar(unsigned int unich, bool & lossy);
String maketchar(unsigned int unich, bool & lossy, unsigned int codepage);
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|