From: <and...@us...> - 2024-05-31 18:34:22
|
Revision: 7431 http://sourceforge.net/p/nsis/code/7431 Author: anders_k Date: 2024-05-31 18:34:18 +0000 (Fri, 31 May 2024) Log Message: ----------- Support setting source file decoding charset with Python-style PEP 263 magic comment Modified Paths: -------------- NSIS/trunk/Docs/src/history.but NSIS/trunk/Docs/src/script.but NSIS/trunk/Source/lineparse.cpp NSIS/trunk/Source/lineparse.h NSIS/trunk/Source/script.cpp NSIS/trunk/Source/utf.cpp Modified: NSIS/trunk/Docs/src/history.but =================================================================== --- NSIS/trunk/Docs/src/history.but 2024-05-30 19:07:00 UTC (rev 7430) +++ NSIS/trunk/Docs/src/history.but 2024-05-31 18:34:18 UTC (rev 7431) @@ -12,6 +12,8 @@ \S2{} Minor Changes +\b Support setting source file decoding charset with magic comment (\W{http://sf.net/p/nsis/patches/292}{patch #292}) + \b Make System/Resource.dll reproducible (\W{http://sf.net/p/nsis/patches/312}{patch #312}) \S2{} Translations Modified: NSIS/trunk/Docs/src/script.but =================================================================== --- NSIS/trunk/Docs/src/script.but 2024-05-30 19:07:00 UTC (rev 7430) +++ NSIS/trunk/Docs/src/script.but 2024-05-31 18:34:18 UTC (rev 7431) @@ -31,6 +31,10 @@ If you want a parameter to start with ; or # put it in quotes. +If the first or second line in the file is a \#{pep263}comment with the following format, the rest of the file is decoded using the specified character set. + +\c # -*- coding: utf-8 -*- + \\<b\\>Plug-ins\\</b\\> To call a plug-in, use 'plugin::command [parameters]'. For more info see \R{plugindlls}{Plug-in DLLs}. 
Modified: NSIS/trunk/Source/lineparse.cpp =================================================================== --- NSIS/trunk/Source/lineparse.cpp 2024-05-30 19:07:00 UTC (rev 7430) +++ NSIS/trunk/Source/lineparse.cpp 2024-05-31 18:34:18 UTC (rev 7431) @@ -52,11 +52,11 @@ return m_incommentblock; } -int LineParser::parse(TCHAR *line, int ignore_escaping/*=0*/) // returns -1 on error +int LineParser::parse(const TCHAR *line, int ignore_escaping/*=0*/, NStreamEncoding*pEncChange) // returns -1 on error { freetokens(); bool bPrevCB=m_incommentblock; - int n=doline(line, ignore_escaping); + int n=doline(line, ignore_escaping, pEncChange); if (n) return n; if (m_nt) { @@ -305,8 +305,14 @@ m_nt=0; } -int LineParser::doline(TCHAR *line, int ignore_escaping/*=0*/) +static bool IsPEP263EncodingCharacter(TCHAR c) { + TCHAR low = S7ChLwr(c); + return ('a' <= low && low <= 'z') || ('0' <= c && c <= '9') || c == '.' || c == '-' || c == '_'; +} + +int LineParser::doline(const TCHAR *line, int ignore_escaping/*=0*/, NStreamEncoding*pEncChange) +{ m_nt=0; m_incomment = false; while (*line == _T(' ') || *line == _T('\t')) line++; @@ -332,6 +338,21 @@ if (*line == _T(';') || *line == _T('#')) { m_incomment = true; + TCHAR *p = const_cast<TCHAR*>(line); + if (pEncChange && *++p) // Check for Python PEP 263 magic encoding comment + { + p = _tcsstr(p, L"coding"); + if (p) p += sizeof("coding") - 1; else break; + if (*p == ':' || *p == '=') ++p; else break; + while (*p == _T(' ') || *p == _T('\t')) ++p; + TCHAR buf[200]; + if (strtrycpy(buf, p, COUNTOF(buf))) + { + for (p = buf; IsPEP263EncodingCharacter(*p);) ++p; + *p = '\0'; + pEncChange->SetCodepage(GetEncodingFromString(buf)); + } + } break; } if (*line == _T('/') && *(line+1) == _T('*')) @@ -346,7 +367,7 @@ else if (*line == _T('`')) lstate=4; if (lstate) line++; int nc=0; - TCHAR *p = line; + const TCHAR *p = line; while (*line) { if (line[0] == _T('$') && line[1] == _T('\\')) Modified: NSIS/trunk/Source/lineparse.h 
=================================================================== --- NSIS/trunk/Source/lineparse.h 2024-05-30 19:07:00 UTC (rev 7430) +++ NSIS/trunk/Source/lineparse.h 2024-05-31 18:34:18 UTC (rev 7431) @@ -20,6 +20,7 @@ #define _LINEPARSE_H_ #include "tchar.h" +#include "utf.h" class LineParser { public: @@ -29,7 +30,7 @@ bool inComment(); bool inCommentBlock(); - int parse(TCHAR *line, int ignore_escaping=0); // returns -1 on error + int parse(const TCHAR *line, int ignore_escaping=0, NStreamEncoding*pEncChange=NULL); // returns -1 on error int getnumtokens(); void eattoken(); int gettoken_int(int token, int *success=0) const; @@ -48,7 +49,7 @@ private: void freetokens(); - int doline(TCHAR *line, int ignore_escaping=0); + int doline(const TCHAR *line, int ignore_escaping=0, NStreamEncoding*pEncChange=NULL); inline int validate_token_index(int token, int *success=0) const; int m_eat; Modified: NSIS/trunk/Source/script.cpp =================================================================== --- NSIS/trunk/Source/script.cpp 2024-05-30 19:07:00 UTC (rev 7430) +++ NSIS/trunk/Source/script.cpp 2024-05-31 18:34:18 UTC (rev 7431) @@ -219,7 +219,7 @@ LineParser prevline(inside_comment); prevline.parse((TCHAR*)m_linebuild.get()); LineParser thisline(inside_comment); - thisline.parse((TCHAR*)str); + thisline.parse(str); if (prevline.inComment() && !thisline.inComment()) { @@ -242,8 +242,14 @@ // escaped quotes should be ignored for compile time commands that set defines // because defines can be inserted in commands at a later stage bool ignore_escaping = (!_tcsnicmp((TCHAR*)m_linebuild.get(),_T("!define"),7) || !_tcsnicmp((TCHAR*)m_linebuild.get(),_T("!insertmacro"),12)); - res=line.parse((TCHAR*)m_linebuild.get(), ignore_escaping); + NStreamEncoding enc(NStreamEncoding::UNKNOWN); + + res=line.parse((TCHAR*)m_linebuild.get(), ignore_escaping, linecnt < 3 ? 
&enc : NULL); + + if (enc.GetCodepage() != NStreamEncoding::UNKNOWN && curlinereader) + curlinereader->StreamEncoding().SafeSetCodepage(enc.GetCodepage()); + inside_comment = line.inCommentBlock(); // if ignoring, ignore all lines that don't begin with an exclamation mark Modified: NSIS/trunk/Source/utf.cpp =================================================================== --- NSIS/trunk/Source/utf.cpp 2024-05-30 19:07:00 UTC (rev 7430) +++ NSIS/trunk/Source/utf.cpp 2024-05-31 18:34:18 UTC (rev 7431) @@ -274,6 +274,7 @@ } return 0; } + UINT DetectUTFBOM(FILE*strm) { /*\ @@ -325,19 +326,32 @@ return 0; } +static bool IsUTFEncodingString(const TCHAR*s, const TCHAR*suff) +{ + if (S7ChLwr(s[0]) == 'u' && S7ChLwr(s[1]) == 't' && S7ChLwr(s[2]) == 'f') + { + s += 3 + (s[3] == '-' || s[3] == '_'); + return !_tcsicmp(s, suff); + } + return false; +} + WORD GetEncodingFromString(const TCHAR*s, bool&BOM) { BOM = false; - if (!_tcsicmp(s,_T("ACP"))) return NStreamEncoding::ACP; - if (!_tcsicmp(s,_T("OEM"))) return NStreamEncoding::OEMCP; - if (!_tcsicmp(s,_T("UTF8"))) return NStreamEncoding::UTF8; - if ((!_tcsicmp(s,_T("UTF8SIG")) || !_tcsicmp(s,_T("UTF8BOM"))) && (BOM = true)) + if (!_tcsicmp(s,_T("ACP"))) + return NStreamEncoding::ACP; + if (!_tcsicmp(s,_T("OEM"))) + return NStreamEncoding::OEMCP; + if (IsUTFEncodingString(s, _T("8"))) return NStreamEncoding::UTF8; - if (!_tcsicmp(s,_T("UTF16LE")) || (!_tcsicmp(s,_T("UTF16LEBOM")) && (BOM = true))) + if (IsUTFEncodingString(s, _T("8SIG")) || IsUTFEncodingString(s, _T("8BOM"))) + return (BOM = true, NStreamEncoding::UTF8); + if (IsUTFEncodingString(s, _T("16LE")) || (IsUTFEncodingString(s, _T("16LEBOM")) && (BOM = true))) return NStreamEncoding::UTF16LE; - if (!_tcsicmp(s,_T("UTF16BE")) || (!_tcsicmp(s,_T("UTF16BEBOM")) && (BOM = true))) - return NStreamEncoding::UTF16BE; - if (S7IsChEqualI('C',*s++) && S7IsChEqualI('P',*s++)) + if (IsUTFEncodingString(s, _T("16BE")) || (IsUTFEncodingString(s, _T("16BEBOM")) && (BOM = 
true))) + return NStreamEncoding::UTF16BE; + if (S7IsChEqualI('C', *s++) && S7IsChEqualI('P', *s++)) { int cp = _tstoi(s); if (cp > 0 && cp < NStreamEncoding::CPCOUNT) return (WORD) cp; @@ -344,6 +358,7 @@ } return NStreamEncoding::UNKNOWN; } + WORD GetEncodingFromString(const TCHAR*s) { bool bom; @@ -365,10 +380,10 @@ case UTF8: p = _T("UTF8"); break; case BINARY: p = _T("BIN"); break; default: - _stprintf(mybuf,_T("CP%u"),CP); + _stprintf(mybuf,_T("CP%u"), CP); if (CP >= NStreamEncoding::CPCOUNT) p = _T("?"); } - _tcscpy(Buf,p); + _tcscpy(Buf, p); } bool NBaseStream::Attach(FILE*hFile, WORD enc, bool Seek /*= true*/) This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |