From: Christian P. <cp...@us...> - 2005-01-27 10:41:53
|
Update of /cvsroot/pclasses/pclasses2/src/IO In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv1824/src/IO Modified Files: URL.cpp Log Message: Added URL-encoding/decoding. Re-implemented URL-parser (code was too complex and slow). Index: URL.cpp =================================================================== RCS file: /cvsroot/pclasses/pclasses2/src/IO/URL.cpp,v retrieving revision 1.2 retrieving revision 1.3 diff -u -d -r1.2 -r1.3 --- URL.cpp 17 Jan 2005 21:58:36 -0000 1.2 +++ URL.cpp 27 Jan 2005 10:41:37 -0000 1.3 @@ -28,6 +28,7 @@ #endif #include <sstream> +#include <iomanip> namespace P { namespace IO { @@ -255,7 +256,123 @@ return *this; } -URL& URL::operator=(const std::string& url) throw(InvalidURL/*,NetDbError*/) +URL& URL::operator=(const std::string& url) throw(InvalidURL) +{ + using std::string; + + // file-URLs are handled specially ... + if(url.substr(0, 5) == "file:") + { + _proto = "file"; + _host = ""; + _user = ""; + _passwd = ""; + _port = 0; + _path = decode(url.substr(5, string::npos)); + std::cout << "path: '" << _path << "'" << std::endl; + _args.clear(); + _anchor = ""; + + return *this; + } + + // search for protocol delimiter ... + string::size_type protoEndPos = url.find("://"); + if(protoEndPos == string::npos) + throw InvalidURL("Invalid url", P_SOURCEINFO); + + string proto = url.substr(0, protoEndPos); + + // get the url-component starting positions ... + string::size_type hostStartPos = protoEndPos + 3; + string::size_type pathStartPos = url.find('/', hostStartPos); + string::size_type argsStartPos = url.find('?', pathStartPos == string::npos ? hostStartPos : pathStartPos); + string::size_type anchorStartPos = url.find('#', argsStartPos == string::npos ? hostStartPos : argsStartPos); + + // find the length of the hostname ... 
+ string::size_type hostLen; + if(pathStartPos != string::npos) + hostLen = pathStartPos - hostStartPos; + else if(argsStartPos != string::npos) + hostLen = argsStartPos - hostStartPos; + else if(anchorStartPos != string::npos) + hostLen = anchorStartPos - hostStartPos; + else + hostLen = string::npos; + + // get the hostname (including username/passwd and port)... + string host = url.substr(hostStartPos, hostLen); + + // find the length of the path ... + string::size_type pathLen; + if(argsStartPos != string::npos) + pathLen = argsStartPos - pathStartPos; + else if(anchorStartPos != string::npos) + pathLen = anchorStartPos - pathStartPos; + else + pathLen = string::npos; + + // get the path ... + string path = "/"; + if(pathStartPos != string::npos) + path = url.substr(pathStartPos, pathLen); + + string::size_type argsLen; + if(anchorStartPos != string::npos) + argsLen = anchorStartPos - argsStartPos; + else + argsLen = string::npos; + + // get the arguments ... + string args; + if(argsStartPos != string::npos) + args = url.substr(argsStartPos, argsLen); + + // get the anchor ... + string anchor; + if(anchorStartPos != string::npos) + anchor = url.substr(anchorStartPos + 1, string::npos); + + // get username and password from hostname ... + string userPasswd, user, passwd; + hostStartPos = host.find("@"); + if(hostStartPos != string::npos) + { + userPasswd = host.substr(0, hostStartPos); + host = host.substr(hostStartPos + 1, string::npos); + + string::size_type passwdStartPos = userPasswd.find(":"); + if(passwdStartPos != string::npos) + { + user = userPasswd.substr(0, passwdStartPos); + passwd = userPasswd.substr(passwdStartPos + 1, string::npos); + } + } + + // get port from hostname ... 
+ unsigned short port = 0; + string::size_type portStartPos = host.find(":"); + if(portStartPos != string::npos) + { + string portStr = host.substr(portStartPos + 1, string::npos); + host = host.substr(0, portStartPos); + port = atoi(portStr.c_str()); + } + + _proto = proto; + _host = host; + _user = user; + _passwd = passwd; + _port = port; + _path = path; + _args = fromString(args); + _anchor = anchor; + + return *this; +} + + +/*URL& URL::operator=(const std::string& url) throw(InvalidURL) { std::string proto, host, user, passwd, path, port; std::istringstream is(url); @@ -365,7 +482,13 @@ // parse path while((is >> ch)) + { + // argument delimiter ? + if(ch == '?') + break; + os << ch; + } //@@fixme ... parse args and anchor @@ -379,7 +502,7 @@ _anchor = ""; return *this; -} +}*/ bool URL::operator==(const URL& url) const throw() { @@ -424,7 +547,7 @@ // add URL arguments ... std::string args = URL::toString(url._args); if(!args.empty()) - os << '?' << args; + os << args; // add anchor if set ... if(!url._anchor.empty()) @@ -445,14 +568,68 @@ return is; } +bool charNeedEncode(unsigned char ch) +{ + bool ret = false; + + if((ch <= 0x1f) || (ch == 0x7f) || // ASCII control characters... + (ch >= 0x80 && ch <= 0xff) || // non-ASCII characters ... + (ch == 0x24) || (ch == 0x26) || // reserved characters... + (ch == 0x2b) || (ch == 0x2c) || + (ch == 0x2f) || (ch == 0x3a) || + (ch == 0x3b) || (ch == 0x3d) || + (ch == 0x3f) || (ch == 0x40) || + (ch == 0x20) || (ch == 0x22) || // unsafe characters ... 
+ (ch == 0x3c) || (ch == 0x3e) || + (ch == 0x23) || (ch == 0x25) || + (ch == 0x7b) || (ch == 0x7d) || + (ch == 0x7c) || (ch == 0x5c) || + (ch == 0x5e) || (ch == 0x7e) || + (ch == 0x5b) || (ch == 0x5d) || + (ch == 0x60)) + ret = true; + + return ret; +} + std::string URL::encode(const std::string& str) { - return str; + std::ostringstream os; + os << std::setfill('0'); + + for(std::string::size_type i = 0; i < str.size(); ++i) + { + char ch = str[i]; + if(charNeedEncode(ch)) + os << '%' << std::setw(2) << std::hex << (((int)ch) & 0x000000ff); + else + os << ch; + } + + return os.str(); } std::string URL::decode(const std::string& str) { - return str; + std::ostringstream os; + std::istringstream is(str); + + char ch; + while((is >> ch)) + { + if(ch == '%') + { + int val; + is >> std::hex >> val; + os << (char) val; + } + else + { + os << ch; + } + } + + return os.str(); } } // !namespace IO |