From: Chandru <cha...@us...> - 2009-07-01 22:44:14
|
Update of /cvsroot/jungerl/jungerl/lib/ibrowse/src In directory fdv4jf1.ch3.sourceforge.com:/tmp/cvs-serv26761/src Modified Files: ibrowse.erl ibrowse_http_client.erl ibrowse_lb.erl ibrowse_test.erl Log Message: ibrowse-1.5.0 - Use binaries internally. Lots of other changes. See README Index: ibrowse.erl =================================================================== RCS file: /cvsroot/jungerl/jungerl/lib/ibrowse/src/ibrowse.erl,v retrieving revision 1.7 retrieving revision 1.8 diff -u -d -r1.7 -r1.8 --- ibrowse.erl 21 May 2008 15:28:11 -0000 1.7 +++ ibrowse.erl 1 Jul 2009 22:43:19 -0000 1.8 @@ -6,8 +6,8 @@ %%% Created : 11 Oct 2003 by Chandrashekhar Mullaparthi <cha...@t-...> %%%------------------------------------------------------------------- %% @author Chandrashekhar Mullaparthi <chandrashekhar dot mullaparthi at gmail dot com> -%% @copyright 2005-2008 Chandrashekhar Mullaparthi -%% @version 1.4 +%% @copyright 2005-2009 Chandrashekhar Mullaparthi +%% @version 1.5.0 %% @doc The ibrowse application implements an HTTP 1.1 client. This %% module implements the API of the HTTP client. There is one named %% process called 'ibrowse' which assists in load balancing and maintaining configuration. There is one load balancing process per unique webserver. There is @@ -96,6 +96,7 @@ trace_off/0, trace_on/2, trace_off/2, + all_trace_off/0, show_dest_status/2 ]). @@ -105,8 +106,6 @@ -import(ibrowse_lib, [ parse_url/1, - printable_date/0, - get_value/2, get_value/3, do_trace/2 ]). @@ -114,6 +113,7 @@ -record(state, {trace = false}). -include("ibrowse.hrl"). +-include_lib("stdlib/include/ms_transform.hrl"). -define(DEF_MAX_SESSIONS,10). -define(DEF_MAX_PIPELINE_SIZE,10). @@ -170,7 +170,7 @@ %% For a description of SSL Options, look in the ssl manpage. If the %% HTTP Version to use is not specified, the default is 1.1. %% <br/> -%% <p>The <code>host_header</code> is useful in the case where ibrowse is +%% <p>The <code>host_header</code> option is useful in the case where ibrowse is %% connecting to a component such as <a %% href="http://www.stunnel.org">stunnel</a> which then sets up a %% secure connection to a webserver. In this case, the URL supplied to @@ -188,10 +188,40 @@ %% <li>Whenever an error occurs in the processing of a request, ibrowse will return as much %% information as it has, such as HTTP Status Code and HTTP Headers. When this happens, the response %% is of the form <code>{error, {Reason, {stat_code, StatusCode}, HTTP_headers}}</code></li> +%% +%% <li>The <code>inactivity_timeout</code> option is useful when +%% dealing with large response bodies and/or slow links. In these +%% cases, it might be hard to estimate how long a request will take to +%% complete. In such cases, the client might want to timeout if no +%% data has been received on the link for a certain time interval.</li> +%% +%% <li> +%% The <code>connect_timeout</code> option is to specify how long the +%% client process should wait for connection establishment. This is +%% useful in scenarios where connections to servers are usually setup +%% very fast, but responses might take much longer compared to +%% connection setup. In such cases, it is better for the calling +%% process to timeout faster if there is a problem (DNS lookup +%% delays/failures, network routing issues, etc). The total timeout +%% value specified for the request will enforced. To illustrate using +%% an example: +%% <code> +%% ibrowse:send_req("http://www.example.com/cgi-bin/request", [], get, [], [{connect_timeout, 100}], 1000). +%% </code> +%% In the above invocation, if the connection isn't established within +%% 100 milliseconds, the request will fail with +%% <code>{error, conn_failed}</code>.<br/> +%% If connection setup succeeds, the total time allowed for the +%% request to complete will be 1000 milliseconds minus the time taken +%% for connection setup. +%% </li> %% </ul> +%% %% @spec send_req(Url::string(), Headers::headerList(), Method::method(), Body::body(), Options::optionList()) -> response() %% optionList() = [option()] %% option() = {max_sessions, integer()} | +%% {response_format,response_format()}| +%% {stream_chunk_size, integer()} | %% {max_pipeline_size, integer()} | %% {trace, boolean()} | %% {is_ssl, boolean()} | @@ -210,8 +240,10 @@ %% {stream_to, process()} | %% {http_vsn, {MajorVsn, MinorVsn}} | %% {host_header, string()} | +%% {inactivity_timeout, integer()} | +%% {connect_timeout, integer()} | %% {transfer_encoding, {chunked, ChunkSize}} -%% +%% %% process() = pid() | atom() %% username() = string() %% password() = string() @@ -219,7 +251,7 @@ %% ChunkSize = integer() %% srtf() = boolean() | filename() %% filename() = string() -%% +%% response_format() = list | binary send_req(Url, Headers, Method, Body, Options) -> send_req(Url, Headers, Method, Body, Options, 30000). @@ -230,7 +262,8 @@ send_req(Url, Headers, Method, Body, Options, Timeout) -> case catch parse_url(Url) of #url{host = Host, - port = Port} = Parsed_url -> + port = Port, + protocol = Protocol} = Parsed_url -> Lb_pid = case ets:lookup(ibrowse_lb, {Host, Port}) of [] -> get_lb_pid(Parsed_url); @@ -241,9 +274,10 @@ Max_pipeline_size = get_max_pipeline_size(Host, Port, Options), Options_1 = merge_options(Host, Port, Options), {SSLOptions, IsSSL} = - case get_value(is_ssl, Options_1, false) of + case (Protocol == https) orelse + get_value(is_ssl, Options_1, false) of false -> {[], false}; - true -> {get_value(ssl_options, Options_1), true} + true -> {get_value(ssl_options, Options_1, []), true} end, case ibrowse_lb:spawn_connection(Lb_pid, Parsed_url, Max_sessions, @@ -310,16 +344,28 @@ do_send_req(Conn_Pid, Parsed_url, Headers, Method, Body, Options, Timeout) -> case catch ibrowse_http_client:send_req(Conn_Pid, Parsed_url, - Headers, Method, Body, + Headers, Method, ensure_bin(Body), Options, Timeout) of {'EXIT', {timeout, _}} -> {error, req_timedout}; {'EXIT', Reason} -> {error, {'EXIT', Reason}}; + {ok, St_code, Headers, Body} = Ret when is_binary(Body) -> + case get_value(response_format, Options, list) of + list -> + {ok, St_code, Headers, binary_to_list(Body)}; + binary -> + Ret + end; Ret -> Ret end. +ensure_bin(L) when is_list(L) -> + list_to_binary(L); +ensure_bin(B) when is_binary(B) -> + B. + %% @doc Creates a HTTP client process to the specified Host:Port which %% is not part of the load balancing pool. This is useful in cases %% where some requests to a webserver might take a long time whereas @@ -389,17 +435,25 @@ %% @doc Turn tracing on for all connections to the specified HTTP %% server. Host is whatever is specified as the domain name in the URL -%% @spec trace_on(Host, Port) -> term() +%% @spec trace_on(Host, Port) -> ok %% Host = string() %% Port = integer() trace_on(Host, Port) -> - ibrowse ! {trace, true, Host, Port}. + ibrowse ! {trace, true, Host, Port}, + ok. %% @doc Turn tracing OFF for all connections to the specified HTTP %% server. -%% @spec trace_off(Host, Port) -> term() +%% @spec trace_off(Host, Port) -> ok trace_off(Host, Port) -> - ibrowse ! {trace, false, Host, Port}. + ibrowse ! {trace, false, Host, Port}, + ok. + +%% @doc Turn Off ALL tracing +%% @spec all_trace_off() -> ok +all_trace_off() -> + ibrowse ! all_trace_off, + ok. %% @doc Shows some internal information about load balancing to a %% specified Host:Port. Info about workers spawned using @@ -577,6 +631,30 @@ %% {noreply, State, Timeout} | %% {stop, Reason, State} (terminate/2 is called) %%-------------------------------------------------------------------- +handle_info(all_trace_off, State) -> + Mspec = [{{ibrowse_conf,{trace,'$1','$2'},true},[],[{{'$1','$2'}}]}], + Trace_on_dests = ets:select(ibrowse_conf, Mspec), + Fun = fun(#lb_pid{host_port = {H, P}, pid = Pid}, _) -> + case lists:member({H, P}, Trace_on_dests) of + false -> + ok; + true -> + catch Pid ! {trace, false} + end; + (#client_conn{key = {H, P, Pid}}, _) -> + case lists:member({H, P}, Trace_on_dests) of + false -> + ok; + true -> + catch Pid ! {trace, false} + end; + (_, Acc) -> + Acc + end, + ets:foldl(Fun, undefined, ibrowse_lb), + ets:select_delete(ibrowse_conf, [{{ibrowse_conf,{trace,'$1','$2'},true},[],['true']}]), + {noreply, State}; + handle_info({trace, Bool}, State) -> put(my_trace_flag, Bool), {noreply, State}; Index: ibrowse_lb.erl =================================================================== RCS file: /cvsroot/jungerl/jungerl/lib/ibrowse/src/ibrowse_lb.erl,v retrieving revision 1.1 retrieving revision 1.2 diff -u -d -r1.1 -r1.2 --- ibrowse_lb.erl 27 Mar 2008 01:36:21 -0000 1.1 +++ ibrowse_lb.erl 1 Jul 2009 22:43:19 -0000 1.2 @@ -39,13 +39,6 @@ max_pipeline_size, num_cur_sessions = 0}). --import(ibrowse_lib, [ - parse_url/1, - printable_date/0, - get_value/3 - ]). - - -include("ibrowse.hrl"). %%==================================================================== Index: ibrowse_http_client.erl =================================================================== RCS file: /cvsroot/jungerl/jungerl/lib/ibrowse/src/ibrowse_http_client.erl,v retrieving revision 1.18 retrieving revision 1.19 diff -u -d -r1.18 -r1.19 --- ibrowse_http_client.erl 21 May 2008 15:28:11 -0000 1.18 +++ ibrowse_http_client.erl 1 Jul 2009 22:43:19 -0000 1.19 @@ -38,28 +38,33 @@ -include("ibrowse.hrl"). --record(state, {host, port, +-record(state, {host, port, use_proxy = false, proxy_auth_digest, - ssl_options = [], is_ssl = false, socket, - reqs=queue:new(), cur_req, status=idle, http_status_code, - reply_buffer=[], rep_buf_size=0, recvd_headers=[], + ssl_options = [], is_ssl = false, socket, + reqs=queue:new(), cur_req, status=idle, http_status_code, [...1108 lines suppressed...] + +flatten([H | _] = L) when is_integer(H) -> + L; +flatten([H | _] = L) when is_list(H) -> + lists:flatten(L); +flatten([]) -> + []. + +get_stream_chunk_size(Options) -> + case lists:keysearch(stream_chunk_size, 1, Options) of + {value, {_, V}} when V > 0 -> + V; + _ -> + ?DEFAULT_STREAM_CHUNK_SIZE + end. + +get_inac_timeout(#state{cur_req = #request{options = Opts}}) -> + get_value(inactivity_timeout, Opts, infinity); +get_inac_timeout(#state{cur_req = undefined}) -> + infinity. Index: ibrowse_test.erl =================================================================== RCS file: /cvsroot/jungerl/jungerl/lib/ibrowse/src/ibrowse_test.erl,v retrieving revision 1.3 retrieving revision 1.4 diff -u -d -r1.3 -r1.4 --- ibrowse_test.erl 21 May 2008 15:28:11 -0000 1.3 +++ ibrowse_test.erl 1 Jul 2009 22:43:19 -0000 1.4 @@ -11,14 +11,16 @@ do_send_req/2, unit_tests/0, unit_tests/1, + unit_tests_1/2, drv_ue_test/0, drv_ue_test/1, ue_test/0, - ue_test/1 + ue_test/1, + verify_chunked_streaming/0, + verify_chunked_streaming/1, + i_do_async_req_list/4 ]). --import(ibrowse_lib, [printable_date/0]). - %% Use ibrowse:set_max_sessions/3 and ibrowse:set_max_pipeline_size/3 to %% tweak settings before running the load test. The defaults are 10 and 10. load_test(Url, NumWorkers, NumReqsPerWorker) when is_list(Url), @@ -46,7 +48,7 @@ log_msg("End time : ~1000.p~n", [calendar:now_to_local_time(End_time)]), Elapsed_time_secs = trunc(timer:now_diff(End_time, Start_time) / 1000000), log_msg("Elapsed : ~p~n", [Elapsed_time_secs]), - log_msg("Reqs/sec : ~p~n", [(NumWorkers*NumReqsPerWorker) / Elapsed_time_secs]), + log_msg("Reqs/sec : ~p~n", [round(trunc((NumWorkers*NumReqsPerWorker) / Elapsed_time_secs))]), dump_errors(). init_results() -> @@ -88,7 +90,7 @@ do_wait() end end. - + do_send_req(Url, NumReqs) -> do_send_req_1(Url, NumReqs). @@ -149,7 +151,7 @@ -define(TEST_LIST, [{"http://intranet/messenger", get}, {"http://www.google.co.uk", get}, {"http://www.google.com", get}, - {"http://www.google.com", options}, + {"http://www.google.com", options}, {"http://www.sun.com", get}, {"http://www.oracle.com", get}, {"http://www.bbc.co.uk", get}, @@ -172,26 +174,129 @@ {"http://jigsaw.w3.org/HTTP/400/toolong/", get}, {"http://jigsaw.w3.org/HTTP/300/", get}, {"http://jigsaw.w3.org/HTTP/Basic/", get, [{basic_auth, {"guest", "guest"}}]}, - {"http://jigsaw.w3.org/HTTP/CL/", get} + {"http://jigsaw.w3.org/HTTP/CL/", get}, + {"http://www.httpwatch.com/httpgallery/chunked/", get} ]). unit_tests() -> unit_tests([]). unit_tests(Options) -> + {Pid, Ref} = erlang:spawn_monitor(?MODULE, unit_tests_1, [self(), Options]), + receive + {done, Pid} -> + ok; + {'DOWN', Ref, _, _, Info} -> + io:format("Test process crashed: ~p~n", [Info]) + after 60000 -> + io:format("Timed out waiting for tests to complete~n", []) + end. + +unit_tests_1(Parent, Options) -> lists:foreach(fun({Url, Method}) -> execute_req(Url, Method, Options); ({Url, Method, X_Opts}) -> execute_req(Url, Method, X_Opts ++ Options) - end, ?TEST_LIST). + end, ?TEST_LIST), + Parent ! {done, self()}. -execute_req(Url, Method) -> - execute_req(Url, Method, []). +verify_chunked_streaming() -> + verify_chunked_streaming([]). + +verify_chunked_streaming(Options) -> + Url = "http://www.httpwatch.com/httpgallery/chunked/", + io:format("URL: ~s~n", [Url]), + io:format("Fetching data without streaming...~n", []), + Result_without_streaming = ibrowse:send_req( + Url, [], get, [], + [{response_format, binary} | Options]), + io:format("Fetching data with streaming as list...~n", []), + Async_response_list = do_async_req_list( + Url, get, [{response_format, list} | Options]), + io:format("Fetching data with streaming as binary...~n", []), + Async_response_bin = do_async_req_list( + Url, get, [{response_format, binary} | Options]), + compare_responses(Result_without_streaming, Async_response_list, Async_response_bin). + +compare_responses({ok, St_code, _, Body}, {ok, St_code, _, Body}, {ok, St_code, _, Body}) -> + success; +compare_responses({ok, St_code, _, Body_1}, {ok, St_code, _, Body_2}, {ok, St_code, _, Body_3}) -> + case Body_1 of + Body_2 -> + io:format("Body_1 and Body_2 match~n", []); + Body_3 -> + io:format("Body_1 and Body_3 match~n", []); + _ when Body_2 == Body_3 -> + io:format("Body_2 and Body_3 match~n", []); + _ -> + io:format("All three bodies are different!~n", []) + end, + io:format("Body_1 -> ~p~n", [Body_1]), + io:format("Body_2 -> ~p~n", [Body_2]), + io:format("Body_3 -> ~p~n", [Body_3]), + fail_bodies_mismatch; +compare_responses(R1, R2, R3) -> + io:format("R1 -> ~p~n", [R1]), + io:format("R2 -> ~p~n", [R2]), + io:format("R3 -> ~p~n", [R3]), + fail. + +%% do_async_req_list(Url) -> +%% do_async_req_list(Url, get). + +%% do_async_req_list(Url, Method) -> +%% do_async_req_list(Url, Method, [{stream_to, self()}, +%% {stream_chunk_size, 1000}]). + +do_async_req_list(Url, Method, Options) -> + {Pid,_} = erlang:spawn_monitor(?MODULE, i_do_async_req_list, + [self(), Url, Method, + Options ++ [{stream_chunk_size, 1000}]]), + io:format("Spawned process ~p~n", [Pid]), + wait_for_resp(Pid). + +wait_for_resp(Pid) -> + receive + {async_result, Pid, Res} -> + Res; + {'DOWN', _, _, Pid, Reason} -> + {'EXIT', Reason}; + {'DOWN', _, _, _, _} -> + wait_for_resp(Pid); + Msg -> + io:format("Recvd unknown message: ~p~n", [Msg]), + wait_for_resp(Pid) + after 10000 -> + {error, timeout} + end. + +i_do_async_req_list(Parent, Url, Method, Options) -> + Res = ibrowse:send_req(Url, [], Method, [], [{stream_to, self()} | Options]), + case Res of + {ibrowse_req_id, Req_id} -> + Result = wait_for_async_resp(Req_id, undefined, undefined, []), + Parent ! {async_result, self(), Result}; + Err -> + Parent ! {async_result, self(), Err} + end. + +wait_for_async_resp(Req_id, Acc_Stat_code, Acc_Headers, Body) -> + receive + {ibrowse_async_headers, Req_id, StatCode, Headers} -> + wait_for_async_resp(Req_id, StatCode, Headers, Body); + {ibrowse_async_response_end, Req_id} -> + Body_1 = list_to_binary(lists:reverse(Body)), + {ok, Acc_Stat_code, Acc_Headers, Body_1}; + {ibrowse_async_response, Req_id, Data} -> + wait_for_async_resp(Req_id, Acc_Stat_code, Acc_Headers, [Data | Body]); + Err -> + {ok, Acc_Stat_code, Acc_Headers, Err} + end. execute_req(Url, Method, Options) -> - io:format("~s, ~p: ", [Url, Method]), + io:format("~7.7w, ~50.50s: ", [Method, Url]), Result = (catch ibrowse:send_req(Url, [], Method, [], Options)), - case Result of + case Result of {ok, SCode, _H, _B} -> io:format("Status code: ~p~n", [SCode]); Err -> |