Hi,

I have a five-node JGroups cluster, and when the traffic among the nodes increases I observe the following JGroups error in my cluster.

I checked the test programs shipped with JGroups but was not able to identify one that I need to run to diagnose this issue. I ran RpcDispatcherSpeedTest; if needed, I can share the result. My configuration is also given below. Any hint or idea on how to find the cause of the issue is much appreciated.

Caused by: org.jgroups.TimeoutException: timeout waiting for response from bpsp-40493 (, site-id=null, rack-id=null, machine-id=bpsp), request: org.jgroups.blocks.UnicastRequest@6a0972c8, req_id=639558, mode=GET_ALL, target=bpsp-40493 (, site-id=null, rack-id=null, machine-id=bpsp)
at org.jgroups.blocks.MessageDispatcher.sendMessage(MessageDispatcher.java:429)
at org.infinispan.remoting.transport.jgroups.CommandAwareRpcDispatcher.processSingleCall(CommandAwareRpcDispatcher.java:374)
at org.infinispan.remoting.transport.jgroups.CommandAwareRpcDispatcher.invokeRemoteCommand(CommandAwareRpcDispatcher.java:188)
... 108 more

Configuration

<!--
  JGroups 3.6 protocol stack using the UDP (IP multicast) transport.
  Attribute values written as ${name:default} are property placeholders
  followed by the fallback value used when the property is not set.
-->
<config xmlns="urn:org:jgroups"
        xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
        xsi:schemaLocation="urn:org:jgroups http://www.jgroups.org/schema/JGroups-3.6.xsd">

    <!--
      Transport. Three separate thread pools are configured below: the regular
      pool (thread_pool.*), the internal pool (internal_thread_pool.*) and the
      out-of-band pool (oob_thread_pool.*). Only the internal pool has a queue.
      NOTE(review): bind_addr is the loopback address, so this stack can only
      form a cluster among processes on the same host. Confirm that all five
      nodes really run on one machine; otherwise bind to a routable interface.
    -->
    <UDP mcast_addr="${jgroups.udp.mcast_addr:228.6.7.8}"
         mcast_port="${jgroups.udp.mcast_port:46656}"
         ucast_send_buf_size="1m"
         mcast_send_buf_size="1m"
         ucast_recv_buf_size="20m"
         mcast_recv_buf_size="25m"
         ip_ttl="${jgroups.ip_ttl:2}"
         bind_addr="127.0.0.1"
         thread_naming_pattern="pl"
         enable_diagnostics="false"
         bundler_type="sender-sends-with-timer"

         thread_pool.min_threads="${jgroups.thread_pool.min_threads:2}"
         thread_pool.max_threads="${jgroups.thread_pool.max_threads:30}"
         thread_pool.keep_alive_time="60000"
         thread_pool.queue_enabled="false"

         internal_thread_pool.min_threads="${jgroups.internal_thread_pool.min_threads:5}"
         internal_thread_pool.max_threads="${jgroups.internal_thread_pool.max_threads:20}"
         internal_thread_pool.keep_alive_time="60000"
         internal_thread_pool.queue_enabled="true"
         internal_thread_pool.queue_max_size="500"

         oob_thread_pool.min_threads="${jgroups.oob_thread_pool.min_threads:20}"
         oob_thread_pool.max_threads="${jgroups.oob_thread_pool.max_threads:200}"
         oob_thread_pool.keep_alive_time="60000"
         oob_thread_pool.queue_enabled="false"/>

    <!-- Member discovery. -->
    <PING/>

    <!-- Merging of sub-clusters after a partition. -->
    <MERGE3 min_interval="10000"
            max_interval="30000"/>

    <!-- Failure detection (socket based and heartbeat based) and suspicion verification. -->
    <FD_SOCK/>
    <FD_ALL timeout="60000"
            interval="15000"
            timeout_check_interval="5000"/>
    <VERIFY_SUSPECT timeout="5000"/>

    <!-- Reliable delivery with retransmission: NAKACK2 for multicast, UNICAST3 for unicast messages. -->
    <pbcast.NAKACK2 xmit_interval="1000"
                    xmit_table_num_rows="50"
                    xmit_table_msgs_per_row="1024"
                    xmit_table_max_compaction_time="30000"
                    max_msg_batch_size="100"
                    resend_last_seqno="true"/>
    <UNICAST3 xmit_interval="500"
              xmit_table_num_rows="50"
              xmit_table_msgs_per_row="1024"
              xmit_table_max_compaction_time="30000"
              max_msg_batch_size="100"
              conn_expiry_timeout="0"/>

    <!-- Message stability (purging of messages already delivered by all members). -->
    <pbcast.STABLE stability_delay="500"
                   desired_avg_gossip="5000"
                   max_bytes="1M"/>

    <!-- Group membership. -->
    <pbcast.GMS print_local_addr="false"
                join_timeout="15000"/>

    <!-- Credit-based flow control: UFC for unicast, MFC for multicast messages. -->
    <UFC max_credits="2m"
         min_threshold="0.40"/>
    <MFC max_credits="2m"
         min_threshold="0.40"/>

    <!-- Fragmentation of large messages. -->
    <FRAG2/>

</config>