From: <sc...@hy...> - 2010-03-09 19:52:29
|
Author: scottmf Date: 2010-03-09 11:52:20 -0800 (Tue, 09 Mar 2010) New Revision: 14363 URL: http://svn.hyperic.org/?view=rev&root=Hyperic+HQ&revision=14363 Modified: trunk/src/org/hyperic/hibernate/Util.java trunk/src/org/hyperic/hq/escalation/server/session/EscalationManagerEJBImpl.java trunk/src/org/hyperic/hq/escalation/server/session/EscalationRuntime.java trunk/src/org/hyperic/hq/events/server/session/AlertConditionsSatisfiedListener.java trunk/src/org/hyperic/hq/events/server/session/ClassicEscalatableCreator.java trunk/src/org/hyperic/hq/galerts/processor/EventListener.java trunk/src/org/hyperic/hq/zevents/BufferedListener.java Log: [HQ-1905] [HHQ-3783] [HHQ-3784] fixed deadlock bug while processing escalations Modified: trunk/src/org/hyperic/hibernate/Util.java =================================================================== --- trunk/src/org/hyperic/hibernate/Util.java 2010-03-09 18:47:14 UTC (rev 14362) +++ trunk/src/org/hyperic/hibernate/Util.java 2010-03-09 19:52:20 UTC (rev 14363) @@ -42,6 +42,7 @@ import javax.management.ObjectName; import javax.naming.InitialContext; import javax.naming.NamingException; +import javax.transaction.RollbackException; import net.sf.ehcache.Cache; import net.sf.ehcache.CacheManager; @@ -52,6 +53,7 @@ import org.hibernate.Hibernate; import org.hibernate.Interceptor; import org.hibernate.SessionFactory; +import org.hibernate.StaleStateException; import org.hibernate.cfg.Configuration; import org.hibernate.cfg.Environment; import org.hibernate.dialect.Dialect; @@ -411,4 +413,18 @@ } return healths; } + + public static boolean tranRolledBack(Throwable t) { + if (t == null) { + return false; + } + Throwable tmp = t; + do { + if (tmp instanceof StaleStateException || + tmp instanceof RollbackException) { + return true; + } + } while ((tmp = tmp.getCause()) != null); + return false; + } } Modified: trunk/src/org/hyperic/hq/escalation/server/session/EscalationManagerEJBImpl.java =================================================================== --- trunk/src/org/hyperic/hq/escalation/server/session/EscalationManagerEJBImpl.java 2010-03-09 18:47:14 UTC (rev 14362) +++ trunk/src/org/hyperic/hq/escalation/server/session/EscalationManagerEJBImpl.java 2010-03-09 19:52:20 UTC (rev 14363) @@ -545,19 +545,14 @@ try { EscalationAlertType type = s.getAlertType(); - // Escalation state change - AuthzSubject overlord = - AuthzSubjectManagerEJBImpl.getOne().getOverlordPojo(); - type.changeAlertState(esc, overlord, - EscalationStateChange.ESCALATED); - - ActionExecutionInfo execInfo = - new ActionExecutionInfo(esc.getShortReason(), - esc.getLongReason(), - esc.getAuxLogs()); - + // HHQ-3784 to avoid deadlocks use the this table order when updating/inserting: + // 1) EAM_ESCALATION_STATE, 2) EAM_ALERT, 3) EAM_ALERT_ACTION_LOG + AuthzSubject overlord = AuthzSubjectManagerEJBImpl.getOne().getOverlordPojo(); + ActionExecutionInfo execInfo = new ActionExecutionInfo( + esc.getShortReason(), esc.getLongReason(), esc.getAuxLogs()); String detail = action.executeAction(esc.getAlertInfo(), execInfo); + type.changeAlertState(esc, overlord, EscalationStateChange.ESCALATED); type.logActionDetails(esc, action, detail, null); } catch(Exception exc) { log.error("Unable to execute action [" + @@ -800,6 +795,7 @@ boolean suppressNotification) throws PermissionException { + final boolean debug = log.isDebugEnabled(); Integer alertId = esc.getAlertInfo().getId(); boolean acknowledged = !fixed; @@ -820,22 +816,24 @@ // HQ-1295: Does user have sufficient permissions? // ...check if user can fix/acknowledge this alert... + // HHQ-3784 to avoid deadlocks use the this table order when updating/inserting: + // 1) EAM_ESCALATION_STATE, 2) EAM_ALERT, 3) EAM_ALERT_ACTION_LOG SessionBase.canFixAcknowledgeAlerts(subject, esc.getDefinition().getDefinitionInfo()); if (fixed) { - if (moreInfo == null || moreInfo.trim().length() == 0) + if (moreInfo == null || moreInfo.trim().length() == 0) { moreInfo = "(Fixed by " + subject.getFullName() + ")"; - - log.debug(subject.getFullName() + " has fixed alertId=" + alertId); + } + if (debug) log.debug(subject.getFullName() + " has fixed alertId=" + alertId); + if (state != null) { + endEscalation(state); + } type.changeAlertState(esc, subject, EscalationStateChange.FIXED); type.logActionDetails(esc, null, moreInfo, subject); - if (state != null) - endEscalation(state); } else { if (moreInfo == null || moreInfo.trim().length() == 0) { moreInfo = ""; } - if (state.getAcknowledgedBy() != null) { log.warn(subject.getFullName() + " attempted to acknowledge "+ type + " alert=" + alertId + " but it was already "+ @@ -843,19 +841,14 @@ state.getAcknowledgedBy().getFullName()); return; } - log.debug(subject.getFullName() + " has acknowledged alertId=" + - alertId); - type.changeAlertState(esc, subject, - EscalationStateChange.ACKNOWLEDGED); - type.logActionDetails(esc, null, - subject.getFullName() + " acknowledged " + - "the alert" + moreInfo, subject); - + if (debug) log.debug(subject.getFullName() + " has acknowledged alertId=" + alertId); state.setAcknowledgedBy(subject); + type.changeAlertState(esc, subject, EscalationStateChange.ACKNOWLEDGED); + String msg = subject.getFullName() + " acknowledged " + "the alert" + moreInfo; + type.logActionDetails(esc, null, msg, subject); } - if (!suppressNotification - && AlertRegulator.getInstance().alertNotificationsAllowed()) { + if (!suppressNotification && AlertRegulator.getInstance().alertNotificationsAllowed()) { if (state != null) { sendNotifications(state, esc, subject, state.getEscalation().isNotifyAll(), fixed, Modified: trunk/src/org/hyperic/hq/escalation/server/session/EscalationRuntime.java =================================================================== --- trunk/src/org/hyperic/hq/escalation/server/session/EscalationRuntime.java 2010-03-09 18:47:14 UTC (rev 14362) +++ trunk/src/org/hyperic/hq/escalation/server/session/EscalationRuntime.java 2010-03-09 19:52:20 UTC (rev 14363) @@ -35,6 +35,7 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; +import org.hyperic.hibernate.Util; import org.hyperic.hq.application.HQApp; import org.hyperic.hq.application.TransactionListener; import org.hyperic.hq.escalation.shared.EscalationManagerLocal; @@ -104,7 +105,22 @@ } public void run() { - runEscalation(_stateId); + int maxRetries = 3; + for (int i=0; i<maxRetries; i++) { + try { + runEscalation(_stateId); + break; + } catch (Throwable e) { + if ((i+1) < maxRetries && Util.tranRolledBack(e)) { + String times = (maxRetries - i == 1) ? "time" : "times"; + _log.warn("Warning, exception occurred while running escalation. will retry " + + (maxRetries - (i+1)) + " more " + times + ". errorMsg: " + e); + continue; + } else { + _log.error("Exception occurred, runEscalation() will not be retried",e); + } + } + } } } Modified: trunk/src/org/hyperic/hq/events/server/session/AlertConditionsSatisfiedListener.java =================================================================== --- trunk/src/org/hyperic/hq/events/server/session/AlertConditionsSatisfiedListener.java 2010-03-09 18:47:14 UTC (rev 14362) +++ trunk/src/org/hyperic/hq/events/server/session/AlertConditionsSatisfiedListener.java 2010-03-09 19:52:20 UTC (rev 14363) @@ -3,6 +3,9 @@ import java.util.Iterator; import java.util.List; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.hyperic.hibernate.Util; import org.hyperic.hq.events.shared.AlertManagerLocal; import org.hyperic.hq.measurement.server.session.AlertConditionsSatisfiedZEvent; import org.hyperic.hq.zevents.ZeventListener; @@ -14,14 +17,37 @@ * */ public class AlertConditionsSatisfiedListener implements ZeventListener { + + private static final int MAX_RETRIES = 3; + private final Log _log = LogFactory.getLog(AlertConditionsSatisfiedListener.class); public void processEvents(List events) { AlertManagerLocal am = AlertManagerEJBImpl.getOne(); - for (Iterator i=events.iterator(); i.hasNext(); ) { - AlertConditionsSatisfiedZEvent z = (AlertConditionsSatisfiedZEvent)i.next(); - am.fireAlert(z); + for (Iterator it=events.iterator(); it.hasNext(); ) { + AlertConditionsSatisfiedZEvent z = (AlertConditionsSatisfiedZEvent)it.next(); + // HQ-1905 need to retry due to potential StaleStateExceptions + for (int ii=0; ii<MAX_RETRIES; ii++) { + try { + am.fireAlert(z); + break; + } catch (Throwable e) { + if ((ii+1) < MAX_RETRIES && Util.tranRolledBack(e)) { + String times = (MAX_RETRIES - ii == 1) ? "time" : "times"; + _log.warn("Warning, exception occurred while running fireAlert. will retry " + + (MAX_RETRIES - (ii+1)) + " more " + times + ". errorMsg: " + e); + continue; + } else { + _log.error("fireAlert threw an Exception, will not be retried",e); + break; + } + } + } } } + + public String toString() { + return "AlertConditionsSatisfiedListener"; + } } Modified: trunk/src/org/hyperic/hq/events/server/session/ClassicEscalatableCreator.java =================================================================== --- trunk/src/org/hyperic/hq/events/server/session/ClassicEscalatableCreator.java 2010-03-09 18:47:14 UTC (rev 14362) +++ trunk/src/org/hyperic/hq/events/server/session/ClassicEscalatableCreator.java 2010-03-09 19:52:20 UTC (rev 14363) @@ -33,6 +33,7 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; +import org.hyperic.hibernate.Util; import org.hyperic.hq.appdef.shared.AppdefEntityID; import org.hyperic.hq.application.HQApp; import org.hyperic.hq.application.TransactionListener; @@ -156,6 +157,10 @@ alertMan.logActionDetail(alert, act, detail, null); } catch(Exception e) { + // HQ-1905 want to recover cleanly from a rollback at the top level + if (Util.tranRolledBack(e)) { + throw new RuntimeException(e); + } // For any exception, just log it. We can't afford not // letting the other actions go un-processed. _log.warn("Error executing action [" + act + "]", e); @@ -163,20 +168,29 @@ } } - private void registerAlertFiredEvent(final Integer alertId, final AlertConditionsSatisfiedZEventPayload payload) { + private void registerAlertFiredEvent(final Integer alertId, + final AlertConditionsSatisfiedZEventPayload payload) { try { HQApp.getInstance().addTransactionListener(new TransactionListener() { public void afterCommit(boolean success) { if(success) { - messenger.publishMessage(EventConstants.EVENTS_TOPIC,new AlertFiredEvent(alertId, _def.getId(), new AppdefEntityID(_def.getResource()),_def.getName(),payload.getTimestamp(), - payload.getMessage())); + messenger.publishMessage( + EventConstants.EVENTS_TOPIC, + new AlertFiredEvent(alertId, _def.getId(), new AppdefEntityID(_def.getResource()), + _def.getName(),payload.getTimestamp(), payload.getMessage())); } } public void beforeCommit() { } }); } catch (Throwable t) { - _log.error("Error registering to send an AlertFiredEvent on transaction commit. The alert will be fired, but the event will not be sent. This could cause a future recovery alert not to fire.", t); + // HQ-1905 want to recover cleanly from a rollback at the top level + if (Util.tranRolledBack(t)) { + throw new RuntimeException(t); + } + _log.error("Error registering to send an AlertFiredEvent on transaction commit. " + + "The alert will be fired, but the event will not be sent. " + + "This could cause a future recovery alert not to fire.", t); } } Modified: trunk/src/org/hyperic/hq/galerts/processor/EventListener.java =================================================================== --- trunk/src/org/hyperic/hq/galerts/processor/EventListener.java 2010-03-09 18:47:14 UTC (rev 14362) +++ trunk/src/org/hyperic/hq/galerts/processor/EventListener.java 2010-03-09 19:52:20 UTC (rev 14363) @@ -27,19 +27,26 @@ import java.util.List; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; import org.hyperic.hq.zevents.ZeventListener; class EventListener implements ZeventListener { private final GalertProcessor _aProc; + private final Log _log = LogFactory.getLog(EventListener.class); EventListener(GalertProcessor aProc) { _aProc = aProc; } public void processEvents(List events) { - _aProc.processEvents(events); + try { + _aProc.processEvents(events); + } catch (Throwable e) { + _log.error(e,e); + } } public String toString() { Modified: trunk/src/org/hyperic/hq/zevents/BufferedListener.java =================================================================== --- trunk/src/org/hyperic/hq/zevents/BufferedListener.java 2010-03-09 18:47:14 UTC (rev 14362) +++ trunk/src/org/hyperic/hq/zevents/BufferedListener.java 2010-03-09 19:52:20 UTC (rev 14363) @@ -26,6 +26,7 @@ package org.hyperic.hq.zevents; import java.util.List; +import java.util.concurrent.BlockingQueue; import java.util.concurrent.LinkedBlockingQueue; import java.util.concurrent.ThreadPoolExecutor; import java.util.concurrent.TimeUnit; @@ -65,7 +66,15 @@ } public void processEvents(List events) { - execute(new BufferedEventRunnable(events, _target)); + try { + execute(new BufferedEventRunnable(events, _target)); + } catch (Throwable e) { + _log.error(e,e); + } + int size = getQueue().size(); + if (_log.isDebugEnabled() && size != 0 && (size % 100) == 0) { + _log.debug("obj=" + this.toString() + ", queue size=" + size); + } } public boolean equals(Object obj) { |