[Personalrobots-commit] SF.net SVN: personalrobots:[19293] pkg/trunk

SourceForge Headquarters 1320 Columbia Street Suite 310 San Diego, CA 92101 +1 (858) 422-6466

Revision: 19293
          http://personalrobots.svn.sourceforge.net/personalrobots/?rev=19293&view=rev
Author:   wattsk
Date:     2009-07-21 06:44:50 +0000 (Tue, 21 Jul 2009)

Log Message:
-----------
Updated hd and cpu monitor to use threading to reduce load and lag. Hddtemp now called with socket

Modified Paths:
--------------
    pkg/trunk/pr2/pr2_computer_monitor/cpu_monitor.launch
    pkg/trunk/pr2/pr2_computer_monitor/hd_monitor.launch
    pkg/trunk/pr2/pr2_computer_monitor/scripts/cpu_monitor.py
    pkg/trunk/pr2/pr2_computer_monitor/scripts/hd_monitor.py
    pkg/trunk/robot_descriptions/pr2/pr2_configurations/pr2_head_cart/cart_monitors.launch
    pkg/trunk/stacks/pr2/pr2_alpha/pr2_monitors.launch

Removed Paths:
-------------
    pkg/trunk/pr2/pr2_computer_monitor/scripts/disk_usage.py

Modified: pkg/trunk/pr2/pr2_computer_monitor/cpu_monitor.launch
===================================================================

--- pkg/trunk/pr2/pr2_computer_monitor/cpu_monitor.launch	2009-07-21 06:36:25 UTC (rev 19292)
+++ pkg/trunk/pr2/pr2_computer_monitor/cpu_monitor.launch	2009-07-21 06:44:50 UTC (rev 19293)
@@ -2,8 +2,9 @@
  <!-- Should make this be localhost or whatever -->
  <machine name="local_root" user="root" address="nsf" ros-root="$(env ROS_ROOT)" ros-package-path="$(env ROS_PACKAGE_PATH)" default="never"/> 
 
-  <param name="no_cpu_temp" value="True" />
-
  <!-- Set machine to robot machine in final version -->
+    <param name="check_ipmi_tool" value="False" />	
   <node machine="local_root" pkg="pr2_computer_monitor" type="cpu_monitor.py"  />
+
+
 </launch>
\ No newline at end of file

Modified: pkg/trunk/pr2/pr2_computer_monitor/hd_monitor.launch
===================================================================
--- pkg/trunk/pr2/pr2_computer_monitor/hd_monitor.launch	2009-07-21 06:36:25 UTC (rev 19292)
+++ pkg/trunk/pr2/pr2_computer_monitor/hd_monitor.launch	2009-07-21 06:44:50 UTC (rev 19293)
@@ -5,5 +5,8 @@
   <param name="no_hd_temp_warn" value="True" />
 
  <!-- Set machine to robot machine in final version -->
-  <node machine="local_root" pkg="pr2_computer_monitor" type="hd_monitor.py" args="$(optenv HOME /home) /dev/sda /dev/sdb" />
+  <node machine="local_root" pkg="pr2_computer_monitor" type="hd_monitor.py" 
+  args="$(optenv HOME /home)" />
+
+
 </launch>
\ No newline at end of file

Modified: pkg/trunk/pr2/pr2_computer_monitor/scripts/cpu_monitor.py
===================================================================
--- pkg/trunk/pr2/pr2_computer_monitor/scripts/cpu_monitor.py	2009-07-21 06:36:25 UTC (rev 19292)
+++ pkg/trunk/pr2/pr2_computer_monitor/scripts/cpu_monitor.py	2009-07-21 06:44:50 UTC (rev 19293)
@@ -41,8 +41,10 @@
 
 import traceback
 import threading
+from threading import Timer
 import sys, os, time
 import subprocess
+import string
 
 import socket
 
@@ -50,115 +52,219 @@
 
 stat_dict = { 0: 'OK', 1: 'Warning', 2: 'Error' }
 
-def check_cpu_temp(hostname):
-    stat = DiagnosticStatus()
-    stat.name = '%s CPU Temp Data' % hostname
-    stat.strings = []
-    stat.values = []
+# Output entire IPMI data set
+def check_ipmi():
+    diag_strs = []
+    diag_vals = []
+    diag_msgs = []
+    diag_level = 0
 
     try:
-        p = subprocess.Popen('sudo ipmitool sdr type Temperature', 
+        p = subprocess.Popen('ipmitool sdr',
                              stdout = subprocess.PIPE,
                              stderr = subprocess.PIPE, shell = True)
         stdout, stderr = p.communicate()
         retcode = p.returncode
-                
-        stat.level = retcode
-        
-        if retcode == 0:
-            stat.level = 0
-            stat.message = 'OK'
-        else:
-            stat.level = 2
-            stat.message = 'Bad return code'
-            stat.strings.append(DiagnosticString(label = 'Error code: %d' % retcode, value = stderr.replace('\n', '')))
-                
                         
-        # Parse STDOUT for temp, device ID
+        if retcode != 0:
+            diag_level = 2
+            diag_msg = [ 'ipmitool Error' ]
+            diag_strs = [ DiagnosticString(label = 'IPMI Error', value = stderr) ]
+            return diag_strs, diag_vals, diag_msgs, diag_level
+
         lines = stdout.split('\n')
         if len(lines) < 2:
-            stat.strings.append(DiagnosticString(label = 'ipmitool status', value = 'No output'))
-            stat.message = 'No response'
-            stat.level = 1
+            diag_strs = [ DiagnosticString(label = 'ipmitool status', value = 'No output') ]
+            diag_msgs = [ 'No response' ]
+            diag_level = 2
+            return diag_strs, diag_vals, diag_msgs, diag_level
+
         for ln in lines:
-            
-            lst = ln.split('|')
-            if lst[-1].endswith('degrees C'):
-                tmp = lst[-1].rstrip('degrees C').lstrip()
-                if unicode(tmp).isnumeric():
-                    stat.values.append(DiagnosticValue(label = lst[0], value = float(tmp)))
-            else:
-                stat.strings.append(DiagnosticString(label = lst[0], value = lst[-1]))
-            
+            if len(ln) < 2:
+                continue
 
+            words = ln.split('|')
+            name = words[0].strip()
+            ipmi_val = words[1].strip()
+            stat_byte = words[1].strip()
+
+            # CPU temps
+            if words[0].startswith('CPU') and words[0].strip().endswith('Temp'):
+                if words[1].strip().endswith('degrees C'):
+                    tmp = ipmi_val.rstrip(' degrees C').lstrip()
+                    if unicode(tmp).isnumeric():
+                        temperature = float(tmp)
+                        diag_vals.append(DiagnosticValue(label = name, value = temperature))
+
+                        cpu_name = name.split()[0]
+                        if temperature >= 80 and temperature < 85:
+                            diag_level = max(diag_level, 1)
+                            if diag_msgs.count('CPU Hot') == 0:
+                                diag_msgs.append('CPU Warm')
+                        if temperature >= 85:
+                            diag_level = max(diag_level, 2)
+                            diag_msgs.append('CPU Hot')                                
+                            # Don't keep CPU Warm in list if CPU is hot
+                            if diag_msgs.count('CPU Warm') > 0:
+                                idx = diag_msgs.index('CPU Warm')
+                                diag_msgs.pop(idx)
+
+                            
+                else:
+                    diag_strs.append(DiagnosticString(label = name, value = words[1]))
+
+
+            # MP, BP, FP temps
+            if name == 'MB Temp' or name == 'BP Temp' or name == 'FP Temp':
+                if ipmi_val.endswith('degrees C'):
+                    tmp = ipmi_val.rstrip(' degrees C').lstrip()
+                    if unicode(tmp).isnumeric():
+                        temperature = float(tmp)
+                        diag_vals.append(DiagnosticValue(label = name, value = temperature))
+                        
+                        dev_name = name.split()[0]
+                        if temperature >= 60 and temperature < 70:
+                            diag_level = max(diag_level, 1)
+                            diag_msgs.append('%s Warm' % dev_name)
+                        if temperature >= 70:
+                            diag_level = max(diag_level, 2)
+                            diag_msgs.append('%s Hot' % dev_name)
+
+                else:
+                    diag_strs.append(DiagnosticString(label = name, value = ipmi_val))
+        
+            # CPU fan speeds
+            if (name.startswith('CPU') and name.endswith('Fan')) or name == 'MB Fan':
+                if ipmi_val.endswith('RPM'):
+                    rpm = ipmi_val.rstrip(' RPM').lstrip()
+                    if unicode(rpm).isnumeric():
+                        rpm = float(rpm)
+                        diag_vals.append(DiagnosticValue(label = name, value = rpm))
+                    else:
+                        diag_strs.append(DiagnosticString(label = name, value = ipmi_val))
+
+            # If CPU is hot we get an alarm from ipmitool, report that too
+            if name.startswith('CPU') and name.endswith('hot'):
+                if ipmi_val == '0x01':
+                    diag_strs.append(DiagnosticString(label = name, value = 'OK'))
+                else:
+                    diag_strs.append(DiagnosticString(label = name, value = 'Hot'))
+                    diag_level = max(diag_level, 2)
+                    diag_msgs.append('CPU Hot Alarm')
+
     except Exception, e:
-        traceback.print_exc()
+        diag_strs.append(DiagnosticString(label = 'Exception', value = traceback.format_exc()))
+        diag_level = 2
+        diag_msgs.append('Exception')
+
+    return diag_strs, diag_vals, diag_msgs, diag_level
         
-        stat.level = 2
-        stat.message = 'Exception'
-        stat.strings.append(DiagnosticString(label = 'Exception', value = str(e)))
+
+# Check core temps 
+# Use 'find /sys -name temp1_input' to find cores
+# Read from every core, divide by 1000
+def check_core_temps(sys_temp_strings):
+    diag_vals = []
+    diag_strs = []
+    diag_level = 0
+    diag_msgs = []
+    
+    for index, temp_str in enumerate(sys_temp_strings):
+        if len(temp_str) < 5:
+            continue
         
-    return stat
+        cmd = 'cat %s' % temp_str
+        p = subprocess.Popen(cmd, stdout = subprocess.PIPE, 
+                             stderr = subprocess.PIPE, shell = True)
+        stdout, stderr = p.communicate()
+        retcode = p.returncode
 
+        if retcode != 0:
+            diag_level = 2
+            diag_msg = [ 'Core Temp Error' ]
+            diag_strs = [ DiagnosticString(label = 'Core Temp Error', value = stderr), DiagnosticString(label = 'Output', value = stdout) ]
+            return diag_strs, diag_vals, diag_msgs, diag_level
+  
+        tmp = stdout.strip()
+        if unicode(tmp).isnumeric():
+            temp = float(tmp) / 1000
+            diag_vals.append(DiagnosticValue(label = 'Core %d Temp' % index, value = temp))
 
+            if temp >= 80 and temp < 85:
+                diag_level = max(diag_level, 1)
+                diag_msgs.append('Warm')
+            if temp >= 85:
+                diag_level = max(diag_level, 2)
+                diag_msgs.append('Hot')
 
+        else:
+            diag_strs.append(DiagnosticString(label = 'Core %s Temp' % index, value = tmp))
 
-def check_nfs_stat(hostname):
-    stat = DiagnosticStatus()
-    stat.name = '%s NFS I/O' % hostname
-    stat.level = 0
-    stat.message = 'OK'
-    stat.strings = []
-    stat.values = []
+    return diag_strs, diag_vals, diag_msgs, diag_level
 
+## Checks clock speed from reading from CPU info
+def check_clock_speed(enforce_speed):
+    strs = []
+    vals = []
+    msgs = []
+    lvl = 0
+
     try:
-        p = subprocess.Popen('iostat -n',
+        p = subprocess.Popen('cat /proc/cpuinfo | grep MHz', 
                              stdout = subprocess.PIPE,
                              stderr = subprocess.PIPE, shell = True)
         stdout, stderr = p.communicate()
         retcode = p.returncode
-        
-        for index, row in enumerate(stdout.split('\n')):
-            if index < 3:
-                continue
 
-            lst = row.split()
-            if len(lst) < 7:
+        if retcode != 0:
+            lvl = 2
+            msgs = [ 'Clock speed error' ]
+            strs = [ DiagnosticString(label = 'Clock speed error', value = stderr), 
+                          DiagnosticString(label = 'Output', value = stdout) ]
+            
+            return (strs, vals, msgs, lvl)
+
+        for index, ln in enumerate(stdout.split('\n')):
+            words = ln.split(':')
+            if len(words) < 2:
                 continue
 
-            file_sys = lst[0]
-            read_blk = float(lst[1])
-            write_blk = float(lst[2])
-            read_blk_dir = float(lst[3])
-            write_blk_dir = float(lst[4])
-            r_blk_srv = float(lst[5])
-            w_blk_srv = float(lst[6])
+            speed = words[1].strip().split('.')[0]
+            if unicode(speed).isnumeric():
+                mhz = float(speed)
+                vals.append(DiagnosticValue(label = 'Core %d Speed' % index, value = mhz))
+                
+                if mhz < 2240 and mhz > 2150:
+                    lvl = max(lvl, 1)
+                if mhz <= 2150:
+                    lvl = max(lvl, 2)
 
-            stat.values.append(DiagnosticValue(
-                    label = '%s Read Blks/s' % file_sys, value=read_blk))
-            stat.values.append(DiagnosticValue(
-                    label = '%s Write Blks/s' % file_sys, value=write_blk))
-            stat.values.append(DiagnosticValue(
-                    label = '%s Read Blk dir/s' % file_sys, value=read_blk_dir))
-            stat.values.append(DiagnosticValue(
-                    label = '%s Write Blks dir/s' % file_sys, value=write_blk_dir))
-            stat.values.append(DiagnosticValue(
-                    label = '%s Read Blks srv/s' % file_sys, value=r_blk_srv))
-            stat.values.append(DiagnosticValue(
-                    label = '%s Write Blks srv/s' % file_sys, value=w_blk_srv))
-        
+            else:
+                lvl = max(lvl, 2)
+                strs.append(DiagnosticString(label = 'Core %d Speed' % index, value = speed))
+
+        if not enforce_speed:
+            lvl = 0
+
+        if lvl == 1 and enforce_speed:
+            msgs = [ 'Core slowing' ]
+        elif lvl == 2 and enforce_speed:
+            msgs = [ 'Core throttled' ]
+
     except Exception, e:
-        traceback.print_exc()
-        stat.level = 2
-        stat.message = 'Exception'
-        stat.strings.append(DiagnosticString(label = 'Exception', value = str(e)))
-        
-    return stat
+        rospy.logerr(traceback.format_exc())
+        lvl = 2
+        msgs.append('Exception')
+        strs.append(DiagnosticString(label = 'Exception', value = traceback.format_exc()))
 
+    return strs, vals, msgs, lvl
+                    
+
+# Add msgs output, too
 def check_uptime():
     level = 0
     vals = []
-    string = DiagnosticString(label = 'Load Average Status', value = 'Error')
+    str = DiagnosticString(label = 'Load Average Status', value = 'Error')
     
     try:
         p = subprocess.Popen('uptime', stdout = subprocess.PIPE, 
@@ -182,15 +288,16 @@
         if load1 > 35 or load5 > 25 or load15 > 20:
             level = 2
 
-        string = DiagnosticString(label = 'Load Average Status', value = stat_dict[level])
+        str = DiagnosticString(label = 'Load Average Status', value = stat_dict[level])
 
     except Exception, e:
-        rospy.logerr(traceback.print_exc())
+        rospy.logerr(traceback.format_exc())
         level = 2
-        DiagnosticString(label = 'Load Average Status', value = str(e))
+        str = DiagnosticString(label = 'Load Average Status', value = str(e))
         
-    return level, vals, string
+    return level, vals, str
 
+# Add msgs output
 def check_memory():
     values = []
     str = DiagnosticString(label = 'Memory Status', value = 'Exception')
@@ -227,13 +334,11 @@
     return level, values, str
 
 # Use mpstat
-def check_usage(hostname):
-    stat = DiagnosticStatus()
-    stat.name = '%s CPU Usage' % hostname
-    stat.level = 0
-    stat.strings = []
-    stat.values = []
-
+def check_mpstat():
+    strs = []
+    vals = []
+    mp_level = 0
+    
     try:
         p = subprocess.Popen('mpstat -P ALL 1 1',
                              stdout = subprocess.PIPE,
@@ -259,71 +364,338 @@
             user = float(lst[3])
             system = float(lst[5])
             
-            stat.values.append(DiagnosticValue(label = 'CPU %s User' % cpu_name, value = user))
-            stat.values.append(DiagnosticValue(label = 'CPU %s System' % cpu_name, value = system))
-            stat.values.append(DiagnosticValue(label = 'CPU %s Idle' % cpu_name, value = idle))
+            vals.append(DiagnosticValue(label = 'CPU %s User' % cpu_name, value = user))
+            vals.append(DiagnosticValue(label = 'CPU %s System' % cpu_name, value = system))
+            vals.append(DiagnosticValue(label = 'CPU %s Idle' % cpu_name, value = idle))
 
-            cpu_lvl = 0
             if user > 75.0:
-                cpu_lvl = 1
+                mp_level = 1
             if user > 90.0:
-                cpu_lvl = 2
-            stat.level = max(stat.level, cpu_lvl)
+                mp_level = 2
 
-            stat.strings.append(DiagnosticString(label = 'CPU %s Status' % cpu_name, value = stat_dict[cpu_lvl]))
+            strs.append(DiagnosticString(label = 'CPU %s Status' % cpu_name, value = stat_dict[mp_level]))
+
+    except Exception, e:
+        mp_level = 2
+        strs.append(DiagnosticString(label = 'mpstat Exception', value = str(e)))
+
+    return mp_level, vals, strs
+
+## Returns names for core temperature files
+## Returns list of names, each name can be read like file
+def get_core_temp_names():
+    temp_vals = []
+    try:
+        p = subprocess.Popen('find /sys -name temp1_input', 
+                             stdout = subprocess.PIPE,
+                             stderr = subprocess.PIPE, shell = True)
+        stdout, stderr = p.communicate()
+        retcode = p.returncode
+
+        if retcode != 0:
+            rospy.logerr('Error find core temp locations: %s' % stderr)
+            return []
+        
+        for ln in stdout.split('\n'):
+            temp_vals.append(ln.strip())
+        
+        return temp_vals
+    except:
+        rospy.logerr('Exception finding temp vals: %s' % traceback.format_exc())
+        return []
+
+def update_status_stale(stat, last_update_time):
+    time_since_update = rospy.get_time() - last_update_time
+
+    level = stat.level
+    stale_status = 'OK'
+    if time_since_update > 20:
+        stale_status = 'Lagging'
+        level = max(level, 1)
+    if time_since_update > 35:
+        stale_status = 'Stale'
+        level = max(level, 2)
+        
+    stat.strings.pop(0)
+    stat.values.pop(0)
+    stat.strings.insert(0, DiagnosticString(label = 'Update Status', value = stale_status))
+    stat.values.insert(0, DiagnosticValue(label = 'Time Since Update', value = time_since_update))
+
+class CPUMonitor():
+    def __init__(self, hostname):
+        rospy.init_node('cpu_monitor_%s' % hostname)
+        
+        self._diag_pub = rospy.Publisher('/diagnostics', DiagnosticMessage)
+
+        self._mutex = threading.Lock()
+
+        self._check_ipmi = rospy.get_param('check_ipmi_tool', True)
+        self._enforce_speed = rospy.get_param('enforce_clock_speed', True)
+        
+        # Get temp_input files
+        self._temp_vals = get_core_temp_names()
+
+        # CPU stats
+        self._temp_stat = DiagnosticStatus()
+        self._temp_stat.name = '%s CPU Temperature' % hostname
+        self._temp_stat.level = 2
+        self._temp_stat.message = 'No Data'
+        self._temp_stat.strings = [ DiagnosticString(label = 'Update Status', value = 'No Data' )]
+        self._temp_stat.values = [ DiagnosticValue(label = 'Time Since Last Update', value = 100000 )]
+
+        self._usage_stat = DiagnosticStatus()
+        self._usage_stat.name = '%s CPU Usage' % hostname
+        self._usage_stat.level = 2
+        self._usage_stat.message = 'No Data'
+        self._usage_stat.strings = [ DiagnosticString(label = 'Update Status', value = 'No Data' )]
+        self._usage_stat.values = [ DiagnosticValue(label = 'Time Since Last Update', value = 100000 )]
+
+        self._nfs_stat = DiagnosticStatus()
+        self._nfs_stat.name = '%s NFS I/O' % hostname
+        self._nfs_stat.level = 2
+        self._nfs_stat.message = 'No Data'
+        self._nfs_stat.strings = [ DiagnosticString(label = 'Update Status', value = 'No Data' )]
+        self._nfs_stat.values = [ DiagnosticValue(label = 'Time Since Last Update', value = 100000 )]
+
+        self._last_temp_time = 0
+        self._last_usage_time = 0
+        self._last_nfs_time = 0
+        self._last_publish_time = 0
+
+        self._temps_timer = None
+        self._usage_timer = None
+        self._nfs_timer = None
+        self._publish_timer = None
+        ##@TODO Need wireless stuff, at some point, put NFS in usage status
+        
+        # Start checking everything
+        self.check_temps()
+        self.check_nfs_stat()
+        self.check_usage()
+
+        self._publish_timer = threading.Timer(1.0, self.publish_stats)
+        self._publish_timer.start()
+
+    ## Must have the lock to cancel everything
+    def cancel_timers(self):
+        if self._temps_timer:
+            self._temps_timer.cancel()
+
+        if self._nfs_timer:
+            self._nfs_timer.cancel()
+
+        if self._usage_timer:
+            self._usage_timer.cancel()
+
+        if self._publish_timer:
+            self._publish_timer.cancel()
+
+    def check_nfs_stat(self):
+        if rospy.is_shutdown():
+            self._mutex.acquire()
+            self.cancel_timers()
+            self._mutex.release()
+            return
+
+        nfs_level = 0
+        msg = 'OK'
+        strs = [ DiagnosticString(label = 'Update Status', value = 'OK' )]
+        vals = [ DiagnosticValue(label = 'Time Since Last Update', value = 0 )]
+
+        try:
+            p = subprocess.Popen('iostat -n',
+                                 stdout = subprocess.PIPE,
+                                 stderr = subprocess.PIPE, shell = True)
+            stdout, stderr = p.communicate()
+            retcode = p.returncode
             
+            for index, row in enumerate(stdout.split('\n')):
+                if index < 3:
+                    continue
+                
+                lst = row.split()
+                if len(lst) < 7:
+                    continue
+                
+                file_sys = lst[0]
+                read_blk = float(lst[1])
+                write_blk = float(lst[2])
+                read_blk_dir = float(lst[3])
+                write_blk_dir = float(lst[4])
+                r_blk_srv = float(lst[5])
+                w_blk_srv = float(lst[6])
+                
+                vals.append(DiagnosticValue(
+                        label = '%s Read Blks/s' % file_sys, value=read_blk))
+                vals.append(DiagnosticValue(
+                        label = '%s Write Blks/s' % file_sys, value=write_blk))
+                vals.append(DiagnosticValue(
+                        label = '%s Read Blk dir/s' % file_sys, value=read_blk_dir))
+                vals.append(DiagnosticValue(
+                        label = '%s Write Blks dir/s' % file_sys, value=write_blk_dir))
+                vals.append(DiagnosticValue(
+                        label = '%s Read Blks srv/s' % file_sys, value=r_blk_srv))
+                vals.append(DiagnosticValue(
+                        label = '%s Write Blks srv/s' % file_sys, value=w_blk_srv))
+                
+        except Exception, e:
+            rospy.logerr(traceback.format_exc())
+            nfs_level = 2
+            msg = 'Exception'
+            strings.append(DiagnosticString(label = 'Exception', value = str(e)))
+            
+        self._mutex.acquire()
+        
+        self._nfs_stat.level = nfs_level
+        self._nfs_stat.message = msg
+        self._nfs_stat.strings = strs
+        self._nfs_stat.values = vals
+        
+        self._last_nfs_time = rospy.get_time()
+
+        if not rospy.is_shutdown():
+            self._nfs_timer = threading.Timer(5.0, self.check_nfs_stat)
+            self._nfs_timer.start()
+        else:
+            self.cancel_timers()
+
+        self._mutex.release()
+
+    ## Call every 10sec at minimum
+    def check_temps(self):
+        if rospy.is_shutdown():
+            self._mutex.acquire()
+            self.cancel_timers()
+            self._mutex.release()
+            return
+
+        diag_strs = [ DiagnosticString(label = 'Update Status', value = 'OK' ) ]
+        diag_vals = [ DiagnosticValue(label = 'Time Since Last Update', value = 0 ) ]
+        diag_msgs = []
+        diag_level = 0
+
+        if self._check_ipmi:
+            ipmi_strs, ipmi_vals, ipmi_msgs, ipmi_level = check_ipmi()
+            diag_strs.extend(ipmi_strs)
+            diag_vals.extend(ipmi_vals)
+            diag_msgs.extend(ipmi_msgs)
+            diag_level = max(diag_level, ipmi_level)
+
+        core_strs, core_vals, core_msgs, core_level = check_core_temps(self._temp_vals)
+        diag_strs.extend(core_strs)
+        diag_vals.extend(core_vals)
+        diag_msgs.extend(core_msgs)
+        diag_level = max(diag_level, core_level)
+
+        clock_strs, clock_vals, clock_msgs, clock_level = check_clock_speed(self._enforce_speed)
+        diag_strs.extend(clock_strs)
+        diag_vals.extend(clock_vals)
+        diag_msgs.extend(clock_msgs)
+        diag_level = max(diag_level, clock_level)
+
+        diag_log = set(diag_msgs)
+        if len(diag_log) > 0:
+            message = string.join(diag_log, ', ')
+        else:
+            message = stat_dict[diag_level]
+
+        self._mutex.acquire()
+        self._last_temp_time = rospy.get_time()
+        
+        self._temp_stat.level = diag_level
+        self._temp_stat.message = message
+        self._temp_stat.strings = diag_strs
+        self._temp_stat.values = diag_vals
+
+        if not rospy.is_shutdown():
+            self._temp_timer = threading.Timer(5.0, self.check_temps)
+            self._temp_timer.start()
+        else:
+            self.cancel_timers()
+
+        self._mutex.release()
+
+    def check_usage(self):
+        if rospy.is_shutdown():
+            self._mutex.acquire()
+            self.cancel_timers()
+            self._mutex.release()
+            return 
+
+        diag_level = 0
+        diag_strs = [ DiagnosticString(label = 'Update Status', value = 'OK' ) ]
+        diag_vals = [ DiagnosticValue(label = 'Time Since Last Update', value = 0 )]
+        
+        # Check mpstat
+        mp_level, mp_vals, mp_strs = check_mpstat()
+        diag_vals.extend(mp_vals)
+        diag_strs.extend(mp_strs)
+        diag_level = max(diag_level, mp_level)
+            
         # Check uptime
         uptime_level, up_vals, up_str = check_uptime()
-        stat.values.extend(up_vals)
-        stat.strings.append(up_str)
-        stat.level = max(stat.level, uptime_level)
-
+        diag_vals.extend(up_vals)
+        diag_strs.append(up_str)
+        diag_level = max(diag_level, uptime_level)
+        
         # Check memory
         mem_level, mem_vals, mem_str = check_memory()
-        stat.values.extend(mem_vals)
-        stat.strings.append(mem_str)
-        stat.level = max(stat.level, mem_level)
+        diag_vals.extend(mem_vals)
+        diag_strs.append(mem_str)
+        diag_level = max(diag_level, mem_level)
+            
+
+        # Update status
+        self._mutex.acquire()
+        self._last_usage_time = rospy.get_time()
+        self._usage_stat.level = diag_level
+        self._usage_stat.values = diag_vals
+        self._usage_stat.strings = diag_strs
         
-        stat.message = stat_dict[stat.level]
+        self._usage_stat.message = stat_dict[diag_level]
 
-    except Exception, e:
-        stat.level = 2
-        stat.message = 'Exception'
-        stat.strings.append(DiagnosticString(label = 'mpstat Exception', value = str(e)))
+        if not rospy.is_shutdown():
+            self._usage_timer = threading.Timer(5.0, self.check_usage)
+            self._usage_timer.start()
+        else:
+            self.cancel_timers()
 
-    return stat
+        self._mutex.release()
 
 
-def main():
-    hds = []
+    def publish_stats(self):
+        self._mutex.acquire()
+                
+        # Update everything with last update times
+        update_status_stale(self._temp_stat, self._last_temp_time)
+        update_status_stale(self._usage_stat, self._last_usage_time)
+        update_status_stale(self._nfs_stat, self._last_nfs_time)
 
+        msg = DiagnosticMessage()
+        msg.status.append(self._temp_stat)
+        msg.status.append(self._usage_stat)
+        msg.status.append(self._nfs_stat)
+
+        if rospy.get_time() - self._last_publish_time > 0.5:
+            self._diag_pub.publish(msg)
+            self._last_publish_time = rospy.get_time()
+
+        if not rospy.is_shutdown():
+            self._publish_timer = threading.Timer(1.0, self.publish_stats)
+            self._publish_timer.start()
+        else:
+            self.cancel_timers()
+
+        self._mutex.release()
+
+if __name__ == '__main__':
     hostname = socket.gethostname()
-    
-    rospy.init_node('cpu_monitor_%s' % hostname, anonymous = True)
-    
-    pub = rospy.Publisher('/diagnostics', DiagnosticMessage)
 
-    no_cpu_temp = rospy.get_param('no_cpu_temp', False)
+    cpu_node = CPUMonitor(hostname)
 
-    while not rospy.is_shutdown():
-        msg = DiagnosticMessage()
-        msg.status = []
-         
-        # Temperature, don't check if user says not to
-        if not no_cpu_temp:
-            msg.status.append(check_cpu_temp(hostname))
-        
-        # Usage, memory, and load average
-        msg.status.append(check_usage(hostname))
 
-        # NFS status
-        msg.status.append(check_nfs_stat(hostname))
-        
-        pub.publish(msg)
-        time.sleep(1.0)
 
+    
 
+            
 
-if __name__ == '__main__':
-    main()
-            

Deleted: pkg/trunk/pr2/pr2_computer_monitor/scripts/disk_usage.py
===================================================================
--- pkg/trunk/pr2/pr2_computer_monitor/scripts/disk_usage.py	2009-07-21 06:36:25 UTC (rev 19292)
+++ pkg/trunk/pr2/pr2_computer_monitor/scripts/disk_usage.py	2009-07-21 06:44:50 UTC (rev 19293)
@@ -1,106 +0,0 @@
-#!/usr/bin/env python
-# Software License Agreement (BSD License)
-#
-# Copyright (c) 2008, Willow Garage, Inc.
-# All rights reserved.
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions
-# are met:
-#
-#  * Redistributions of source code must retain the above copyright
-#    notice, this list of conditions and the following disclaimer.
-#  * Redistributions in binary form must reproduce the above
-#    copyright notice, this list of conditions and the following
-#    disclaimer in the documentation and/or other materials provided
-#    with the distribution.
-#  * Neither the name of the Willow Garage nor the names of its
-#    contributors may be used to endorse or promote products derived
-#    from this software without specific prior written permission.
-#
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
-# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
-# COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
-# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
-# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
-# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
-# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
-# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
-# ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-# POSSIBILITY OF SUCH DAMAGE.
-
-# Author: Tully Foote
-
-import roslib
-roslib.load_manifest('runtime_monitor')
-
-from diagnostic_msgs.msg import DiagnosticMessage, DiagnosticStatus, DiagnosticValue, DiagnosticString
-import sys
-import rospy
-import socket
-from subprocess import Popen, PIPE
-
-import time
-
-
-NAME = 'disk_usage_monitor'
-
-def disk_usage_monitor(device, offset=5):
-    pub = rospy.Publisher("/diagnostics", DiagnosticMessage)
-    rospy.init_node(NAME, anonymous=True)
-
-    hostname = socket.gethostbyaddr(socket.gethostname())[0]
-
-    while not rospy.is_shutdown():
-        try:
-            p = Popen(["df", "-P", "--block-size=1G", device], stdout=PIPE, stdin=PIPE, stderr=PIPE)
-            res = p.wait()
-            (o,e) = p.communicate()
-        except OSError, (errno, msg):
-            if errno == 4:
-                break #ctrl-c interrupt
-            else:
-                raise
-        if (res == 0):
-            second_row = o.split("\n")[1]
-            g_available = float(second_row.split()[-3])
-            #print second_row, g_available
-            #sys.exit(-1)
-            
-            component_string = "Disk Usage for on host "+ hostname + " for device: " +device
-           
-            if (g_available > offset):
-                stat = DiagnosticStatus(0,component_string, "Acceptable Disk Space Avaiable", [DiagnosticValue(float(g_available),"free space (GB)")],[])
-            elif (g_available > 1):
-                stat = DiagnosticStatus(1,component_string, "Low Disk Space Avaiable", [DiagnosticValue(float(g_available),"free space (GB)")],[])
-            else:
-                stat = DiagnosticStatus(2,component_string, "Critically Low Disk Space Avaiable", [DiagnosticValue(float(g_available),"free space (GB)")],[])
-        else:
-            stat = DiagnosticStatus(2,component_string, "Failure to run \"df -P --block-size\"", [],[])
-
-        pub.publish(DiagnosticMessage(None, [stat]))
-        time.sleep(1)
-
-def disk_usage_monitor_main(argv=sys.argv):
-    import optparse
-    parser = optparse.OptionParser(usage="usage: disk_usage_monitor device [size in GB default = 5]")
-    options, args = parser.parse_args(argv[1:])
-    if len(args) == 1:
-        device, offset = args[0], 5
-    elif len(args) == 2:
-        device, offset = args
-        try:
-            offset = int(offset)
-        except:
-            parser.error("size-tolerance must be a number")
-    else:
-        parser.error("Invalid arguments")
-    disk_usage_monitor(device, offset)
-
-if __name__ == "__main__":
-    try:
-        disk_usage_monitor_main(rospy.myargv())
-    except KeyboardInterrupt: pass
-    

Modified: pkg/trunk/pr2/pr2_computer_monitor/scripts/hd_monitor.py
===================================================================
--- pkg/trunk/pr2/pr2_computer_monitor/scripts/hd_monitor.py	2009-07-21 06:36:25 UTC (rev 19292)
+++ pkg/trunk/pr2/pr2_computer_monitor/scripts/hd_monitor.py	2009-07-21 06:44:50 UTC (rev 19293)
@@ -41,6 +41,7 @@
 
 import traceback
 import threading
+from threading import Timer
 import sys, os, time
 import subprocess
 
@@ -55,18 +56,18 @@
 hd_temp_error = 55
 
 stat_dict = { 0: 'OK', 1: 'Warning', 2: 'Error' }
+temp_dict = { 0: 'OK', 1: 'Warm', 2: 'Hot' }
 
-def check_hd_temp(hostname, hds, no_temp_warn):
-    stat = DiagnosticStatus()
-    stat.level = 0
-    stat.name = "%s HD Temps" % hostname
-    stat.strings = []
-    stat.values = []
+def get_hddtemp_data():
+    hds = ['/dev/sda', '/dev/sdb']
 
-    for index, hd in enumerate(hds):
-        stat.strings.append(DiagnosticString(label = 'Disk %d HW addr' % index, value = hd))
-        try:
-            p = subprocess.Popen('hddtemp %s' % hd, 
+    drives = []
+    makes = []
+    temps = []
+
+    try:
+        for hd in hds:
+            p = subprocess.Popen('hddtemp %s' % hd,
                                  stdout = subprocess.PIPE,
                                  stderr = subprocess.PIPE, shell = True)
             stdout, stderr = p.communicate()
@@ -74,15 +75,9 @@
             
             stdout = stdout.replace('\n', '')
             stderr = stderr.replace('\n', '')
-            
-            temp_level = retcode
-            
-            if retcode == 0:
-                temp_level = 0
-            else:
-                temp_level = 2
-                    
-            # Parse STDOUT for temp, device ID
+
+            rospy.logerr('Hddtemp output: %s\n%s' % (stdout, stderr))
+
             lst = stdout.split(':')
             if len(lst) > 2:
                 dev_id = lst[1]
@@ -90,130 +85,285 @@
                 
                 if unicode(tmp).isnumeric():
                     temp = float(tmp)
-                    if temp > hd_temp_warn:
-                        temp_level = 1
-                    if temp > hd_temp_error:
-                        temp_level = 2
-                else:
-                    temp = float(0.0)
-                    temp_level = 2
-                    
-            else:
-                dev_id = 'ERROR'
-                temp = 0.0
-                temp_level = 2
-            
-            temp_status = stat_dict[temp_level]
-            if no_temp_warn and temp_level > 0:
-                temp_level = 0
-                temp_status = 'Warning Disabled'
+                     
+                    drives.append(hd)
+                    makes.append(dev_id)
+                    temps.append(temp)
 
+                
+    except:
+        rospy.logerr(traceback.format_exc())
+        
+    return drives, makes, temps
 
-            stat.strings.append(DiagnosticString(label = 'Disk %d Temp Status' % index, value = temp_status))
-            stat.strings.append(DiagnosticString(label = 'Disk %d Device ID' % index, value = dev_id))
-            stat.values.append(DiagnosticValue(label = 'Disk %d Temp' % index, value = temp))
+def get_hddtemp_data_socket(hostname = 'localhost', port = 7634):
+    try:
+        # Retry, see if we get some data
+        for i in range(0, 5):
+            hd_sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+            hd_sock.connect((hostname, port))
+            sock_data = hd_sock.recv(1024)
+            hd_sock.close()
             
-            stat.level = max(stat.level, temp_level)
-                
-        except:
-            traceback.print_exc()
-            stat.values.append(DiagnosticValue(label = 'Disk %d Temp' % index, value = 0))
-            stat.strings.append(DiagnosticString(label = 'Disk %d Temp Status' % index, value = stat_dict[2]))
-            stat.strings.append(DiagnosticString(label = 'Disk %d Device ID' % index, value = 'No ID'))
-            stat.level = 2
-            stat.message = stat_dict[2]
+            sock_vals = sock_data.split('|')
+            if len(sock_vals) > 1:
+                break
 
-    stat.message = stat_dict[stat.level]
-    return stat
+        idx = 0
+        
+        drives = []
+        makes = []
+        temps = []
+        while idx + 5 < len(sock_vals):
+            drives.append(sock_vals[idx + 1])
+            makes.append(sock_vals[idx + 2])
+            temp = float(sock_vals[idx + 3])
+            temps.append(temp)
+                        
+            idx = idx + 5
 
-def check_disk_usage(hostname, home_dir):
-    stat = DiagnosticStatus()
-    stat.level = 0
-    stat.name = "%s HD Disk Usage" % hostname
-    stat.strings = []
-    stat.values = []
+        return drives, makes, temps
+    except:
+        rospy.logerr(traceback.format_exc())
+        return [], [], []
 
-    try:
-        p = subprocess.Popen(["df", "-P", "--block-size=1G", home_dir], 
-                             stdout=subprocess.PIPE, stderr=subprocess.PIPE)
-        stdout, stderr = p.communicate()
-        retcode = p.returncode
 
-        if (retcode == 0):
-            stat.strings.append(DiagnosticString(label = 'Disk Space Reading', value = 'OK'))
-            row_count = 0
-            for row in stdout.split('\n'):
-                if len(row.split()) < 2:
-                    continue
-                if not unicode(row.split()[1]).isnumeric() or float(row.split()[1]) < 10: # Ignore small drives
-                    continue
+
+def update_status_stale(stat, last_update_time):
+    time_since_update = rospy.get_time() - last_update_time
+
+    level = stat.level
+    stale_status = 'OK'
+    if time_since_update > 45:
+        stale_status = 'Lagging'
+        level = max(level, 1)
+    if time_since_update > 90:
+        stale_status = 'Stale'
+        level = max(level, 2)
+        
+    stat.strings.pop(0)
+    stat.values.pop(0)
+    stat.strings.insert(0, DiagnosticString(label = 'Update Status', value = stale_status))
+    stat.values.insert(0, DiagnosticValue(label = 'Time Since Update', value = time_since_update))
+
+class hdMonitor():
+    def __init__(self, hostname, home_dir = ''):
+        rospy.init_node('hd_monitor_%s' % hostname)
+
+        self._mutex = threading.Lock()
+        
+        self._hostname = hostname
+        self._no_temp_warn =  rospy.get_param('no_hd_temp_warn', False)
+        self._home_dir = home_dir
+
+        self._diag_pub = rospy.Publisher('/diagnostics', DiagnosticMessage)
+
+        self._temp_stat = DiagnosticStatus()
+        self._temp_stat.name = "%s HD Temperature" % hostname
+        self._temp_stat.level = 2
+        self._temp_stat.message = 'No Data'
+        self._temp_stat.strings = [ DiagnosticString(label = 'Update Status', value = 'No Data' )]
+        self._temp_stat.values = [ DiagnosticValue(label = 'Time Since Last Update', value = 100000 )]
+
+        if self._home_dir != '':
+            self._usage_stat = DiagnosticStatus()
+            self._usage_stat.level = 2
+            self._usage_stat.name = '%s HD Usage' % hostname
+            self._usage_stat.strings = [ DiagnosticString(label = 'Update Status', value = 'No Data' )]
+            self._usage_stat.values = [ DiagnosticValue(label = 'Time Since Last Update', value = 100000) ]
+            self.check_disk_usage()
+
+        self._last_temp_time = 0
+        self._last_usage_time = 0
+        self._last_publish_time = 0
+        
+        self.check_temps()
+
+        self._temp_timer = None
+        self._usage_timer = None
+        self._publish_timer = threading.Timer(1.0, self.publish_stats)
+        self._publish_timer.start()
+        
+        ## Must have the lock to cancel everything
+    def cancel_timers(self):
+        if self._temp_timer:
+            self._temp_timer.cancel()
+ 
+        if self._usage_timer:
+            self._usage_timer.cancel()
+
+        if self._publish_timer:
+            self._publish_timer.cancel()
+
+    def check_temps(self):
+        if rospy.is_shutdown():
+            self._mutex.acquire()
+            self.cancel_timers()
+            self._mutex.release()
+            return
+
+        diag_strs = [ DiagnosticString(label = 'Update Status', value = 'OK' ) ]
+        diag_vals = [ DiagnosticValue(label = 'Time Since Last Update', value = 0 ) ]
+        diag_level = 0
+        
+        
+        drives, makes, temps = get_hddtemp_data_socket()
+        if len(drives) == 0:
+            diag_strs.append(DiagnosticString(label = 'Disk Temp Data', value = 'No hddtemp data'))
+            diag_level = 2
+
+        for index in range(0, len(drives)):
+            temp = temps[index]
+            temp_level = 0
+            if temp > hd_temp_warn:
+                temp_level = 1
+            if temp > hd_temp_error:
+                temp_level = 2
+            diag_level = max(diag_level, temp_level)
+
+            diag_strs.append(DiagnosticString(label = 'Disk %d Temp Status' % index, value = temp_dict[temp_level]))
+            diag_strs.append(DiagnosticString(label = 'Disk %d Mount Pt.' % index, value = drives[index]))
+            diag_strs.append(DiagnosticString(label = 'Disk %d Device ID' % index, value = makes[index]))
+            diag_vals.append(DiagnosticValue(label = 'Disk %d Temp' % index, value = temp))
+        
+        self._mutex.acquire()
+        self._last_temp_time = rospy.get_time()
+
+        self._temp_stat.strings = diag_strs
+        self._temp_stat.values = diag_vals
+
+        self._temp_stat.level = diag_level
+
+        # Give No Data message if we have no reading
+        self._temp_stat.message = temp_dict[diag_level]
+        if len(self._temp_stat.values) == 1: 
+            self._temp_stat.message = 'No Data'
+
+        if self._no_temp_warn and self._temp_stat.message != 'No Data':
+            self._temp_stat.level = 0
+
+
+        if not rospy.is_shutdown():
+            self._temp_timer = threading.Timer(30.0, self.check_temps)
+            self._temp_timer.start()
+        else:
+            self.cancel_timers()
+
+        self._mutex.release()
+        
+    def check_disk_usage(self):
+        diag_strs = [ DiagnosticString(label = 'Update Status', value = 'OK' ) ]
+        diag_vals = [ DiagnosticValue(label = 'Time Since Last Update', value = 0 ) ]
+        diag_level = 0
+        
+        try:
+            p = subprocess.Popen(["df", "-P", "--block-size=1G", self._home_dir], 
+                                 stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+            stdout, stderr = p.communicate()
+            retcode = p.returncode
+            
+            if (retcode == 0):
                 
-                row_count += 1
-                g_available = float(row.split()[-3])
-                name = row.split()[0]
-                size = float(row.split()[1])
-                mount_pt = row.split()[-1]
+                diag_strs.append(DiagnosticString(label = 'Disk Space Reading', value = 'OK'))
+                row_count = 0
+                for row in stdout.split('\n'):
+                    if len(row.split()) < 2:
+                        continue
+                    if not unicode(row.split()[1]).isnumeric() or float(row.split()[1]) < 10: # Ignore small drives
+                        continue
                 
-                if (g_available > low_hd_level):
-                    level = 0
-                elif (g_available > critical_hd_level):
-                    level = 1
-                else:
-                    level = 2
-        
-                stat.strings.append(DiagnosticString(label = 'Disk %d Name' % row_count, value = name))
-                stat.values.append(DiagnosticString(label = 'Disk %d Available' % row_count, value = g_available))
-                stat.values.append(DiagnosticString(label = 'Disk %d Size' % row_count, value = size))
-                stat.strings.append(DiagnosticString(label = 'Disk %d Status' % row_count, value = stat_dict[level]))
-                stat.strings.append(DiagnosticString(label = 'Disk %d Mount Point' % row_count, value = mount_pt))
+                    row_count += 1
+                    g_available = float(row.split()[-3])
+                    name = row.split()[0]
+                    size = float(row.split()[1])
+                    mount_pt = row.split()[-1]
+                    
+                    if (g_available > low_hd_level):
+                        level = 0
+                    elif (g_available > critical_hd_level):
+                        level = 1
+                    else:
+                        level = 2
+                        
+                    diag_strs.append(DiagnosticString(
+                            label = 'Disk %d Name' % row_count, value = name))
+                    diag_vals.append(DiagnosticString(
+                            label = 'Disk %d Available' % row_count, value = g_available))
+                    diag_vals.append(DiagnosticString(
+                            label = 'Disk %d Size' % row_count, value = size))
+                    diag_strs.append(DiagnosticString(
+                            label = 'Disk %d Status' % row_count, value = stat_dict[level]))
+                    diag_strs.append(DiagnosticString(
+                            label = 'Disk %d Mount Point' % row_count, value = mount_pt))
+                    
+                    diag_level = max(diag_level, level)
                 
-                stat.level = max(level, stat.level)
-                
-        else:
-            stat.strings.append(DiagnosticString(label = 'Disk Space Reading', value = 'Failed'))
-            stat.level = 2
+            else:
+                diag_strs.append(DiagnosticString(label = 'Disk Space Reading', value = 'Failed'))
+                diag_level = 2
             
+                    
+        except:
+            rospy.logerr(traceback.format_exc())
+            diag_strs.append(DiagnosticString(label = 'Disk Space Reading', value = 'Exception'))
+            diag_strs.append(DiagnosticString(label = 'Disk Space Ex', value = traceback.format_exc()))
 
-    except:
-        traceback.print_exc()
-        stat.strings.append(DiagnosticString(label = 'Disk Space Reading', value = 'Exception'))
-        stat.level = 2
+            diag_level = 2
+            
+            stat.message = stat_dict[stat.level]
 
-    stat.message = stat_dict[stat.level]
-    return stat
+        # Update status
+        self._mutex.acquire()
+        self._last_usage_time = rospy.get_time()
+        self._usage_stat.level = diag_level
+        self._usage_stat.values = diag_vals
+        self._usage_stat.strings = diag_strs
+        
+        self._usage_stat.message = stat_dict[diag_level]
 
+        if not rospy.is_shutdown():
+            self._usage_timer = threading.Timer(5.0, self.check_disk_usage)
+            self._usage_timer.start()
+        else:
+            self.cancel_timers()
+
+        self._mutex.release()
+
         
-# TODO: Need to check HD input/output too using iostat
+    def publish_stats(self):
+        self._mutex.acquire()
+        update_status_stale(self._temp_stat, self._last_temp_time)
+        
+        msg = DiagnosticMessage()
+        msg.status.append(self._temp_stat)
+        if self._home_dir != '':
+            update_status_stale(self._usage_stat, self._last_usage_time)
+            msg.status.append(self._usage_stat)
+        
+        if rospy.get_time() - self._last_publish_time > 0.5:
+            self._diag_pub.publish(msg)
+            self._last_publish_time = rospy.get_time()
 
-def main():
-    hds = []
+        if not rospy.is_shutdown():
+            self._publish_timer = threading.Timer(1.0, self.publish_stats)
+            self._publish_timer.start()
+        else:
+            self.cancel_timers()
 
-    hostname = socket.gethostname()
-    
-    rospy.init_node('hd_monitor_%s' % hostname, anonymous = True)
-    
-    pub = rospy.Publisher('/diagnostics', DiagnosticMessage)
+        self._mutex.release()
 
-    home_dir = rospy.myargv()[1]
-    hds = rospy.myargv()[2:]
 
-    no_temp_warn = rospy.get_param('no_hd_temp_warn', False)
-
-    while not rospy.is_shutdown():
-        msg = DiagnosticMessage()
-        msg.status = []
         
-        # Temperature
-        msg.status.append(check_hd_temp(hostname, hds, no_temp_warn))
-                           
-        # Check disk usage
-        msg.status.append(check_disk_usage(hostname, home_dir))
+# TODO: Need to check HD input/output too using iostat
 
-        pub.publish(msg)
-        time.sleep(1.0)
+if __name__ == '__main__':
+    hostname = socket.gethostname()
 
+    home_dir = ''
+    if len(rospy.myargv()) > 1:
+        home_dir = rospy.myargv()[1]
 
+    hd_monitor = hdMonitor(hostname, home_dir)
+    
 
-if __name__ == '__main__':
-    main()
             

Modified: pkg/trunk/robot_descriptions/pr2/pr2_configurations/pr2_head_cart/cart_monitors.launch
===================================================================
--- pkg/trunk/robot_descriptions/pr2/pr2_configurations/pr2_head_cart/cart_monitors.launch	2009-07-21 06:36:25 UTC (rev 19292)
+++ pkg/trunk/robot_descriptions/pr2/pr2_configurations/pr2_head_cart/cart_monitors.launch	2009-07-21 06:44:50 UTC (rev 19293)
@@ -2,14 +2,15 @@
 
   <!-- Will have APC Battery drivers in here at some point -->
 
-  <!-- AOpens on PRF, PRG get hot, don't bother warning users -->
+  <!-- AOpens on PRF, PRG get hot, don't bother warning users --> 
   <param name="no_hd_temp_warn" value="True" />
-  <!-- ipmitool doesn't work on these for some reason -->
-  <param name="no_cpu_temp" value="True" />
 
+  <!-- ipmitool doesn't work on AOpen's -->
+  <param name="check_ipmi_tool" value="False" />
+  <param name="enforce_clock_speed value="False" />
  
   <!-- Disk usage monitor -->
-  <node pkg="pr2_computer_monitor" type="hd_monitor.py" args="$(optenv HOME /home) /dev/sda" machine="realtime_root"/>
+  <node pkg="pr2_computer_monitor" type="hd_monitor.py" machine="realtime_root"/>
   <node pkg="pr2_computer_monitor" type="hd_monitor.py" args="$(optenv HOME /home) /dev/sda" machine="two_root"/> 
 
   <!-- CPU monitors -->

Modified: pkg/trunk/stacks/pr2/pr2_alpha/pr2_monitors.launch
===================================================================
--- pkg/trunk/stacks/pr2/pr2_alpha/pr2_monitors.launch	2009-07-21 06:36:25 UTC (rev 19292)
+++ pkg/trunk/stacks/pr2/pr2_alpha/pr2_monitors.launch	2009-07-21 06:44:50 UTC (rev 19293)
@@ -2,19 +2,21 @@
 
   <!-- AOpens on PRF, PRG get hot, don't bother warning users --> 
   <param name="no_hd_temp_warn" value="True" />
-  <!-- ipmitool doesn't work on these for some reason -->
-  <param name="no_cpu_temp" value="True" />
 
+  <!-- ipmitool doesn't work on AOpen's -->
+  <param name="check_ipmi_tool" value="False" />
+  <param name="enforce_clock_speed value="False" />
+
   <!-- Disk usage monitor -->
-  <node pkg="pr2_computer_monitor" type="hd_monitor.py" args="$(optenv HOME /home) /dev/sda" machine="realtime_root"/>
-  <node pkg="pr2_computer_monitor" type="hd_monitor.py" args="$(optenv HOME /home) /dev/sda" machine="two_root"/> 
-  <node pkg="pr2_computer_monitor" type="hd_monitor.py" args="$(optenv HOME /home) /dev/sda" machine="three_root"/> 
-  <node pkg="pr2_computer_monitor" type="hd_monitor.py" args="$(optenv HOME /home) /dev/sda" machine="four_root"/> 
+  <node pkg="pr2_computer_monitor" type="hd_monitor.py" machine="realtime_root"/>
+  <node pkg="pr2_computer_monitor" type="hd_monitor.py" args="$(optenv HOME /home)" machine="two_root"/> 
+  <node pkg="pr2_computer_monitor" type="hd_monitor.py" machine="three_root"/> 
+  <node pkg="pr2_computer_monitor" type="hd_monitor.py" machine="four_root"/> 
 
   <!-- CPU monitors -->
-  <node pkg="pr2_computer_monitor" type="cpu_monitor.py" args="--no_cpu_temp" machine="realtime_root"/>
-  <node pkg="pr2_computer_monitor" type="cpu_monitor.py" args="--no_cpu_temp" machine="two_root"/> 
-  <node pkg="pr2_computer_monitor" type="cpu_monitor.py" args="--no_cpu_temp" machine="three_root"/> 
-  <node pkg="pr2_computer_monitor" type="cpu_monitor.py" args="--no_cpu_temp" machine="four_root"/> 
+  <node pkg="pr2_computer_monitor" type="cpu_monitor.py" machine="realtime_root"/>
+  <node pkg="pr2_computer_monitor" type="cpu_monitor.py" machine="two_root"/> 
+  <node pkg="pr2_computer_monitor" type="cpu_monitor.py" machine="three_root"/> 
+  <node pkg="pr2_computer_monitor" type="cpu_monitor.py" machine="four_root"/> 
 
 </launch>
\ No newline at end of file


This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.