|
From: <wa...@us...> - 2009-07-21 06:44:57
|
Revision: 19293
http://personalrobots.svn.sourceforge.net/personalrobots/?rev=19293&view=rev
Author: wattsk
Date: 2009-07-21 06:44:50 +0000 (Tue, 21 Jul 2009)
Log Message:
-----------
Updated hd and cpu monitor to use threading to reduce load and lag. Hddtemp now called with socket
Modified Paths:
--------------
pkg/trunk/pr2/pr2_computer_monitor/cpu_monitor.launch
pkg/trunk/pr2/pr2_computer_monitor/hd_monitor.launch
pkg/trunk/pr2/pr2_computer_monitor/scripts/cpu_monitor.py
pkg/trunk/pr2/pr2_computer_monitor/scripts/hd_monitor.py
pkg/trunk/robot_descriptions/pr2/pr2_configurations/pr2_head_cart/cart_monitors.launch
pkg/trunk/stacks/pr2/pr2_alpha/pr2_monitors.launch
Removed Paths:
-------------
pkg/trunk/pr2/pr2_computer_monitor/scripts/disk_usage.py
Modified: pkg/trunk/pr2/pr2_computer_monitor/cpu_monitor.launch
===================================================================
--- pkg/trunk/pr2/pr2_computer_monitor/cpu_monitor.launch 2009-07-21 06:36:25 UTC (rev 19292)
+++ pkg/trunk/pr2/pr2_computer_monitor/cpu_monitor.launch 2009-07-21 06:44:50 UTC (rev 19293)
@@ -2,8 +2,9 @@
<!-- Should make this be localhost or whatever -->
<machine name="local_root" user="root" address="nsf" ros-root="$(env ROS_ROOT)" ros-package-path="$(env ROS_PACKAGE_PATH)" default="never"/>
- <param name="no_cpu_temp" value="True" />
-
<!-- Set machine to robot machine in final version -->
+ <param name="check_ipmi_tool" value="False" />
<node machine="local_root" pkg="pr2_computer_monitor" type="cpu_monitor.py" />
+
+
</launch>
\ No newline at end of file
Modified: pkg/trunk/pr2/pr2_computer_monitor/hd_monitor.launch
===================================================================
--- pkg/trunk/pr2/pr2_computer_monitor/hd_monitor.launch 2009-07-21 06:36:25 UTC (rev 19292)
+++ pkg/trunk/pr2/pr2_computer_monitor/hd_monitor.launch 2009-07-21 06:44:50 UTC (rev 19293)
@@ -5,5 +5,8 @@
<param name="no_hd_temp_warn" value="True" />
<!-- Set machine to robot machine in final version -->
- <node machine="local_root" pkg="pr2_computer_monitor" type="hd_monitor.py" args="$(optenv HOME /home) /dev/sda /dev/sdb" />
+ <node machine="local_root" pkg="pr2_computer_monitor" type="hd_monitor.py"
+ args="$(optenv HOME /home)" />
+
+
</launch>
\ No newline at end of file
Modified: pkg/trunk/pr2/pr2_computer_monitor/scripts/cpu_monitor.py
===================================================================
--- pkg/trunk/pr2/pr2_computer_monitor/scripts/cpu_monitor.py 2009-07-21 06:36:25 UTC (rev 19292)
+++ pkg/trunk/pr2/pr2_computer_monitor/scripts/cpu_monitor.py 2009-07-21 06:44:50 UTC (rev 19293)
@@ -41,8 +41,10 @@
import traceback
import threading
+from threading import Timer
import sys, os, time
import subprocess
+import string
import socket
@@ -50,115 +52,219 @@
stat_dict = { 0: 'OK', 1: 'Warning', 2: 'Error' }
-def check_cpu_temp(hostname):
- stat = DiagnosticStatus()
- stat.name = '%s CPU Temp Data' % hostname
- stat.strings = []
- stat.values = []
+# Output entire IPMI data set
+def check_ipmi():
+ diag_strs = []
+ diag_vals = []
+ diag_msgs = []
+ diag_level = 0
try:
- p = subprocess.Popen('sudo ipmitool sdr type Temperature',
+ p = subprocess.Popen('ipmitool sdr',
stdout = subprocess.PIPE,
stderr = subprocess.PIPE, shell = True)
stdout, stderr = p.communicate()
retcode = p.returncode
-
- stat.level = retcode
-
- if retcode == 0:
- stat.level = 0
- stat.message = 'OK'
- else:
- stat.level = 2
- stat.message = 'Bad return code'
- stat.strings.append(DiagnosticString(label = 'Error code: %d' % retcode, value = stderr.replace('\n', '')))
-
- # Parse STDOUT for temp, device ID
+ if retcode != 0:
+ diag_level = 2
+ diag_msg = [ 'ipmitool Error' ]
+ diag_strs = [ DiagnosticString(label = 'IPMI Error', value = stderr) ]
+ return diag_strs, diag_vals, diag_msgs, diag_level
+
lines = stdout.split('\n')
if len(lines) < 2:
- stat.strings.append(DiagnosticString(label = 'ipmitool status', value = 'No output'))
- stat.message = 'No response'
- stat.level = 1
+ diag_strs = [ DiagnosticString(label = 'ipmitool status', value = 'No output') ]
+ diag_msgs = [ 'No response' ]
+ diag_level = 2
+ return diag_strs, diag_vals, diag_msgs, diag_level
+
for ln in lines:
-
- lst = ln.split('|')
- if lst[-1].endswith('degrees C'):
- tmp = lst[-1].rstrip('degrees C').lstrip()
- if unicode(tmp).isnumeric():
- stat.values.append(DiagnosticValue(label = lst[0], value = float(tmp)))
- else:
- stat.strings.append(DiagnosticString(label = lst[0], value = lst[-1]))
-
+ if len(ln) < 2:
+ continue
+ words = ln.split('|')
+ name = words[0].strip()
+ ipmi_val = words[1].strip()
+ stat_byte = words[1].strip()
+
+ # CPU temps
+ if words[0].startswith('CPU') and words[0].strip().endswith('Temp'):
+ if words[1].strip().endswith('degrees C'):
+ tmp = ipmi_val.rstrip(' degrees C').lstrip()
+ if unicode(tmp).isnumeric():
+ temperature = float(tmp)
+ diag_vals.append(DiagnosticValue(label = name, value = temperature))
+
+ cpu_name = name.split()[0]
+ if temperature >= 80 and temperature < 85:
+ diag_level = max(diag_level, 1)
+ if diag_msgs.count('CPU Hot') == 0:
+ diag_msgs.append('CPU Warm')
+ if temperature >= 85:
+ diag_level = max(diag_level, 2)
+ diag_msgs.append('CPU Hot')
+ # Don't keep CPU Warm in list if CPU is hot
+ if diag_msgs.count('CPU Warm') > 0:
+ idx = diag_msgs.index('CPU Warm')
+ diag_msgs.pop(idx)
+
+
+ else:
+ diag_strs.append(DiagnosticString(label = name, value = words[1]))
+
+
+ # MP, BP, FP temps
+ if name == 'MB Temp' or name == 'BP Temp' or name == 'FP Temp':
+ if ipmi_val.endswith('degrees C'):
+ tmp = ipmi_val.rstrip(' degrees C').lstrip()
+ if unicode(tmp).isnumeric():
+ temperature = float(tmp)
+ diag_vals.append(DiagnosticValue(label = name, value = temperature))
+
+ dev_name = name.split()[0]
+ if temperature >= 60 and temperature < 70:
+ diag_level = max(diag_level, 1)
+ diag_msgs.append('%s Warm' % dev_name)
+ if temperature >= 70:
+ diag_level = max(diag_level, 2)
+ diag_msgs.append('%s Hot' % dev_name)
+
+ else:
+ diag_strs.append(DiagnosticString(label = name, value = ipmi_val))
+
+ # CPU fan speeds
+ if (name.startswith('CPU') and name.endswith('Fan')) or name == 'MB Fan':
+ if ipmi_val.endswith('RPM'):
+ rpm = ipmi_val.rstrip(' RPM').lstrip()
+ if unicode(rpm).isnumeric():
+ rpm = float(rpm)
+ diag_vals.append(DiagnosticValue(label = name, value = rpm))
+ else:
+ diag_strs.append(DiagnosticString(label = name, value = ipmi_val))
+
+ # If CPU is hot we get an alarm from ipmitool, report that too
+ if name.startswith('CPU') and name.endswith('hot'):
+ if ipmi_val == '0x01':
+ diag_strs.append(DiagnosticString(label = name, value = 'OK'))
+ else:
+ diag_strs.append(DiagnosticString(label = name, value = 'Hot'))
+ diag_level = max(diag_level, 2)
+ diag_msgs.append('CPU Hot Alarm')
+
except Exception, e:
- traceback.print_exc()
+ diag_strs.append(DiagnosticString(label = 'Exception', value = traceback.format_exc()))
+ diag_level = 2
+ diag_msgs.append('Exception')
+
+ return diag_strs, diag_vals, diag_msgs, diag_level
- stat.level = 2
- stat.message = 'Exception'
- stat.strings.append(DiagnosticString(label = 'Exception', value = str(e)))
+
+# Check core temps
+# Use 'find /sys -name temp1_input' to find cores
+# Read from every core, divide by 1000
+def check_core_temps(sys_temp_strings):
+ diag_vals = []
+ diag_strs = []
+ diag_level = 0
+ diag_msgs = []
+
+ for index, temp_str in enumerate(sys_temp_strings):
+ if len(temp_str) < 5:
+ continue
- return stat
+ cmd = 'cat %s' % temp_str
+ p = subprocess.Popen(cmd, stdout = subprocess.PIPE,
+ stderr = subprocess.PIPE, shell = True)
+ stdout, stderr = p.communicate()
+ retcode = p.returncode
+ if retcode != 0:
+ diag_level = 2
+ diag_msg = [ 'Core Temp Error' ]
+ diag_strs = [ DiagnosticString(label = 'Core Temp Error', value = stderr), DiagnosticString(label = 'Output', value = stdout) ]
+ return diag_strs, diag_vals, diag_msgs, diag_level
+
+ tmp = stdout.strip()
+ if unicode(tmp).isnumeric():
+ temp = float(tmp) / 1000
+ diag_vals.append(DiagnosticValue(label = 'Core %d Temp' % index, value = temp))
+ if temp >= 80 and temp < 85:
+ diag_level = max(diag_level, 1)
+ diag_msgs.append('Warm')
+ if temp >= 85:
+ diag_level = max(diag_level, 2)
+ diag_msgs.append('Hot')
+ else:
+ diag_strs.append(DiagnosticString(label = 'Core %s Temp' % index, value = tmp))
-def check_nfs_stat(hostname):
- stat = DiagnosticStatus()
- stat.name = '%s NFS I/O' % hostname
- stat.level = 0
- stat.message = 'OK'
- stat.strings = []
- stat.values = []
+ return diag_strs, diag_vals, diag_msgs, diag_level
+## Checks clock speed from reading from CPU info
+def check_clock_speed(enforce_speed):
+ strs = []
+ vals = []
+ msgs = []
+ lvl = 0
+
try:
- p = subprocess.Popen('iostat -n',
+ p = subprocess.Popen('cat /proc/cpuinfo | grep MHz',
stdout = subprocess.PIPE,
stderr = subprocess.PIPE, shell = True)
stdout, stderr = p.communicate()
retcode = p.returncode
-
- for index, row in enumerate(stdout.split('\n')):
- if index < 3:
- continue
- lst = row.split()
- if len(lst) < 7:
+ if retcode != 0:
+ lvl = 2
+ msgs = [ 'Clock speed error' ]
+ strs = [ DiagnosticString(label = 'Clock speed error', value = stderr),
+ DiagnosticString(label = 'Output', value = stdout) ]
+
+ return (strs, vals, msgs, lvl)
+
+ for index, ln in enumerate(stdout.split('\n')):
+ words = ln.split(':')
+ if len(words) < 2:
continue
- file_sys = lst[0]
- read_blk = float(lst[1])
- write_blk = float(lst[2])
- read_blk_dir = float(lst[3])
- write_blk_dir = float(lst[4])
- r_blk_srv = float(lst[5])
- w_blk_srv = float(lst[6])
+ speed = words[1].strip().split('.')[0]
+ if unicode(speed).isnumeric():
+ mhz = float(speed)
+ vals.append(DiagnosticValue(label = 'Core %d Speed' % index, value = mhz))
+
+ if mhz < 2240 and mhz > 2150:
+ lvl = max(lvl, 1)
+ if mhz <= 2150:
+ lvl = max(lvl, 2)
- stat.values.append(DiagnosticValue(
- label = '%s Read Blks/s' % file_sys, value=read_blk))
- stat.values.append(DiagnosticValue(
- label = '%s Write Blks/s' % file_sys, value=write_blk))
- stat.values.append(DiagnosticValue(
- label = '%s Read Blk dir/s' % file_sys, value=read_blk_dir))
- stat.values.append(DiagnosticValue(
- label = '%s Write Blks dir/s' % file_sys, value=write_blk_dir))
- stat.values.append(DiagnosticValue(
- label = '%s Read Blks srv/s' % file_sys, value=r_blk_srv))
- stat.values.append(DiagnosticValue(
- label = '%s Write Blks srv/s' % file_sys, value=w_blk_srv))
-
+ else:
+ lvl = max(lvl, 2)
+ strs.append(DiagnosticString(label = 'Core %d Speed' % index, value = speed))
+
+ if not enforce_speed:
+ lvl = 0
+
+ if lvl == 1 and enforce_speed:
+ msgs = [ 'Core slowing' ]
+ elif lvl == 2 and enforce_speed:
+ msgs = [ 'Core throttled' ]
+
except Exception, e:
- traceback.print_exc()
- stat.level = 2
- stat.message = 'Exception'
- stat.strings.append(DiagnosticString(label = 'Exception', value = str(e)))
-
- return stat
+ rospy.logerr(traceback.format_exc())
+ lvl = 2
+ msgs.append('Exception')
+ strs.append(DiagnosticString(label = 'Exception', value = traceback.format_exc()))
+ return strs, vals, msgs, lvl
+
+
+# Add msgs output, too
def check_uptime():
level = 0
vals = []
- string = DiagnosticString(label = 'Load Average Status', value = 'Error')
+ str = DiagnosticString(label = 'Load Average Status', value = 'Error')
try:
p = subprocess.Popen('uptime', stdout = subprocess.PIPE,
@@ -182,15 +288,16 @@
if load1 > 35 or load5 > 25 or load15 > 20:
level = 2
- string = DiagnosticString(label = 'Load Average Status', value = stat_dict[level])
+ str = DiagnosticString(label = 'Load Average Status', value = stat_dict[level])
except Exception, e:
- rospy.logerr(traceback.print_exc())
+ rospy.logerr(traceback.format_exc())
level = 2
- DiagnosticString(label = 'Load Average Status', value = str(e))
+ str = DiagnosticString(label = 'Load Average Status', value = str(e))
- return level, vals, string
+ return level, vals, str
+# Add msgs output
def check_memory():
values = []
str = DiagnosticString(label = 'Memory Status', value = 'Exception')
@@ -227,13 +334,11 @@
return level, values, str
# Use mpstat
-def check_usage(hostname):
- stat = DiagnosticStatus()
- stat.name = '%s CPU Usage' % hostname
- stat.level = 0
- stat.strings = []
- stat.values = []
-
+def check_mpstat():
+ strs = []
+ vals = []
+ mp_level = 0
+
try:
p = subprocess.Popen('mpstat -P ALL 1 1',
stdout = subprocess.PIPE,
@@ -259,71 +364,338 @@
user = float(lst[3])
system = float(lst[5])
- stat.values.append(DiagnosticValue(label = 'CPU %s User' % cpu_name, value = user))
- stat.values.append(DiagnosticValue(label = 'CPU %s System' % cpu_name, value = system))
- stat.values.append(DiagnosticValue(label = 'CPU %s Idle' % cpu_name, value = idle))
+ vals.append(DiagnosticValue(label = 'CPU %s User' % cpu_name, value = user))
+ vals.append(DiagnosticValue(label = 'CPU %s System' % cpu_name, value = system))
+ vals.append(DiagnosticValue(label = 'CPU %s Idle' % cpu_name, value = idle))
- cpu_lvl = 0
if user > 75.0:
- cpu_lvl = 1
+ mp_level = 1
if user > 90.0:
- cpu_lvl = 2
- stat.level = max(stat.level, cpu_lvl)
+ mp_level = 2
- stat.strings.append(DiagnosticString(label = 'CPU %s Status' % cpu_name, value = stat_dict[cpu_lvl]))
+ strs.append(DiagnosticString(label = 'CPU %s Status' % cpu_name, value = stat_dict[mp_level]))
+
+ except Exception, e:
+ mp_level = 2
+ strs.append(DiagnosticString(label = 'mpstat Exception', value = str(e)))
+
+ return mp_level, vals, strs
+
+## Returns names for core temperature files
+## Returns list of names, each name can be read like file
+def get_core_temp_names():
+ temp_vals = []
+ try:
+ p = subprocess.Popen('find /sys -name temp1_input',
+ stdout = subprocess.PIPE,
+ stderr = subprocess.PIPE, shell = True)
+ stdout, stderr = p.communicate()
+ retcode = p.returncode
+
+ if retcode != 0:
+ rospy.logerr('Error find core temp locations: %s' % stderr)
+ return []
+
+ for ln in stdout.split('\n'):
+ temp_vals.append(ln.strip())
+
+ return temp_vals
+ except:
+ rospy.logerr('Exception finding temp vals: %s' % traceback.format_exc())
+ return []
+
+def update_status_stale(stat, last_update_time):
+ time_since_update = rospy.get_time() - last_update_time
+
+ level = stat.level
+ stale_status = 'OK'
+ if time_since_update > 20:
+ stale_status = 'Lagging'
+ level = max(level, 1)
+ if time_since_update > 35:
+ stale_status = 'Stale'
+ level = max(level, 2)
+
+ stat.strings.pop(0)
+ stat.values.pop(0)
+ stat.strings.insert(0, DiagnosticString(label = 'Update Status', value = stale_status))
+ stat.values.insert(0, DiagnosticValue(label = 'Time Since Update', value = time_since_update))
+
+class CPUMonitor():
+ def __init__(self, hostname):
+ rospy.init_node('cpu_monitor_%s' % hostname)
+
+ self._diag_pub = rospy.Publisher('/diagnostics', DiagnosticMessage)
+
+ self._mutex = threading.Lock()
+
+ self._check_ipmi = rospy.get_param('check_ipmi_tool', True)
+ self._enforce_speed = rospy.get_param('enforce_clock_speed', True)
+
+ # Get temp_input files
+ self._temp_vals = get_core_temp_names()
+
+ # CPU stats
+ self._temp_stat = DiagnosticStatus()
+ self._temp_stat.name = '%s CPU Temperature' % hostname
+ self._temp_stat.level = 2
+ self._temp_stat.message = 'No Data'
+ self._temp_stat.strings = [ DiagnosticString(label = 'Update Status', value = 'No Data' )]
+ self._temp_stat.values = [ DiagnosticValue(label = 'Time Since Last Update', value = 100000 )]
+
+ self._usage_stat = DiagnosticStatus()
+ self._usage_stat.name = '%s CPU Usage' % hostname
+ self._usage_stat.level = 2
+ self._usage_stat.message = 'No Data'
+ self._usage_stat.strings = [ DiagnosticString(label = 'Update Status', value = 'No Data' )]
+ self._usage_stat.values = [ DiagnosticValue(label = 'Time Since Last Update', value = 100000 )]
+
+ self._nfs_stat = DiagnosticStatus()
+ self._nfs_stat.name = '%s NFS I/O' % hostname
+ self._nfs_stat.level = 2
+ self._nfs_stat.message = 'No Data'
+ self._nfs_stat.strings = [ DiagnosticString(label = 'Update Status', value = 'No Data' )]
+ self._nfs_stat.values = [ DiagnosticValue(label = 'Time Since Last Update', value = 100000 )]
+
+ self._last_temp_time = 0
+ self._last_usage_time = 0
+ self._last_nfs_time = 0
+ self._last_publish_time = 0
+
+ self._temps_timer = None
+ self._usage_timer = None
+ self._nfs_timer = None
+ self._publish_timer = None
+ ##@TODO Need wireless stuff, at some point, put NFS in usage status
+
+ # Start checking everything
+ self.check_temps()
+ self.check_nfs_stat()
+ self.check_usage()
+
+ self._publish_timer = threading.Timer(1.0, self.publish_stats)
+ self._publish_timer.start()
+
+ ## Must have the lock to cancel everything
+ def cancel_timers(self):
+ if self._temps_timer:
+ self._temps_timer.cancel()
+
+ if self._nfs_timer:
+ self._nfs_timer.cancel()
+
+ if self._usage_timer:
+ self._usage_timer.cancel()
+
+ if self._publish_timer:
+ self._publish_timer.cancel()
+
+ def check_nfs_stat(self):
+ if rospy.is_shutdown():
+ self._mutex.acquire()
+ self.cancel_timers()
+ self._mutex.release()
+ return
+
+ nfs_level = 0
+ msg = 'OK'
+ strs = [ DiagnosticString(label = 'Update Status', value = 'OK' )]
+ vals = [ DiagnosticValue(label = 'Time Since Last Update', value = 0 )]
+
+ try:
+ p = subprocess.Popen('iostat -n',
+ stdout = subprocess.PIPE,
+ stderr = subprocess.PIPE, shell = True)
+ stdout, stderr = p.communicate()
+ retcode = p.returncode
+ for index, row in enumerate(stdout.split('\n')):
+ if index < 3:
+ continue
+
+ lst = row.split()
+ if len(lst) < 7:
+ continue
+
+ file_sys = lst[0]
+ read_blk = float(lst[1])
+ write_blk = float(lst[2])
+ read_blk_dir = float(lst[3])
+ write_blk_dir = float(lst[4])
+ r_blk_srv = float(lst[5])
+ w_blk_srv = float(lst[6])
+
+ vals.append(DiagnosticValue(
+ label = '%s Read Blks/s' % file_sys, value=read_blk))
+ vals.append(DiagnosticValue(
+ label = '%s Write Blks/s' % file_sys, value=write_blk))
+ vals.append(DiagnosticValue(
+ label = '%s Read Blk dir/s' % file_sys, value=read_blk_dir))
+ vals.append(DiagnosticValue(
+ label = '%s Write Blks dir/s' % file_sys, value=write_blk_dir))
+ vals.append(DiagnosticValue(
+ label = '%s Read Blks srv/s' % file_sys, value=r_blk_srv))
+ vals.append(DiagnosticValue(
+ label = '%s Write Blks srv/s' % file_sys, value=w_blk_srv))
+
+ except Exception, e:
+ rospy.logerr(traceback.format_exc())
+ nfs_level = 2
+ msg = 'Exception'
+ strings.append(DiagnosticString(label = 'Exception', value = str(e)))
+
+ self._mutex.acquire()
+
+ self._nfs_stat.level = nfs_level
+ self._nfs_stat.message = msg
+ self._nfs_stat.strings = strs
+ self._nfs_stat.values = vals
+
+ self._last_nfs_time = rospy.get_time()
+
+ if not rospy.is_shutdown():
+ self._nfs_timer = threading.Timer(5.0, self.check_nfs_stat)
+ self._nfs_timer.start()
+ else:
+ self.cancel_timers()
+
+ self._mutex.release()
+
+ ## Call every 10sec at minimum
+ def check_temps(self):
+ if rospy.is_shutdown():
+ self._mutex.acquire()
+ self.cancel_timers()
+ self._mutex.release()
+ return
+
+ diag_strs = [ DiagnosticString(label = 'Update Status', value = 'OK' ) ]
+ diag_vals = [ DiagnosticValue(label = 'Time Since Last Update', value = 0 ) ]
+ diag_msgs = []
+ diag_level = 0
+
+ if self._check_ipmi:
+ ipmi_strs, ipmi_vals, ipmi_msgs, ipmi_level = check_ipmi()
+ diag_strs.extend(ipmi_strs)
+ diag_vals.extend(ipmi_vals)
+ diag_msgs.extend(ipmi_msgs)
+ diag_level = max(diag_level, ipmi_level)
+
+ core_strs, core_vals, core_msgs, core_level = check_core_temps(self._temp_vals)
+ diag_strs.extend(core_strs)
+ diag_vals.extend(core_vals)
+ diag_msgs.extend(core_msgs)
+ diag_level = max(diag_level, core_level)
+
+ clock_strs, clock_vals, clock_msgs, clock_level = check_clock_speed(self._enforce_speed)
+ diag_strs.extend(clock_strs)
+ diag_vals.extend(clock_vals)
+ diag_msgs.extend(clock_msgs)
+ diag_level = max(diag_level, clock_level)
+
+ diag_log = set(diag_msgs)
+ if len(diag_log) > 0:
+ message = string.join(diag_log, ', ')
+ else:
+ message = stat_dict[diag_level]
+
+ self._mutex.acquire()
+ self._last_temp_time = rospy.get_time()
+
+ self._temp_stat.level = diag_level
+ self._temp_stat.message = message
+ self._temp_stat.strings = diag_strs
+ self._temp_stat.values = diag_vals
+
+ if not rospy.is_shutdown():
+ self._temp_timer = threading.Timer(5.0, self.check_temps)
+ self._temp_timer.start()
+ else:
+ self.cancel_timers()
+
+ self._mutex.release()
+
+ def check_usage(self):
+ if rospy.is_shutdown():
+ self._mutex.acquire()
+ self.cancel_timers()
+ self._mutex.release()
+ return
+
+ diag_level = 0
+ diag_strs = [ DiagnosticString(label = 'Update Status', value = 'OK' ) ]
+ diag_vals = [ DiagnosticValue(label = 'Time Since Last Update', value = 0 )]
+
+ # Check mpstat
+ mp_level, mp_vals, mp_strs = check_mpstat()
+ diag_vals.extend(mp_vals)
+ diag_strs.extend(mp_strs)
+ diag_level = max(diag_level, mp_level)
+
# Check uptime
uptime_level, up_vals, up_str = check_uptime()
- stat.values.extend(up_vals)
- stat.strings.append(up_str)
- stat.level = max(stat.level, uptime_level)
-
+ diag_vals.extend(up_vals)
+ diag_strs.append(up_str)
+ diag_level = max(diag_level, uptime_level)
+
# Check memory
mem_level, mem_vals, mem_str = check_memory()
- stat.values.extend(mem_vals)
- stat.strings.append(mem_str)
- stat.level = max(stat.level, mem_level)
+ diag_vals.extend(mem_vals)
+ diag_strs.append(mem_str)
+ diag_level = max(diag_level, mem_level)
+
+
+ # Update status
+ self._mutex.acquire()
+ self._last_usage_time = rospy.get_time()
+ self._usage_stat.level = diag_level
+ self._usage_stat.values = diag_vals
+ self._usage_stat.strings = diag_strs
- stat.message = stat_dict[stat.level]
+ self._usage_stat.message = stat_dict[diag_level]
- except Exception, e:
- stat.level = 2
- stat.message = 'Exception'
- stat.strings.append(DiagnosticString(label = 'mpstat Exception', value = str(e)))
+ if not rospy.is_shutdown():
+ self._usage_timer = threading.Timer(5.0, self.check_usage)
+ self._usage_timer.start()
+ else:
+ self.cancel_timers()
- return stat
+ self._mutex.release()
-def main():
- hds = []
+ def publish_stats(self):
+ self._mutex.acquire()
+
+ # Update everything with last update times
+ update_status_stale(self._temp_stat, self._last_temp_time)
+ update_status_stale(self._usage_stat, self._last_usage_time)
+ update_status_stale(self._nfs_stat, self._last_nfs_time)
+ msg = DiagnosticMessage()
+ msg.status.append(self._temp_stat)
+ msg.status.append(self._usage_stat)
+ msg.status.append(self._nfs_stat)
+
+ if rospy.get_time() - self._last_publish_time > 0.5:
+ self._diag_pub.publish(msg)
+ self._last_publish_time = rospy.get_time()
+
+ if not rospy.is_shutdown():
+ self._publish_timer = threading.Timer(1.0, self.publish_stats)
+ self._publish_timer.start()
+ else:
+ self.cancel_timers()
+
+ self._mutex.release()
+
+if __name__ == '__main__':
hostname = socket.gethostname()
-
- rospy.init_node('cpu_monitor_%s' % hostname, anonymous = True)
-
- pub = rospy.Publisher('/diagnostics', DiagnosticMessage)
- no_cpu_temp = rospy.get_param('no_cpu_temp', False)
+ cpu_node = CPUMonitor(hostname)
- while not rospy.is_shutdown():
- msg = DiagnosticMessage()
- msg.status = []
-
- # Temperature, don't check if user says not to
- if not no_cpu_temp:
- msg.status.append(check_cpu_temp(hostname))
-
- # Usage, memory, and load average
- msg.status.append(check_usage(hostname))
- # NFS status
- msg.status.append(check_nfs_stat(hostname))
-
- pub.publish(msg)
- time.sleep(1.0)
+
+
-if __name__ == '__main__':
- main()
-
Deleted: pkg/trunk/pr2/pr2_computer_monitor/scripts/disk_usage.py
===================================================================
--- pkg/trunk/pr2/pr2_computer_monitor/scripts/disk_usage.py 2009-07-21 06:36:25 UTC (rev 19292)
+++ pkg/trunk/pr2/pr2_computer_monitor/scripts/disk_usage.py 2009-07-21 06:44:50 UTC (rev 19293)
@@ -1,106 +0,0 @@
-#!/usr/bin/env python
-# Software License Agreement (BSD License)
-#
-# Copyright (c) 2008, Willow Garage, Inc.
-# All rights reserved.
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions
-# are met:
-#
-# * Redistributions of source code must retain the above copyright
-# notice, this list of conditions and the following disclaimer.
-# * Redistributions in binary form must reproduce the above
-# copyright notice, this list of conditions and the following
-# disclaimer in the documentation and/or other materials provided
-# with the distribution.
-# * Neither the name of the Willow Garage nor the names of its
-# contributors may be used to endorse or promote products derived
-# from this software without specific prior written permission.
-#
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
-# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
-# COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
-# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
-# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
-# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
-# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
-# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
-# ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-# POSSIBILITY OF SUCH DAMAGE.
-
-# Author: Tully Foote
-
-import roslib
-roslib.load_manifest('runtime_monitor')
-
-from diagnostic_msgs.msg import DiagnosticMessage, DiagnosticStatus, DiagnosticValue, DiagnosticString
-import sys
-import rospy
-import socket
-from subprocess import Popen, PIPE
-
-import time
-
-
-NAME = 'disk_usage_monitor'
-
-def disk_usage_monitor(device, offset=5):
- pub = rospy.Publisher("/diagnostics", DiagnosticMessage)
- rospy.init_node(NAME, anonymous=True)
-
- hostname = socket.gethostbyaddr(socket.gethostname())[0]
-
- while not rospy.is_shutdown():
- try:
- p = Popen(["df", "-P", "--block-size=1G", device], stdout=PIPE, stdin=PIPE, stderr=PIPE)
- res = p.wait()
- (o,e) = p.communicate()
- except OSError, (errno, msg):
- if errno == 4:
- break #ctrl-c interrupt
- else:
- raise
- if (res == 0):
- second_row = o.split("\n")[1]
- g_available = float(second_row.split()[-3])
- #print second_row, g_available
- #sys.exit(-1)
-
- component_string = "Disk Usage for on host "+ hostname + " for device: " +device
-
- if (g_available > offset):
- stat = DiagnosticStatus(0,component_string, "Acceptable Disk Space Avaiable", [DiagnosticValue(float(g_available),"free space (GB)")],[])
- elif (g_available > 1):
- stat = DiagnosticStatus(1,component_string, "Low Disk Space Avaiable", [DiagnosticValue(float(g_available),"free space (GB)")],[])
- else:
- stat = DiagnosticStatus(2,component_string, "Critically Low Disk Space Avaiable", [DiagnosticValue(float(g_available),"free space (GB)")],[])
- else:
- stat = DiagnosticStatus(2,component_string, "Failure to run \"df -P --block-size\"", [],[])
-
- pub.publish(DiagnosticMessage(None, [stat]))
- time.sleep(1)
-
-def disk_usage_monitor_main(argv=sys.argv):
- import optparse
- parser = optparse.OptionParser(usage="usage: disk_usage_monitor device [size in GB default = 5]")
- options, args = parser.parse_args(argv[1:])
- if len(args) == 1:
- device, offset = args[0], 5
- elif len(args) == 2:
- device, offset = args
- try:
- offset = int(offset)
- except:
- parser.error("size-tolerance must be a number")
- else:
- parser.error("Invalid arguments")
- disk_usage_monitor(device, offset)
-
-if __name__ == "__main__":
- try:
- disk_usage_monitor_main(rospy.myargv())
- except KeyboardInterrupt: pass
-
Modified: pkg/trunk/pr2/pr2_computer_monitor/scripts/hd_monitor.py
===================================================================
--- pkg/trunk/pr2/pr2_computer_monitor/scripts/hd_monitor.py 2009-07-21 06:36:25 UTC (rev 19292)
+++ pkg/trunk/pr2/pr2_computer_monitor/scripts/hd_monitor.py 2009-07-21 06:44:50 UTC (rev 19293)
@@ -41,6 +41,7 @@
import traceback
import threading
+from threading import Timer
import sys, os, time
import subprocess
@@ -55,18 +56,18 @@
hd_temp_error = 55
stat_dict = { 0: 'OK', 1: 'Warning', 2: 'Error' }
+temp_dict = { 0: 'OK', 1: 'Warm', 2: 'Hot' }
-def check_hd_temp(hostname, hds, no_temp_warn):
- stat = DiagnosticStatus()
- stat.level = 0
- stat.name = "%s HD Temps" % hostname
- stat.strings = []
- stat.values = []
+def get_hddtemp_data():
+ hds = ['/dev/sda', '/dev/sdb']
- for index, hd in enumerate(hds):
- stat.strings.append(DiagnosticString(label = 'Disk %d HW addr' % index, value = hd))
- try:
- p = subprocess.Popen('hddtemp %s' % hd,
+ drives = []
+ makes = []
+ temps = []
+
+ try:
+ for hd in hds:
+ p = subprocess.Popen('hddtemp %s' % hd,
stdout = subprocess.PIPE,
stderr = subprocess.PIPE, shell = True)
stdout, stderr = p.communicate()
@@ -74,15 +75,9 @@
stdout = stdout.replace('\n', '')
stderr = stderr.replace('\n', '')
-
- temp_level = retcode
-
- if retcode == 0:
- temp_level = 0
- else:
- temp_level = 2
-
- # Parse STDOUT for temp, device ID
+
+ rospy.logerr('Hddtemp output: %s\n%s' % (stdout, stderr))
+
lst = stdout.split(':')
if len(lst) > 2:
dev_id = lst[1]
@@ -90,130 +85,285 @@
if unicode(tmp).isnumeric():
temp = float(tmp)
- if temp > hd_temp_warn:
- temp_level = 1
- if temp > hd_temp_error:
- temp_level = 2
- else:
- temp = float(0.0)
- temp_level = 2
-
- else:
- dev_id = 'ERROR'
- temp = 0.0
- temp_level = 2
-
- temp_status = stat_dict[temp_level]
- if no_temp_warn and temp_level > 0:
- temp_level = 0
- temp_status = 'Warning Disabled'
+
+ drives.append(hd)
+ makes.append(dev_id)
+ temps.append(temp)
+
+ except:
+ rospy.logerr(traceback.format_exc())
+
+ return drives, makes, temps
- stat.strings.append(DiagnosticString(label = 'Disk %d Temp Status' % index, value = temp_status))
- stat.strings.append(DiagnosticString(label = 'Disk %d Device ID' % index, value = dev_id))
- stat.values.append(DiagnosticValue(label = 'Disk %d Temp' % index, value = temp))
+def get_hddtemp_data_socket(hostname = 'localhost', port = 7634):
+ try:
+ # Retry, see if we get some data
+ for i in range(0, 5):
+ hd_sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+ hd_sock.connect((hostname, port))
+ sock_data = hd_sock.recv(1024)
+ hd_sock.close()
- stat.level = max(stat.level, temp_level)
-
- except:
- traceback.print_exc()
- stat.values.append(DiagnosticValue(label = 'Disk %d Temp' % index, value = 0))
- stat.strings.append(DiagnosticString(label = 'Disk %d Temp Status' % index, value = stat_dict[2]))
- stat.strings.append(DiagnosticString(label = 'Disk %d Device ID' % index, value = 'No ID'))
- stat.level = 2
- stat.message = stat_dict[2]
+ sock_vals = sock_data.split('|')
+ if len(sock_vals) > 1:
+ break
- stat.message = stat_dict[stat.level]
- return stat
+ idx = 0
+
+ drives = []
+ makes = []
+ temps = []
+ while idx + 5 < len(sock_vals):
+ drives.append(sock_vals[idx + 1])
+ makes.append(sock_vals[idx + 2])
+ temp = float(sock_vals[idx + 3])
+ temps.append(temp)
+
+ idx = idx + 5
-def check_disk_usage(hostname, home_dir):
- stat = DiagnosticStatus()
- stat.level = 0
- stat.name = "%s HD Disk Usage" % hostname
- stat.strings = []
- stat.values = []
+ return drives, makes, temps
+ except:
+ rospy.logerr(traceback.format_exc())
+ return [], [], []
- try:
- p = subprocess.Popen(["df", "-P", "--block-size=1G", home_dir],
- stdout=subprocess.PIPE, stderr=subprocess.PIPE)
- stdout, stderr = p.communicate()
- retcode = p.returncode
- if (retcode == 0):
- stat.strings.append(DiagnosticString(label = 'Disk Space Reading', value = 'OK'))
- row_count = 0
- for row in stdout.split('\n'):
- if len(row.split()) < 2:
- continue
- if not unicode(row.split()[1]).isnumeric() or float(row.split()[1]) < 10: # Ignore small drives
- continue
+
+def update_status_stale(stat, last_update_time):
+ time_since_update = rospy.get_time() - last_update_time
+
+ level = stat.level
+ stale_status = 'OK'
+ if time_since_update > 45:
+ stale_status = 'Lagging'
+ level = max(level, 1)
+ if time_since_update > 90:
+ stale_status = 'Stale'
+ level = max(level, 2)
+
+ stat.strings.pop(0)
+ stat.values.pop(0)
+ stat.strings.insert(0, DiagnosticString(label = 'Update Status', value = stale_status))
+ stat.values.insert(0, DiagnosticValue(label = 'Time Since Update', value = time_since_update))
+
+class hdMonitor():
+ def __init__(self, hostname, home_dir = ''):
+ rospy.init_node('hd_monitor_%s' % hostname)
+
+ self._mutex = threading.Lock()
+
+ self._hostname = hostname
+ self._no_temp_warn = rospy.get_param('no_hd_temp_warn', False)
+ self._home_dir = home_dir
+
+ self._diag_pub = rospy.Publisher('/diagnostics', DiagnosticMessage)
+
+ self._temp_stat = DiagnosticStatus()
+ self._temp_stat.name = "%s HD Temperature" % hostname
+ self._temp_stat.level = 2
+ self._temp_stat.message = 'No Data'
+ self._temp_stat.strings = [ DiagnosticString(label = 'Update Status', value = 'No Data' )]
+ self._temp_stat.values = [ DiagnosticValue(label = 'Time Since Last Update', value = 100000 )]
+
+ if self._home_dir != '':
+ self._usage_stat = DiagnosticStatus()
+ self._usage_stat.level = 2
+ self._usage_stat.name = '%s HD Usage' % hostname
+ self._usage_stat.strings = [ DiagnosticString(label = 'Update Status', value = 'No Data' )]
+ self._usage_stat.values = [ DiagnosticValue(label = 'Time Since Last Update', value = 100000) ]
+ self.check_disk_usage()
+
+ self._last_temp_time = 0
+ self._last_usage_time = 0
+ self._last_publish_time = 0
+
+ self.check_temps()
+
+ self._temp_timer = None
+ self._usage_timer = None
+ self._publish_timer = threading.Timer(1.0, self.publish_stats)
+ self._publish_timer.start()
+
+ ## Must have the lock to cancel everything
+ def cancel_timers(self):
+ if self._temp_timer:
+ self._temp_timer.cancel()
+
+ if self._usage_timer:
+ self._usage_timer.cancel()
+
+ if self._publish_timer:
+ self._publish_timer.cancel()
+
+ def check_temps(self):
+ if rospy.is_shutdown():
+ self._mutex.acquire()
+ self.cancel_timers()
+ self._mutex.release()
+ return
+
+ diag_strs = [ DiagnosticString(label = 'Update Status', value = 'OK' ) ]
+ diag_vals = [ DiagnosticValue(label = 'Time Since Last Update', value = 0 ) ]
+ diag_level = 0
+
+
+ drives, makes, temps = get_hddtemp_data_socket()
+ if len(drives) == 0:
+ diag_strs.append(DiagnosticString(label = 'Disk Temp Data', value = 'No hddtemp data'))
+ diag_level = 2
+
+ for index in range(0, len(drives)):
+ temp = temps[index]
+ temp_level = 0
+ if temp > hd_temp_warn:
+ temp_level = 1
+ if temp > hd_temp_error:
+ temp_level = 2
+ diag_level = max(diag_level, temp_level)
+
+ diag_strs.append(DiagnosticString(label = 'Disk %d Temp Status' % index, value = temp_dict[temp_level]))
+ diag_strs.append(DiagnosticString(label = 'Disk %d Mount Pt.' % index, value = drives[index]))
+ diag_strs.append(DiagnosticString(label = 'Disk %d Device ID' % index, value = makes[index]))
+ diag_vals.append(DiagnosticValue(label = 'Disk %d Temp' % index, value = temp))
+
+ self._mutex.acquire()
+ self._last_temp_time = rospy.get_time()
+
+ self._temp_stat.strings = diag_strs
+ self._temp_stat.values = diag_vals
+
+ self._temp_stat.level = diag_level
+
+ # Give No Data message if we have no reading
+ self._temp_stat.message = temp_dict[diag_level]
+ if len(self._temp_stat.values) == 1:
+ self._temp_stat.message = 'No Data'
+
+ if self._no_temp_warn and self._temp_stat.message != 'No Data':
+ self._temp_stat.level = 0
+
+
+ if not rospy.is_shutdown():
+ self._temp_timer = threading.Timer(30.0, self.check_temps)
+ self._temp_timer.start()
+ else:
+ self.cancel_timers()
+
+ self._mutex.release()
+
+ def check_disk_usage(self):
+ diag_strs = [ DiagnosticString(label = 'Update Status', value = 'OK' ) ]
+ diag_vals = [ DiagnosticValue(label = 'Time Since Last Update', value = 0 ) ]
+ diag_level = 0
+
+ try:
+ p = subprocess.Popen(["df", "-P", "--block-size=1G", self._home_dir],
+ stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+ stdout, stderr = p.communicate()
+ retcode = p.returncode
+
+ if (retcode == 0):
- row_count += 1
- g_available = float(row.split()[-3])
- name = row.split()[0]
- size = float(row.split()[1])
- mount_pt = row.split()[-1]
+ diag_strs.append(DiagnosticString(label = 'Disk Space Reading', value = 'OK'))
+ row_count = 0
+ for row in stdout.split('\n'):
+ if len(row.split()) < 2:
+ continue
+ if not unicode(row.split()[1]).isnumeric() or float(row.split()[1]) < 10: # Ignore small drives
+ continue
- if (g_available > low_hd_level):
- level = 0
- elif (g_available > critical_hd_level):
- level = 1
- else:
- level = 2
-
- stat.strings.append(DiagnosticString(label = 'Disk %d Name' % row_count, value = name))
- stat.values.append(DiagnosticString(label = 'Disk %d Available' % row_count, value = g_available))
- stat.values.append(DiagnosticString(label = 'Disk %d Size' % row_count, value = size))
- stat.strings.append(DiagnosticString(label = 'Disk %d Status' % row_count, value = stat_dict[level]))
- stat.strings.append(DiagnosticString(label = 'Disk %d Mount Point' % row_count, value = mount_pt))
+ row_count += 1
+ g_available = float(row.split()[-3])
+ name = row.split()[0]
+ size = float(row.split()[1])
+ mount_pt = row.split()[-1]
+
+ if (g_available > low_hd_level):
+ level = 0
+ elif (g_available > critical_hd_level):
+ level = 1
+ else:
+ level = 2
+
+ diag_strs.append(DiagnosticString(
+ label = 'Disk %d Name' % row_count, value = name))
+ diag_vals.append(DiagnosticString(
+ label = 'Disk %d Available' % row_count, value = g_available))
+ diag_vals.append(DiagnosticString(
+ label = 'Disk %d Size' % row_count, value = size))
+ diag_strs.append(DiagnosticString(
+ label = 'Disk %d Status' % row_count, value = stat_dict[level]))
+ diag_strs.append(DiagnosticString(
+ label = 'Disk %d Mount Point' % row_count, value = mount_pt))
+
+ diag_level = max(diag_level, level)
- stat.level = max(level, stat.level)
-
- else:
- stat.strings.append(DiagnosticString(label = 'Disk Space Reading', value = 'Failed'))
- stat.level = 2
+ else:
+ diag_strs.append(DiagnosticString(label = 'Disk Space Reading', value = 'Failed'))
+ diag_level = 2
+
+ except:
+ rospy.logerr(traceback.format_exc())
+ diag_strs.append(DiagnosticString(label = 'Disk Space Reading', value = 'Exception'))
+ diag_strs.append(DiagnosticString(label = 'Disk Space Ex', value = traceback.format_exc()))
- except:
- traceback.print_exc()
- stat.strings.append(DiagnosticString(label = 'Disk Space Reading', value = 'Exception'))
- stat.level = 2
+ diag_level = 2
+
+ stat.message = stat_dict[stat.level]
- stat.message = stat_dict[stat.level]
- return stat
+ # Update status
+ self._mutex.acquire()
+ self._last_usage_time = rospy.get_time()
+ self._usage_stat.level = diag_level
+ self._usage_stat.values = diag_vals
+ self._usage_stat.strings = diag_strs
+
+ self._usage_stat.message = stat_dict[diag_level]
+ if not rospy.is_shutdown():
+ self._usage_timer = threading.Timer(5.0, self.check_disk_usage)
+ self._usage_timer.start()
+ else:
+ self.cancel_timers()
+
+ self._mutex.release()
+
-# TODO: Need to check HD input/output too using iostat
+ def publish_stats(self):
+ self._mutex.acquire()
+ update_status_stale(self._temp_stat, self._last_temp_time)
+
+ msg = DiagnosticMessage()
+ msg.status.append(self._temp_stat)
+ if self._home_dir != '':
+ update_status_stale(self._usage_stat, self._last_usage_time)
+ msg.status.append(self._usage_stat)
+
+ if rospy.get_time() - self._last_publish_time > 0.5:
+ self._diag_pub.publish(msg)
+ self._last_publish_time = rospy.get_time()
-def main():
- hds = []
+ if not rospy.is_shutdown():
+ self._publish_timer = threading.Timer(1.0, self.publish_stats)
+ self._publish_timer.start()
+ else:
+ self.cancel_timers()
- hostname = socket.gethostname()
-
- rospy.init_node('hd_monitor_%s' % hostname, anonymous = True)
-
- pub = rospy.Publisher('/diagnostics', DiagnosticMessage)
+ self._mutex.release()
- home_dir = rospy.myargv()[1]
- hds = rospy.myargv()[2:]
- no_temp_warn = rospy.get_param('no_hd_temp_warn', False)
-
- while not rospy.is_shutdown():
- msg = DiagnosticMessage()
- msg.status = []
- # Temperature
- msg.status.append(check_hd_temp(hostname, hds, no_temp_warn))
-
- # Check disk usage
- msg.status.append(check_disk_usage(hostname, home_dir))
+# TODO: Need to check HD input/output too using iostat
- pub.publish(msg)
- time.sleep(1.0)
+if __name__ == '__main__':
+ hostname = socket.gethostname()
+ home_dir = ''
+ if len(rospy.myargv()) > 1:
+ home_dir = rospy.myargv()[1]
+ hd_monitor = hdMonitor(hostname, home_dir)
+
-if __name__ == '__main__':
- main()
Modified: pkg/trunk/robot_descriptions/pr2/pr2_configurations/pr2_head_cart/cart_monitors.launch
===================================================================
--- pkg/trunk/robot_descriptions/pr2/pr2_configurations/pr2_head_cart/cart_monitors.launch 2009-07-21 06:36:25 UTC (rev 19292)
+++ pkg/trunk/robot_descriptions/pr2/pr2_configurations/pr2_head_cart/cart_monitors.launch 2009-07-21 06:44:50 UTC (rev 19293)
@@ -2,14 +2,15 @@
<!-- Will have APC Battery drivers in here at some point -->
- <!-- AOpens on PRF, PRG get hot, don't bother warning users -->
+ <!-- AOpens on PRF, PRG get hot, don't bother warning users -->
<param name="no_hd_temp_warn" value="True" />
- <!-- ipmitool doesn't work on these for some reason -->
- <param name="no_cpu_temp" value="True" />
+ <!-- ipmitool doesn't work on AOpen's -->
+ <param name="check_ipmi_tool" value="False" />
+ <param name="enforce_clock_speed value="False" />
<!-- Disk usage monitor -->
- <node pkg="pr2_computer_monitor" type="hd_monitor.py" args="$(optenv HOME /home) /dev/sda" machine="realtime_root"/>
+ <node pkg="pr2_computer_monitor" type="hd_monitor.py" machine="realtime_root"/>
<node pkg="pr2_computer_monitor" type="hd_monitor.py" args="$(optenv HOME /home) /dev/sda" machine="two_root"/>
<!-- CPU monitors -->
Modified: pkg/trunk/stacks/pr2/pr2_alpha/pr2_monitors.launch
===================================================================
--- pkg/trunk/stacks/pr2/pr2_alpha/pr2_monitors.launch 2009-07-21 06:36:25 UTC (rev 19292)
+++ pkg/trunk/stacks/pr2/pr2_alpha/pr2_monitors.launch 2009-07-21 06:44:50 UTC (rev 19293)
@@ -2,19 +2,21 @@
<!-- AOpens on PRF, PRG get hot, don't bother warning users -->
<param name="no_hd_temp_warn" value="True" />
- <!-- ipmitool doesn't work on these for some reason -->
- <param name="no_cpu_temp" value="True" />
+ <!-- ipmitool doesn't work on AOpen's -->
+ <param name="check_ipmi_tool" value="False" />
+ <param name="enforce_clock_speed value="False" />
+
<!-- Disk usage monitor -->
- <node pkg="pr2_computer_monitor" type="hd_monitor.py" args="$(optenv HOME /home) /dev/sda" machine="realtime_root"/>
- <node pkg="pr2_computer_monitor" type="hd_monitor.py" args="$(optenv HOME /home) /dev/sda" machine="two_root"/>
- <node pkg="pr2_computer_monitor" type="hd_monitor.py" args="$(optenv HOME /home) /dev/sda" machine="three_root"/>
- <node pkg="pr2_computer_monitor" type="hd_monitor.py" args="$(optenv HOME /home) /dev/sda" machine="four_root"/>
+ <node pkg="pr2_computer_monitor" type="hd_monitor.py" machine="realtime_root"/>
+ <node pkg="pr2_computer_monitor" type="hd_monitor.py" args="$(optenv HOME /home)" machine="two_root"/>
+ <node pkg="pr2_computer_monitor" type="hd_monitor.py" machine="three_root"/>
+ <node pkg="pr2_computer_monitor" type="hd_monitor.py" machine="four_root"/>
<!-- CPU monitors -->
- <node pkg="pr2_computer_monitor" type="cpu_monitor.py" args="--no_cpu_temp" machine="realtime_root"/>
- <node pkg="pr2_computer_monitor" type="cpu_monitor.py" args="--no_cpu_temp" machine="two_root"/>
- <node pkg="pr2_computer_monitor" type="cpu_monitor.py" args="--no_cpu_temp" machine="three_root"/>
- <node pkg="pr2_computer_monitor" type="cpu_monitor.py" args="--no_cpu_temp" machine="four_root"/>
+ <node pkg="pr2_computer_monitor" type="cpu_monitor.py" machine="realtime_root"/>
+ <node pkg="pr2_computer_monitor" type="cpu_monitor.py" machine="two_root"/>
+ <node pkg="pr2_computer_monitor" type="cpu_monitor.py" machine="three_root"/>
+ <node pkg="pr2_computer_monitor" type="cpu_monitor.py" machine="four_root"/>
</launch>
\ No newline at end of file
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|