Process counts for various processes listed in the conf file on rare
occasions return an incorrect count. We're running version 5.2.1.2 on
Linux (RedHat Enterprise Linux version 4), and this is occurring on all of
our architectures (i386, ia64, and x86_64). Looking at the source for both
5.2.1.2 and 5.4.1.rc1, it looks like the problem lies within
net-snmp/agent/mibgroup/ucd-snmp/proc.c
From the 5.4.1.rc1 version of proc.c at line 450:
while (NULL != (ent = readdir(dir))) {
if(!(ent->d_name[0] >= '0' && ent->d_name[0] <= '9')) continue;
#ifdef USE_PROC_CMDLINE /* old method */
... (skipping the USE_PROC_CMDLINE stuff for brevity)
#else
/* read /proc/XX/status */
sprintf(cmdline,"/proc/%s/status",ent->d_name);
if ((status = fopen(cmdline, "r")) == NULL)
break;
On rare occasions, the directory name returned from the call to readdir()
no longer exists by the time fopen() is called. Since we break from the
while loop when this happens, we are left with whatever the count is up to
that point, which may not be correct. I was able to provide a quick and
dirty fix by replacing "break;" on line 467 with "continue;".
I was able to somewhat reproduce this problem outside of the agent with the
following test program. I found that performing various tasks on the node
while it is running tends to increase the likelihood of a failure:
#include <stdio.h>
#include <stdlib.h>
#include <dirent.h>
#include <string.h>
#include <ctype.h>
#include <errno.h>
/*
 * Advance past the current run of non-whitespace characters.
 *
 * ptr: NUL-terminated string, or NULL.
 *
 * Returns a pointer to the first whitespace character at or after ptr,
 * or NULL when ptr is NULL or the string ends before any whitespace
 * is found.
 */
char *
skip_not_white(char *ptr)
{
    if (ptr == NULL)
        return (NULL);
    /*
     * The cast matters: handing isspace() a negative value (a byte >= 0x80
     * on platforms where plain char is signed) is undefined behaviour.
     */
    while (*ptr != 0 && !isspace((unsigned char) *ptr))
        ptr++;
    if (*ptr == 0 || *ptr == '#')
        return (NULL);
    return (ptr);
}
/*
 * Advance past leading whitespace.
 *
 * ptr: NUL-terminated string, or NULL.
 *
 * Returns a pointer to the first non-whitespace character at or after ptr,
 * or NULL when ptr is NULL, when only whitespace remains, or when the first
 * non-whitespace character is '#'.
 */
char *
skip_white(char *ptr)
{
    if (ptr == NULL)
        return (NULL);
    /*
     * The cast matters: handing isspace() a negative value (a byte >= 0x80
     * on platforms where plain char is signed) is undefined behaviour.
     */
    while (*ptr != 0 && isspace((unsigned char) *ptr))
        ptr++;
    if (*ptr == 0 || *ptr == '#')
        return (NULL);
    return (ptr);
}
/*
 * Step over one whitespace-delimited token.
 *
 * Leading whitespace is skipped, then the token itself, then the whitespace
 * that follows it.  The result points at the start of the next token, or is
 * NULL as soon as any of the three steps yields NULL (both helpers pass a
 * NULL argument straight through).
 */
char *
skip_token(char *ptr)
{
    char *token_start = skip_white(ptr);
    char *token_end = skip_not_white(token_start);

    return (skip_white(token_end));
}
/*
 * Count the running processes whose name is exactly procname.
 *
 * Every entry of /proc whose name starts with a digit is examined: the first
 * line of /proc/<entry>/status is read and expected to start with "Name:",
 * followed by the process name.
 *
 * procname: name to look for.
 *
 * Returns the number of matching processes, or -1 when procname is NULL or
 * /proc cannot be opened.
 */
int sh_count_procs(char *procname)
{
    DIR *dir;
    char cmdline[512], *tmpc;
    struct dirent *ent;
    size_t len, plen;
    int path_len, total = 0;
    FILE *status;

    if (procname == NULL)
        return -1;
    plen = strlen(procname);

    if ((dir = opendir("/proc")) == NULL)
        return -1;

    while (NULL != (ent = readdir(dir))) {
        if (!(ent->d_name[0] >= '0' && ent->d_name[0] <= '9'))
            continue;

        /* read /proc/XX/status */
        path_len = snprintf(cmdline, sizeof(cmdline), "/proc/%s/status",
                            ent->d_name);
        if (path_len < 0 || (size_t) path_len >= sizeof(cmdline))
            continue;           /* path would not fit: skip this entry */

        /*
         * A process can exit between readdir() and fopen()/fgets().  That
         * must only skip this one entry; abandoning the scan here would
         * silently drop every process not yet visited from the count.
         */
        if ((status = fopen(cmdline, "r")) == NULL) {
            printf("fopen returned null for: %s\n", cmdline);
            continue;
        }
        if (fgets(cmdline, sizeof(cmdline), status) == NULL) {
            printf("fgets returned null\n");
            fclose(status);
            continue;
        }
        fclose(status);

        /* XXX: assumes Name: is first */
        if (strncmp("Name:", cmdline, 5) != 0) {
            printf("Name string not at beginning\n");
            continue;
        }
        tmpc = skip_token(cmdline);
        if (!tmpc) {
            printf("skip_token() failed\n");
            continue;
        }

        /*
         * Measure the name: the run of printable, non-space characters.
         * The cast keeps isgraph() defined for bytes >= 0x80.
         */
        for (len = 0; tmpc[len] && isgraph((unsigned char) tmpc[len]); len++)
            ;

        if (len == plen && strncmp(tmpc, procname, plen) == 0)
            total++;
    }
    closedir(dir);
    return total;
}
/*
 * Stress driver: repeatedly counts the "atd" processes and reports every
 * pass whose count is anything other than exactly one.
 */
int main(int argc, char **argv)
{
    int pass;
    int found;

    for (pass = 0; pass <= 100000; ++pass) {
        found = sh_count_procs("atd");
        if (found == 1)
            continue;           /* expected result, nothing to report */

        printf("Count for atd: %d\n", found);
        fprintf(stderr, "Error: %s.\n", strerror(errno));
    }
    return 0;
}
I may be completely off-base here, as there may be some interaction going
on with the agent that I am not aware of. If so, I'm willing to provide
any additional data needed to narrow in on this problem.
Thanks,
--Jeff
Thomas Anders
None
None
Public
|
Date: 2007-09-20 21:56
|
|
Date: 2007-09-20 21:56
|
|
Date: 2007-09-20 20:07
|
|
Date: 2007-09-20 17:23
|
|
Date: 2007-09-19 21:25
|
| Filename | Description | Download |
|---|---|---|
| net-snmp-5.4.1-count-proc.patch | Patch to avoid race condition when counting processes under Linux | Download |
| Field | Old Value | Date | By |
|---|---|---|---|
| close_date | - | 2007-09-20 21:56 | tanders |
| status_id | Open | 2007-09-20 21:56 | tanders |
| resolution_id | None | 2007-09-20 21:56 | tanders |
| assigned_to | nobody | 2007-09-20 21:56 | tanders |
| data_type | 312694 | 2007-09-20 20:07 | tanders |
| category_id | agent | 2007-09-20 20:07 | tanders |
| artifact_group_id | linux | 2007-09-20 20:07 | tanders |
| File Added | 246508: net-snmp-5.4.1-count-proc.patch | 2007-09-20 17:23 | cunnijd |
Copyright © 2010 Geeknet, Inc. All rights reserved. Terms of Use