|
From: Shailabh N. <na...@us...> - 2001-03-06 15:25:10
|
Thanks for the updates, John!
2) is a useful reminder that there are other machines out there :-)
3) The global variable increments inside local_exec() are a hangover from
code testing. Removing them gives cleaner code (from a cache
viewpoint). But it might also be interesting to introduce a *controlled*
amount of cache pollution in that (or some other) function to better model
realistic applications. Any ideas?
I'll update the benchmark on the website along with some minor code cleanup
(mostly to make it more readable)...
Shailabh Nagar
Enterprise Linux Group, IBM TJ Watson Research Center,
914-945-2851
John Hawkes <ha...@ba...>@lists.sourceforge.net on
03/05/2001 02:29:05 PM
Sent by: lse...@li...
To: lse...@li...
cc:
Subject: [Lse-tech] patch for "reflex" v1.1 benchmark
I attach a patch to Shailabh Nagar's (na...@us...) "reflex" benchmark to
remedy several flaws and features of version 1.1.
1) Threads are now cloned without the CLONE_FILES flag. That eliminates
the universal sharing of "struct files", which produces an awful cacheblock
ping-ponging in fget() as every I/O flogs at the rwlock_t in that struct.
2) The total[] array elements are now padded out to 128 bytes, rather than
32 bytes, in order to avoid usermode cacheblock ping-ponging on systems with
128-byte L2 cachelines (like the SGI mips64 Origin2000).
3) The local_exec() cputime-eater was previously sharing a rapidly incrementing
counter among all the cloned threads, which produced massive usermode
cacheblock ping-ponging. The new local_exec() now touches only
thread-private memory.
4) The calibration() routine now accurately calibrates the "inner loop", which
means that the -r argument more accurately declares the microsecond
length of each "round".
5) And I made some other trivial changes that don't materially affect the
behavior of the benchmark.
This version of "reflex" is (in my opinion) a much better vehicle for
examining cpu scheduling behavior.
John Hawkes
ha...@en...
diff --exclude-from=ignore.reflex -Naur reflex_1.1/reflex.c
reflex_1.2/reflex.c
--- reflex_1.1/reflex.c Mon Feb 12 14:13:09 2001
+++ reflex_1.2/reflex.c Mon Mar 5 10:28:29 2001
@@ -1,3 +1,5 @@
+/* #define DEBUG_CALIBRATION 1 */
+/* #define DEBUG_TIMING 1 */
/*
* reflex.c - flexible benchmark for Linux SMP scheduler
*
@@ -47,7 +49,7 @@
#define STACK_SIZE (8192)
-#define CLONE_FLAGS (CLONE_VM | CLONE_SIGHAND | CLONE_FS |
CLONE_FILES)
+#define CLONE_FLAGS (CLONE_VM | CLONE_SIGHAND | CLONE_FS)
#define DEF_PERCENT (1)
#define NUM_WARMUP (1)
#define MIN_TRIALS (NUM_WARMUP+5)
@@ -83,9 +85,9 @@
void run_test_time(void);
int bouncer(void *arg);
int (*worker) (void *arg);
-double local_exec(void) ;
+int local_exec(void) ;
double probrange(unsigned long top);
-void calibration(void) ;
+int calibration(void) ;
float variance(int n, float sum, float sum2);
double child_avg();
double child_var();
@@ -96,9 +98,7 @@
char *child_stack ;
-struct timezone tz1;
struct timeval tv1;
-struct timezone tz2;
struct timeval tv2;
struct timeval tvr;
@@ -118,7 +118,6 @@
int valid_test = 1;
int TOKENSZ; /* size of message treated as token */
double rounds_per_microsecond = 0.0 ; /* obtained through calibration */
-int local_exec_count = 0; /* unused */
int comp_nonyield_rounds ; /* number of rounds of execloop for
nonyield*/
int comp_yield_rounds ; /* number of rounds of execloop for
yield*/
@@ -153,7 +152,7 @@
struct _total
{
unsigned long long count;
- char pad[24];
+ char pad[120];
} *total;
@@ -185,9 +184,6 @@
}
}
-
-
-
if ((num_seconds <= 0) ||
(num_children <= 0) || (num_children > MAX_CHILDREN) ||
(num_active <= 0) || (num_active > num_children) ||
@@ -231,13 +227,14 @@
}
/* calibrate internal loops */
- calibration();
- probyield = local_exec_count ;
+ exit_rc = calibration();
+ if (exit_rc) {
+ goto exit_main3;
+ }
TOKENSZ = sizeof(char);
probyield = (100.0-(double)weight_reschedule_idle)/100.0 ;
probyieldmult = probyield / ((1-probyield)*(1-probyield));
-
/*
comp_nonyield_rounds = (int) (uniform(rnd_compute_time) *
rounds_per_microsecond) ;
comp_yield_rounds = (int) (comp_nonyield_rounds * probyieldmult) ;
@@ -560,14 +557,17 @@
restart:
- if (!fquiet) printf (".");
+ if (!fquiet) {
+ printf (".");
+ fflush(stdout);
+ }
prev_y = 0;
y = 0;
/* get the start time */
- rc = gettimeofday (&tv1, &tz1);
+ rc = gettimeofday (&tv1, NULL);
if (rc) {
stop_test = 1;
exit_rc = errno;
@@ -580,11 +580,10 @@
for (i = 0 ; i < num_children ; i++) prev_y += total[i].count;
sleep (num_seconds);
for (i = 0 ; i < num_children ; i++) y += total[i].count;
- // printf("Rerun : Across children : Avg %15.2f \t Var
%15.2f\n",child_avg(),child_var());
/* get end time */
- rc = gettimeofday (&tv2, &tz2);
+ rc = gettimeofday (&tv2, NULL);
if (rc) {
stop_test = 1;
exit_rc = rc;
@@ -596,6 +595,8 @@
/* compute microseconds per yield */
+ // printf("Rerun : Across children : Avg %15.2f \t Var
%15.2f\n",child_avg(),child_var());
+
timersub(&tv2, &tv1, &tvr); /* tvr now contains result of tv2-tv1 */
x = (unsigned long long)tvr.tv_sec * 1000000;
@@ -603,6 +604,14 @@
results[iterations].data = (float)x;
results[iterations].data /= (float)(y - prev_y);
+#ifdef DEBUG_TIMING
+ printf("Counts:");
+ for (i = 0 ; i < num_children ; i++) {
+ printf(" %d:%d",i,(int)total[i].count);
+ }
+ printf("\nTotalCount:%d\n",(int)(y-prev_y));
+#endif /* DEBUG_TIMING */
+
iterations++;
if (confidence(iterations)) {
stop_test = 1;
@@ -612,6 +621,8 @@
if (!fquiet) printf (" Test Completed.\n");
+ // process_data();
+
switch (foutput) {
case 1:
@@ -644,40 +655,77 @@
}
-double local_exec()
+int local_exec()
{
unsigned int a = 0, b=0;
// memcpy(&a,&b,1);
- local_exec_count++;
+ return a+b;
}
-void calibration(void)
+int calibration(void)
{
/* figure out how many loops we can execute per micro second */
int i;
- int count = 0;
- unsigned long n_initial = 100000;
- unsigned long clock1, clock2, clockterm;
-
- clock1 = clock() ;
- clockterm = clock1 + 5*CLOCKS_PER_SEC;
- do {
- for(i=0; i< n_initial; i++)
- {
- local_exec();
- }
- clock2 = clock();
- count++;
- } while (clock2 < clockterm);
+ int count;
+ unsigned long microsecs;
+ int rc, exit_rc;
+
+ count = 10000000;
+ rc = gettimeofday (&tv1, NULL);
+ if (rc) {
+ exit_rc = errno;
+ perror ("gettimeofday failed on tv1 ");
+ return exit_rc;
+ }
+ for (i=1; i<=count; i++) {
+ local_exec();
+ }
+ rc = gettimeofday (&tv2, NULL);
+ if (rc) {
+ exit_rc = errno;
+ perror ("gettimeofday failed on tv2 ");
+ return exit_rc;
+ }
+ timersub(&tv2, &tv1, &tvr); /* tvr now contains result of tv2-tv1 */
- n_initial *= count;
+ microsecs = (tvr.tv_sec * 1000000) + tvr.tv_usec;
+ rounds_per_microsecond = (double)count / (double)microsecs;
- rounds_per_microsecond = ((double)n_initial*CLOCKS_PER_SEC) /
((double)(clock2-clock1) * 100000.0);
+#ifdef DEBUG_CALIBRATION
+ {
+ int test_rounds = rnd_compute_time * rounds_per_microsecond;
+ printf("%d rounds in %u usec is rounds_per_microsecond:%f\n",
+ count, microsecs, rounds_per_microsecond);
+ gettimeofday (&tv1, NULL);
+ for (i=0; i<test_rounds; i++) {
+ local_exec();
+ }
+ gettimeofday (&tv2, NULL);
+ timersub(&tv2, &tv1, &tvr);
+ printf("test: %d rounds in %d secs, %u usecs\n",
+ test_rounds, tvr.tv_sec, tvr.tv_usec);
+ gettimeofday (&tv1, NULL);
+ for (i=0; i<test_rounds; i++) {
+ local_exec();
+ }
+ gettimeofday (&tv2, NULL);
+ timersub(&tv2, &tv1, &tvr);
+ printf("test: %d rounds in %d secs, %u usecs\n",
+ test_rounds, tvr.tv_sec, tvr.tv_usec);
+ gettimeofday (&tv1, NULL);
+ for (i=0; i<test_rounds; i++) {
+ local_exec();
+ }
+ gettimeofday (&tv2, NULL);
+ timersub(&tv2, &tv1, &tvr);
+ printf("test: %d rounds in %d secs, %u usecs\n",
+ test_rounds, tvr.tv_sec, tvr.tv_usec);
+ }
+#endif
-// rounds_per_microsecond = (n_initial*1000000) /
((double)(clock2-clock1)*CLOCKS_PER_SEC);
- /* printf(">> [%ld] %ld %ld
%lf\n",CLOCKS_PER_SEC,n_initial,(clock2-clock1),rounds_per_microsecond); */
+ return 0;
}
/******************** Statistical functions
**********************************/
_______________________________________________
Lse-tech mailing list
Lse...@li...
http://lists.sourceforge.net/lists/listinfo/lse-tech
|