You can subscribe to this list here.
2009 |
Jan
|
Feb
|
Mar
|
Apr
|
May
|
Jun
(2) |
Jul
(5) |
Aug
(4) |
Sep
(5) |
Oct
(3) |
Nov
(6) |
Dec
(16) |
---|---|---|---|---|---|---|---|---|---|---|---|---|
2010 |
Jan
(3) |
Feb
|
Mar
(2) |
Apr
(4) |
May
(17) |
Jun
|
Jul
|
Aug
(1) |
Sep
(1) |
Oct
(1) |
Nov
|
Dec
(1) |
2011 |
Jan
|
Feb
|
Mar
|
Apr
|
May
|
Jun
|
Jul
|
Aug
|
Sep
(1) |
Oct
|
Nov
|
Dec
|
2012 |
Jan
|
Feb
(2) |
Mar
(2) |
Apr
|
May
|
Jun
|
Jul
|
Aug
|
Sep
|
Oct
|
Nov
|
Dec
|
From: <gs...@us...> - 2012-03-15 22:25:17
|
Revision: 82 http://cudawrapper.svn.sourceforge.net/cudawrapper/?rev=82&view=rev Author: gshi Date: 2012-03-15 22:25:11 +0000 (Thu, 15 Mar 2012) Log Message: ----------- make config numa override the auto discovered numa Modified Paths: -------------- trunk/cuda_wrapper.c Modified: trunk/cuda_wrapper.c =================================================================== --- trunk/cuda_wrapper.c 2012-03-03 21:14:34 UTC (rev 81) +++ trunk/cuda_wrapper.c 2012-03-15 22:25:11 UTC (rev 82) @@ -413,42 +413,44 @@ const char* msg; int cpu_cores[256]; int ncores=256; - int rc = cpulistaffinity(phys_gpu, cpu_cores, &ncores); - if(rc == 0){ - for(i=0;i < ncores;i++){ - CPU_SET(cpu_cores[i], &cpu_set); - } - cpu_set_valid =1; - msg="proc"; + if (wrapper_info->GPUHasAffinity[phys_gpu]) { + /* + * Install affinity mask for the calling thread. For good measure call + * sched_yield() to enforce the mask, though I am not sure if this even + * works (may turn out to be a no-op). + */ + cpu_set = wrapper_info->GPUAffinity[phys_gpu]; + cpu_set_valid = 1; - }else{ - if (wrapper_info->GPUHasAffinity[phys_gpu]) { - /* - * Install affinity mask for the calling thread. For good measure call - * sched_yield() to enforce the mask, though I am not sure if this even - * works (may turn out to be a no-op). 
- */ - cpu_set = wrapper_info->GPUAffinity[phys_gpu]; - cpu_set_valid = 1; - - if(cuda_wrapper_verbose){ - msg= "config"; - ncores = CPU_COUNT(&cpu_set); - int j = 0; - for(i=0;i< MAX_CPU_CORES_PER_NODE;i++){ - if(CPU_ISSET(i, &cpu_set)){ - cpu_cores[j]=i; - j++; - } + if(cuda_wrapper_verbose){ + msg= "config"; + ncores = CPU_COUNT(&cpu_set); + int j = 0; + for(i=0;i< MAX_CPU_CORES_PER_NODE;i++){ + if(CPU_ISSET(i, &cpu_set)){ + cpu_cores[j]=i; + j++; } - if(j != ncores ){ - FPRINTF("Warning: number of cpu cores does not match (%s)\n", __FUNCTION__); - } } - + if(j != ncores ){ + FPRINTF("Warning: number of cpu cores does not match (%s)\n", __FUNCTION__); + } + } + } + + if(!cpu_set_valid){ + int rc = cpulistaffinity(phys_gpu, cpu_cores, &ncores); + if(rc == 0){ + for(i=0;i < ncores;i++){ + CPU_SET(cpu_cores[i], &cpu_set); + } + cpu_set_valid =1; + msg="proc"; } } - + + + if(cpu_set_valid){ if(cuda_wrapper_verbose){ This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <gs...@us...> - 2012-03-03 21:14:40
|
Revision: 81 http://cudawrapper.svn.sourceforge.net/cudawrapper/?rev=81&view=rev Author: gshi Date: 2012-03-03 21:14:34 +0000 (Sat, 03 Mar 2012) Log Message: ----------- bug fix for affinity Modified Paths: -------------- trunk/cuda_wrapper.c Modified: trunk/cuda_wrapper.c =================================================================== --- trunk/cuda_wrapper.c 2012-02-28 21:39:55 UTC (rev 80) +++ trunk/cuda_wrapper.c 2012-03-03 21:14:34 UTC (rev 81) @@ -461,7 +461,7 @@ int ret = sched_setaffinity(cuda_wrapper_getid(), sizeof(cpu_set_t), - &wrapper_info->GPUAffinity[phys_gpu]); + &cpu_set); if (ret){ FPRINTF("Warning!! Unable to setup processor affinity for virtual device %d, Physical device: %d\n", device, phys_gpu); } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <gs...@us...> - 2012-02-28 21:40:01
|
Revision: 80 http://cudawrapper.svn.sourceforge.net/cudawrapper/?rev=80&view=rev Author: gshi Date: 2012-02-28 21:39:55 +0000 (Tue, 28 Feb 2012) Log Message: ----------- added Galen's affinity CPU code. Now the GPU process will try to find affnity cpu cores from proc FS. If that fails, then it will use the affnity info from the config file Modified Paths: -------------- trunk/Makefile trunk/cuda_wrapper.c trunk/cuda_wrapper.h Added Paths: ----------- trunk/affinity_for_nvidia.c Modified: trunk/Makefile =================================================================== --- trunk/Makefile 2012-02-27 16:55:09 UTC (rev 79) +++ trunk/Makefile 2012-02-28 21:39:55 UTC (rev 80) @@ -3,7 +3,7 @@ # University of Illinois/NCSA # Open Source License # -# Copyright \xA9 2009, University of Illinois. All rights reserved. +# Copyright © 2009, University of Illinois. All rights reserved. # # Developed by: # @@ -44,7 +44,7 @@ OPENCL_LIB_DIR ?=/usr/local/opencl/lib CC=gcc -CFLAGS = -fPIC -Wall +CFLAGS = -c -D_GNU_SOURCE -fPIC -Wall CUDA_INC_DIR = ${CUDA_HOME}/include CUDA_LIB_DIR =${CUDA_HOME}/lib64 @@ -57,6 +57,8 @@ OPENCL_LINK_LIB= endif +CUDA_WRAPPER_OBJS=cuda_wrapper.o wrapper_common.o affinity_for_nvidia.o ${OPENCL_WRAPPER_OBJ} + all: wrapper_init wrapper_terminate wrapper_query cuda_wrapper.so cuda_memscrubber cuda_affinity_test opencl: wrapper_init wrapper_terminate wrapper_query opencl_wrapper.so @@ -75,8 +77,8 @@ $(CC) -c -I. -D_GNU_SOURCE wrapper_query.c $(CC) wrapper_query.o wrapper_common.o -o wrapper_query -lpthread -cuda_wrapper.so: cuda_wrapper.o wrapper_common.o ${OPENCL_WRAPPER_OBJ} - $(CC) -shared -I. -I$(CUDA_INC_DIR) cuda_wrapper.o wrapper_common.o ${OPENCL_WRAPPER_OBJ} -o cuda_wrapper.so -ldl -lpthread ${OPENCL_LINK_LIB} +cuda_wrapper.so: ${CUDA_WRAPPER_OBJS} + $(CC) -shared -I. 
-I$(CUDA_INC_DIR) ${CUDA_WRAPPER_OBJS} -o $@ -ldl -lpthread ${OPENCL_LINK_LIB} opencl_wrapper.so: wrapper_common.o opencl_wrapper.o $(CC) -shared wrapper_common.o opencl_wrapper.o -o opencl_wrapper.so -ldl ${OPENCL_LINK_LIB} @@ -89,6 +91,9 @@ cuda_memscrubber: cuda_memscrubber.c $(CC) -O3 -funroll-all-loops -I. -I$(CUDA_INC_DIR) cuda_memscrubber.c -o cuda_memscrubber -L$(CUDA_LIB_DIR) -lcuda -lpthread +affinity_for_nvidia.o: affinity_for_nvidia.c + ${CC} ${CFLAGS} affinity_for_nvidia.c + cuda_affinity_test: cuda_affinity_test.c ${CC} -O3 -I${CUDA_INC_DIR} -o $@ $< -L${CUDA_LIB_DIR} -lnuma -lcudart Added: trunk/affinity_for_nvidia.c =================================================================== --- trunk/affinity_for_nvidia.c (rev 0) +++ trunk/affinity_for_nvidia.c 2012-02-28 21:39:55 UTC (rev 80) @@ -0,0 +1,170 @@ + +// Galen Arnold , ar...@nc... # for support and bug reports +// Guochun Shi +// Jan., 2012 +// NCSA +#include <stdio.h> +#include <stdlib.h> +#include <sched.h> +#include <unistd.h> +#include <string.h> + +int process_core_string_list(const char* _str, int* list, int* ncores); +int process_core_string_item(const char* str, int* sub_list, int* sub_ncores); + +// cpulistaffinity() makes the association between the numbered gpu device "my_gpu" and +// the cpu cores associated with it by following the path name of the pci_bus listed +// under the nvidia driver in /proc . 
+int cpulistaffinity(int my_gpu, int *cpu_cores, int* ncores) +{ + FILE *nvidia_info, *pci_bus_info; + size_t nbytes = 255; + //int core3, core4; // fillers for sscanf() + char *my_line; + char nvidia_info_path[255], pci_bus_info_path[255]; + char bus_info[255]; + + // the nvidia driver populates this path for each gpu + sprintf(nvidia_info_path,"/proc/driver/nvidia/gpus/%d/information", my_gpu); + nvidia_info= fopen(nvidia_info_path,"r"); + if (nvidia_info == NULL){ + return -1; + } + + my_line= (char *) malloc(nbytes +1); + if (my_line == NULL){ + printf("Error: allocating memory for my_line failed"); + exit(-1); + } + + while (!feof(nvidia_info)){ + if ( -1 == getline(&my_line, &nbytes, nvidia_info)){ + break; + }else{ + // the first 7 char of the Bus Location will lead to the corresponding + // path under /sys/class/pci_bus/ , cpulistaffinity showing cores on that + // bus is located there + if ( 1 == sscanf(my_line,"Bus Location: %s", bus_info )){ + sprintf(pci_bus_info_path,"/sys/class/pci_bus/%.7s/cpulistaffinity", + bus_info); + } + } + } + // open the cpulistaffinity file on the pci_bus for "my_gpu" + pci_bus_info= fopen(pci_bus_info_path,"r"); + if (pci_bus_info == NULL){ + free(my_line); + fclose(nvidia_info); + return -1; + } + + while (!feof(pci_bus_info)){ + if ( -1 == getline(&my_line, &nbytes, pci_bus_info)){ + break; + } else{ + int rc = process_core_string_list(my_line, cpu_cores, ncores); + if(rc < 0){ + printf("Warning:%s: processing the line (%s) failed\n", __FUNCTION__, my_line); + free(my_line); + fclose(nvidia_info); + return -1; + } + } + } + + free(my_line); + return(0); +} + + +int process_core_string_list(const char* _str, int* list, int* ncores) +{ + /* The input string @str should be separated by comma, and each item can be + * either a number or a range (see the comments in process_core_string_item + * function) + * + */ + + if(_str == NULL || list == NULL || ncores == NULL + || *ncores <= 0){ + printf("ERROR: Invalid arguments in 
function %s\n", __FUNCTION__ ); + return -1; + } + + char str[256]; + strncpy(str, _str, sizeof(str)); + + int left_space = *ncores; + int tot_cores = 0; + + char* item = strtok(str, ","); + if(item == NULL){ + printf("ERROR: Invalid string format(%s)\n", str); + return -1; + } + + do { + int sub_ncores = left_space; + int* sub_list = list + tot_cores; + + int rc = process_core_string_item(item, sub_list, &sub_ncores); + if(rc <0){ + printf("ERROR: processing item(%s) failed\n", item); + return -1; + } + + tot_cores += sub_ncores; + left_space -= sub_ncores; + + item = strtok(NULL, ","); + }while( item != NULL); + + *ncores = tot_cores; + return 0; +} + + +int process_core_string_item(const char* str, int* sub_list, int* sub_ncores) +{ + /* assume the input format is one of the following two + * 1. a number only, e.g. 5 + * 2. a range, e.g 4-6, which means three numbers 4,5,6 + * return a list of numbers in @sub_list and and the total numbers + * in @sub_ncores + */ + int i; + if(str == NULL || sub_list == NULL || sub_ncores == NULL || + *sub_ncores <= 0){ + printf("ERROR: Wrong parameters in function %s!\n", __FUNCTION__); + return -1; + } + + if(strstr(str, "-") != NULL){ + //a range + int low_core, high_core; + if (sscanf(str,"%d-%d",&low_core, &high_core) != 2){ + printf("ERROR: range scan failed\n"); + return -1; + } + if(*sub_ncores < high_core-low_core +1){ + printf("ERROR: not enough space in sub_list\n"); + return -1; + } + + for(i = 0; i < high_core-low_core +1; i++){ + sub_list[i] = i + low_core; + } + *sub_ncores = high_core - low_core +1; + + }else{ + //a number + int core; + if (sscanf(str, "%d", &core) != 1){ + printf("ERROR: wrong format for core number\n"); + return -1; + } + sub_list[0] = core; + *sub_ncores =1; + } + return 0; +} Modified: trunk/cuda_wrapper.c =================================================================== --- trunk/cuda_wrapper.c 2012-02-27 16:55:09 UTC (rev 79) +++ trunk/cuda_wrapper.c 2012-02-28 21:39:55 UTC (rev 80) @@ 
-96,6 +96,7 @@ static __host__ cudaError_t CUDARTAPI (*real_cudaEventRecord) (cudaEvent_t event, cudaStream_t stream); +extern int cpulistaffinity(int my_gpu, int *cpu_cores, int* ncores); static void* (*real_dlopen)(const char* filename, int flag); @@ -397,24 +398,76 @@ if(cuda_wrapper_num_affinity_disabled){ return 0; } - int ret; - assert(device <= wrapper_info->nGPU); - int phys_gpu = wrapper_virt_to_phys(device); + + assert(device <= wrapper_info->nGPU); + int phys_gpu = wrapper_virt_to_phys(device); + + /*Go to proc filesystem for cpu affnity directly (nvidida only) + *otherwise, use the recorded ones + */ + cpu_set_t cpu_set; + CPU_ZERO(&cpu_set); + int cpu_set_valid = 0; + + int i; + const char* msg; + int cpu_cores[256]; + int ncores=256; + int rc = cpulistaffinity(phys_gpu, cpu_cores, &ncores); + if(rc == 0){ + for(i=0;i < ncores;i++){ + CPU_SET(cpu_cores[i], &cpu_set); + } + cpu_set_valid =1; + msg="proc"; + + }else{ if (wrapper_info->GPUHasAffinity[phys_gpu]) { - /* - * Install affinity mask for the calling thread. For good measure call - * sched_yield() to enforce the mask, though I am not sure if this even - * works (may turn out to be a no-op). - */ - ret = sched_setaffinity(cuda_wrapper_getid(), + /* + * Install affinity mask for the calling thread. For good measure call + * sched_yield() to enforce the mask, though I am not sure if this even + * works (may turn out to be a no-op). 
+ */ + cpu_set = wrapper_info->GPUAffinity[phys_gpu]; + cpu_set_valid = 1; + + if(cuda_wrapper_verbose){ + msg= "config"; + ncores = CPU_COUNT(&cpu_set); + int j = 0; + for(i=0;i< MAX_CPU_CORES_PER_NODE;i++){ + if(CPU_ISSET(i, &cpu_set)){ + cpu_cores[j]=i; + j++; + } + } + if(j != ncores ){ + FPRINTF("Warning: number of cpu cores does not match (%s)\n", __FUNCTION__); + } + } + + } + } + + if(cpu_set_valid){ + + if(cuda_wrapper_verbose){ + printf("setting GPU device %d to affinity CPU cores with info from %s:", phys_gpu, msg); + for(i=0;i < ncores;i++){ + printf("%d ", cpu_cores[i]); + } + printf("\n"); + } + + int ret = sched_setaffinity(cuda_wrapper_getid(), sizeof(cpu_set_t), &wrapper_info->GPUAffinity[phys_gpu]); - if (ret){ - FPRINTF("Warning!! Unable to setup processor affinity for virtual device %d, Physical device: %d\n", device, phys_gpu); - } + if (ret){ + FPRINTF("Warning!! Unable to setup processor affinity for virtual device %d, Physical device: %d\n", device, phys_gpu); } - - return 0; + } + + return 0; } Modified: trunk/cuda_wrapper.h =================================================================== --- trunk/cuda_wrapper.h 2012-02-27 16:55:09 UTC (rev 79) +++ trunk/cuda_wrapper.h 2012-02-28 21:39:55 UTC (rev 80) @@ -46,6 +46,7 @@ #define MAGIC_JOBID 0x7a3a4b5c #define MAX_GPUS_PER_NODE 8 #define MAX_DEVICES_PER_NODE 8 +#define MAX_CPU_CORES_PER_NODE 64 #define WRAPPER_SHMEM_PERM 0666 #define MAX_KEY_ENV_VAR 32 This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <gs...@us...> - 2012-02-27 16:55:19
|
Revision: 79 http://cudawrapper.svn.sourceforge.net/cudawrapper/?rev=79&view=rev Author: gshi Date: 2012-02-27 16:55:09 +0000 (Mon, 27 Feb 2012) Log Message: ----------- change the max path string to 1024 from 128 Modified Paths: -------------- trunk/cuda_wrapper.c Modified: trunk/cuda_wrapper.c =================================================================== --- trunk/cuda_wrapper.c 2011-09-27 22:14:28 UTC (rev 78) +++ trunk/cuda_wrapper.c 2012-02-27 16:55:09 UTC (rev 79) @@ -470,8 +470,8 @@ } - char prog[128]; - if (getexename(prog, 128) == NULL){ + char prog[1024]; + if (getexename(prog, 1024) == NULL){ FPRINTF("Getting exe name failed\n"); } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <gs...@us...> - 2011-09-27 22:14:33
|
Revision: 78 http://cudawrapper.svn.sourceforge.net/cudawrapper/?rev=78&view=rev Author: gshi Date: 2011-09-27 22:14:28 +0000 (Tue, 27 Sep 2011) Log Message: ----------- uplift max of Cpu cores per node to 64 Modified Paths: -------------- trunk/wrapper_common.c Modified: trunk/wrapper_common.c =================================================================== --- trunk/wrapper_common.c 2010-12-14 23:44:50 UTC (rev 77) +++ trunk/wrapper_common.c 2011-09-27 22:14:28 UTC (rev 78) @@ -64,7 +64,7 @@ for(i=0; i < MAX_GPUS_PER_NODE;i++){ if (wp->GPUHasAffinity[i]){ printf("\t\tGPU=%d, CPU=", i); - for(j=0; j < 8; j++){ + for(j=0; j < 64; j++){ if (CPU_ISSET(j, &wp->GPUAffinity[i])){ printf("%d ", j); } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <gs...@us...> - 2010-12-14 23:44:56
|
Revision: 77 http://cudawrapper.svn.sourceforge.net/cudawrapper/?rev=77&view=rev Author: gshi Date: 2010-12-14 23:44:50 +0000 (Tue, 14 Dec 2010) Log Message: ----------- update README: document some newly implemented environment variables Modified Paths: -------------- trunk/cuda_wrapper.c trunk/readme_user.txt Modified: trunk/cuda_wrapper.c =================================================================== --- trunk/cuda_wrapper.c 2010-10-28 22:54:24 UTC (rev 76) +++ trunk/cuda_wrapper.c 2010-12-14 23:44:50 UTC (rev 77) @@ -259,10 +259,10 @@ if (getenv("CUDA_VIRT_DISABLE")){ cuda_virt_disabled= 1; } - if (getenv("CUDA_WRAPPER_NUMA_AFFINITY_DISABLED")){ + if (getenv("CUDA_WRAPPER_NUMA_AFFINITY_DISABLE")){ cuda_wrapper_num_affinity_disabled=1; } - if (getenv("CUDA_WRAPPER_DEVICE_CONSTRAIN_DISABLED")){ + if (getenv("CUDA_WRAPPER_DEVICE_CONSTRAIN_DISABLE")){ cuda_device_constrain_disabled=1; } if (getenv("CUDA_WRAPPER_PROFILE_ENABLE")){ Modified: trunk/readme_user.txt =================================================================== --- trunk/readme_user.txt 2010-10-28 22:54:24 UTC (rev 76) +++ trunk/readme_user.txt 2010-12-14 23:44:50 UTC (rev 77) @@ -71,10 +71,12 @@ Controlling Environment Variables: * CUDA_WRAPPER_VERBOSE: Display debug/verbose output during execution. The default is non-verbose when this variable is unset. + * CUDA_WRAPPER_PASSTHRU: Enables bypassing the wrapper library. This feature is always enabled for root, and is unset for users by default. This feature is enabled by default, but may be disabled by the administrator. + * CUDA_VIRT_DISABLE: Disables virtualized (zero indexed) device mapping. This is the same as CUDA_WRAPPER_PASSTHRU except it maintains concealment of devices you don't have @@ -82,3 +84,28 @@ and functions depend on zero indexed device id's and make break by setting this. 
+* CUDA_WRAPPER_NUMA_AFFINITY_DISABLE: + Once this environment variable is set, the cuda + wrapper library will stop bind the GPU thread + to nearest CPU socket. The GPU+CPU combiation + will be handled by the OS and is probably random. + +* CUDA_WRAPPER_DEVICE_CONSTRAIN_DISABLE: + Once this variable is set, the cuda wrapper will + stop changing the program's default GPU to one of + its assigned GPUs, i.e. all programs will run on + GPU 0 if they do not explicitly set which GPU to + use. + +* CUDA_WRAPPER_PROFILE_ENABLE: + This environment variable enable the cuda wrapper + to profile the program for its GPU usage. The + overall GPU kernel run time will be recorded in + the system shmem and be queried during job using + wrapper_query. This feature incurs some performance + penalty and should be used carefully. + + + + + This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <gs...@us...> - 2010-10-28 22:54:30
|
Revision: 76 http://cudawrapper.svn.sourceforge.net/cudawrapper/?rev=76&view=rev Author: gshi Date: 2010-10-28 22:54:24 +0000 (Thu, 28 Oct 2010) Log Message: ----------- adding the cuda include path in the opencl compiling path by default (after the user designated path if any) Modified Paths: -------------- trunk/Makefile Modified: trunk/Makefile =================================================================== --- trunk/Makefile 2010-09-07 23:09:06 UTC (rev 75) +++ trunk/Makefile 2010-10-28 22:54:24 UTC (rev 76) @@ -84,7 +84,7 @@ $(CC) -g -I. -I$(CUDA_INC_DIR) ${CFLAGS} -D_GNU_SOURCE -c cuda_wrapper.c opencl_wrapper.o: opencl_wrapper.c - ${CC} -g -I${OPENCL_INC_DIR} ${CFLAGS} -D_GNU_SOURCE -c opencl_wrapper.c + ${CC} -g -I${OPENCL_INC_DIR} -I${CUDA_INC_DIR} ${CFLAGS} -D_GNU_SOURCE -c opencl_wrapper.c cuda_memscrubber: cuda_memscrubber.c $(CC) -O3 -funroll-all-loops -I. -I$(CUDA_INC_DIR) cuda_memscrubber.c -o cuda_memscrubber -L$(CUDA_LIB_DIR) -lcuda -lpthread This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <gs...@us...> - 2010-09-07 23:09:12
|
Revision: 75 http://cudawrapper.svn.sourceforge.net/cudawrapper/?rev=75&view=rev Author: gshi Date: 2010-09-07 23:09:06 +0000 (Tue, 07 Sep 2010) Log Message: ----------- need to link to pthread by default since we overloaded the pthread_create function Modified Paths: -------------- trunk/Makefile Modified: trunk/Makefile =================================================================== --- trunk/Makefile 2010-08-13 18:25:14 UTC (rev 74) +++ trunk/Makefile 2010-09-07 23:09:06 UTC (rev 75) @@ -76,7 +76,7 @@ $(CC) wrapper_query.o wrapper_common.o -o wrapper_query -lpthread cuda_wrapper.so: cuda_wrapper.o wrapper_common.o ${OPENCL_WRAPPER_OBJ} - $(CC) -shared -I. -I$(CUDA_INC_DIR) cuda_wrapper.o wrapper_common.o ${OPENCL_WRAPPER_OBJ} -o cuda_wrapper.so -ldl ${OPENCL_LINK_LIB} + $(CC) -shared -I. -I$(CUDA_INC_DIR) cuda_wrapper.o wrapper_common.o ${OPENCL_WRAPPER_OBJ} -o cuda_wrapper.so -ldl -lpthread ${OPENCL_LINK_LIB} opencl_wrapper.so: wrapper_common.o opencl_wrapper.o $(CC) -shared wrapper_common.o opencl_wrapper.o -o opencl_wrapper.so -ldl ${OPENCL_LINK_LIB} @@ -93,7 +93,7 @@ ${CC} -O3 -I${CUDA_INC_DIR} -o $@ $< -L${CUDA_LIB_DIR} -lnuma -lcudart clean: - rm -f *.o cuda_wrapper.so wrapper_terminate wrapper_init wrapper_query cuda_memscrubber + rm -f *.o cuda_wrapper.so wrapper_terminate wrapper_init wrapper_query cuda_memscrubber cuda_affinity_test install: install -cD cuda_wrapper.so /usr/local/cuda_wrapper/lib64/cuda_wrapper.so install -cD -m 700 wrapper_init /usr/local/cuda_wrapper/bin/wrapper_init This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <gs...@us...> - 2010-08-13 18:25:21
|
Revision: 74 http://cudawrapper.svn.sourceforge.net/cudawrapper/?rev=74&view=rev Author: gshi Date: 2010-08-13 18:25:14 +0000 (Fri, 13 Aug 2010) Log Message: ----------- disable profiling by default; to enable it, set the environment variable CUDA_WRAPPER_PROFILE_ENABLE Modified Paths: -------------- trunk/Makefile trunk/cuda_wrapper.c Modified: trunk/Makefile =================================================================== --- trunk/Makefile 2010-05-26 18:29:58 UTC (rev 73) +++ trunk/Makefile 2010-08-13 18:25:14 UTC (rev 74) @@ -100,4 +100,3 @@ install -cD -m 700 wrapper_terminate /usr/local/cuda_wrapper/bin/wrapper_terminate install -cD wrapper_query /usr/local/cuda_wrapper/bin/wrapper_query install -cD cuda_memscrubber /usr/local/cuda_wrapper/bin/cuda_memscrubber - Modified: trunk/cuda_wrapper.c =================================================================== --- trunk/cuda_wrapper.c 2010-05-26 18:29:58 UTC (rev 73) +++ trunk/cuda_wrapper.c 2010-08-13 18:25:14 UTC (rev 74) @@ -84,6 +84,7 @@ static __host__ cudaError_t CUDARTAPI (*real_cudaChooseDevice)(int *device, const struct cudaDeviceProp *prop); static __host__ cudaError_t CUDARTAPI (*real_cudaSetValidDevices)(int *device, int n); static __host__ cudaError_t CUDARTAPI (*real_cudaLaunch)(const char* entry); +static __host__ cudaError_t CUDARTAPI (*real_cudaConfigureCall)(dim3, dim3, size_t, cudaStream_t); /* These event API are not wrappered but we need to resolve them so that we can use them * even if the user application is not linked to the cuda runtime library. 
@@ -110,6 +111,7 @@ static int cuda_exec = 0; extern int cuda_wrapper_magic_job_enabled; +static __thread cudaStream_t kernel_stream = (cudaStream_t)0; static __thread cudaEvent_t start_event, stop_event; static __thread double gpu_kernel_time =0; static __thread int cuda_exec_type=0; @@ -118,6 +120,7 @@ static pthread_mutex_t total_gpu_kernel_time_lock = PTHREAD_MUTEX_INITIALIZER; static int cuda_wrapper_num_affinity_disabled = 0; static int cuda_device_constrain_disabled = 0; +static int profile_enabled = 0; #define CUDA_GDB_ENV_STRING "__CUDA_GDB_RUNNING__" @@ -262,6 +265,10 @@ if (getenv("CUDA_WRAPPER_DEVICE_CONSTRAIN_DISABLED")){ cuda_device_constrain_disabled=1; } + if (getenv("CUDA_WRAPPER_PROFILE_ENABLE")){ + profile_enabled = 1; + } + /* Attach to the shared memory segment for this node */ wrapper_info = wrapper_attach_shmem_segment(); if (!real_dlopen){ @@ -284,6 +291,7 @@ real_cudaChooseDevice = get_symbol(cuda_hdl, "cudaChooseDevice"); real_cudaSetValidDevices = get_symbol(cuda_hdl, "cudaSetValidDevices"); real_cudaLaunch = get_symbol(cuda_hdl, "cudaLaunch"); + real_cudaConfigureCall = get_symbol(cuda_hdl, "cudaConfigureCall"); real_cudaEventCreate = get_symbol(cuda_hdl, "cudaEventCreate"); real_cudaEventDestroy = get_symbol(cuda_hdl, "cudaEventDestroy"); @@ -532,7 +540,7 @@ if(cuda_exec && wrapper_info ){ wrapper_info->pid = -1; - if (cuda_exec_type == CUDA_EXEC_RUNTIME){ + if (cuda_exec_type == CUDA_EXEC_RUNTIME && profile_enabled){ if (real_cudaEventQuery(stop_event) == cudaSuccess){ float tmp; if (real_cudaEventElapsedTime(&tmp, start_event, stop_event) == cudaSuccess){ @@ -1116,6 +1124,16 @@ } __host__ cudaError_t CUDARTAPI +cudaConfigureCall(dim3 gridDim, dim3 blockDim, size_t sharedMem, cudaStream_t stream) +{ + + kernel_stream = stream; + return real_cudaConfigureCall(gridDim, blockDim, sharedMem, stream); + +} + + +__host__ cudaError_t CUDARTAPI cudaLaunch(const char* entry) { @@ -1132,6 +1150,10 @@ wrapper_info->cuda_exec_type |= 
CUDA_EXEC_RUNTIME; cuda_exec_type |= CUDA_EXEC_RUNTIME; + if (!profile_enabled){ + return real_cudaLaunch(entry); + } + static __thread int first_time = 1; if (first_time){ real_cudaEventCreate(&start_event); @@ -1139,14 +1161,12 @@ } - //printf("%s is called in file %s\n", __FUNCTION__, __FILE__); - cudaError_t ret; if (first_time){ - real_cudaEventRecord(start_event, 0); + real_cudaEventRecord(start_event, kernel_stream); ret = (*real_cudaLaunch)(entry); - real_cudaEventRecord(stop_event, 0); + real_cudaEventRecord(stop_event, kernel_stream); }else{ if (real_cudaEventQuery(stop_event) == cudaSuccess){ float tmp; @@ -1158,9 +1178,9 @@ real_cudaEventCreate(&stop_event); gpu_kernel_time += 0.001*tmp; - real_cudaEventRecord(start_event, 0); + real_cudaEventRecord(start_event, kernel_stream); ret = (*real_cudaLaunch)(entry); - real_cudaEventRecord(stop_event, 0); + real_cudaEventRecord(stop_event, kernel_stream); }else{ @@ -1172,7 +1192,7 @@ real_cudaEventDestroy(stop_event); real_cudaEventCreate(&stop_event); - real_cudaEventRecord(stop_event,0); + real_cudaEventRecord(stop_event,kernel_stream); } } @@ -1224,7 +1244,7 @@ static void update_gpu_kernel_time() { - if(cuda_exec && wrapper_info && (cuda_exec_type == CUDA_EXEC_RUNTIME) ){ + if(cuda_exec && wrapper_info && (cuda_exec_type == CUDA_EXEC_RUNTIME) && profile_enabled ){ if (real_cudaEventQuery(stop_event) == cudaSuccess){ float tmp; real_cudaEventElapsedTime(&tmp, start_event, stop_event); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <gs...@us...> - 2010-05-26 18:30:04
|
Revision: 73 http://cudawrapper.svn.sourceforge.net/cudawrapper/?rev=73&view=rev Author: gshi Date: 2010-05-26 18:29:58 +0000 (Wed, 26 May 2010) Log Message: ----------- wrong index for gpu devices fixed Modified Paths: -------------- trunk/opencl_wrapper.c Modified: trunk/opencl_wrapper.c =================================================================== --- trunk/opencl_wrapper.c 2010-05-26 18:13:44 UTC (rev 72) +++ trunk/opencl_wrapper.c 2010-05-26 18:29:58 UTC (rev 73) @@ -228,13 +228,14 @@ FPRINTF("Error: Invalid phys device number(%d)\n", index); continue; } + rc = clGetDeviceInfo(all_devices[non_cpu_devices[index]], CL_DEVICE_TYPE, sizeof(cl_device_type), &type, NULL); if (rc != CL_SUCCESS){ goto out; } if( (target_device_type == CL_DEVICE_TYPE_ALL) || (target_device_type == type)){ if (devices){ - devices[output_n] = all_devices[index]; + devices[output_n] = all_devices[non_cpu_devices[index]]; } output_n++; This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <gs...@us...> - 2010-05-26 18:13:51
|
Revision: 72 http://cudawrapper.svn.sourceforge.net/cudawrapper/?rev=72&view=rev Author: gshi Date: 2010-05-26 18:13:44 +0000 (Wed, 26 May 2010) Log Message: ----------- fixed a possible infinite loop caused by an invalid entry in the GPU map file Modified Paths: -------------- trunk/wrapper_init.c Modified: trunk/wrapper_init.c =================================================================== --- trunk/wrapper_init.c 2010-05-26 14:35:58 UTC (rev 71) +++ trunk/wrapper_init.c 2010-05-26 18:13:44 UTC (rev 72) @@ -310,11 +310,21 @@ * right format. */ wrapper->nGPU = 0; - while (!feof(fin)){ - fscanf(fin, "%d\n", &wrapper->physGPU[wrapper->nGPU]); + + char buf[128]; + char* s = NULL; + while( (s = fgets(buf, sizeof(buf), fin)) != NULL){ + if (*s == '\n' || *s == ' ' || *s == '\t'){ + continue; + } + if (sscanf(s, "%d\n", &wrapper->physGPU[wrapper->nGPU]) <=0){ + printf("ERROR: invalid entry(%s)\n", s); + exit(1); + } wrapper->nGPU++; } - + + /* Close file */ fclose(fin); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <gs...@us...> - 2010-05-26 14:36:04
|
Revision: 71 http://cudawrapper.svn.sourceforge.net/cudawrapper/?rev=71&view=rev Author: gshi Date: 2010-05-26 14:35:58 +0000 (Wed, 26 May 2010) Log Message: ----------- print out an error message when an invalid physical device is detected Modified Paths: -------------- trunk/opencl_wrapper.c Modified: trunk/opencl_wrapper.c =================================================================== --- trunk/opencl_wrapper.c 2010-05-24 20:45:27 UTC (rev 70) +++ trunk/opencl_wrapper.c 2010-05-26 14:35:58 UTC (rev 71) @@ -224,6 +224,10 @@ for (i =0;i < wrapper_info->nGPU;i++){ int index = wrapper_info->physGPU[i]; cl_device_type type; + if (index >= ncd_n){ + FPRINTF("Error: Invalid phys device number(%d)\n", index); + continue; + } rc = clGetDeviceInfo(all_devices[non_cpu_devices[index]], CL_DEVICE_TYPE, sizeof(cl_device_type), &type, NULL); if (rc != CL_SUCCESS){ goto out; This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <gs...@us...> - 2010-05-24 20:45:33
|
Revision: 70 http://cudawrapper.svn.sourceforge.net/cudawrapper/?rev=70&view=rev Author: gshi Date: 2010-05-24 20:45:27 +0000 (Mon, 24 May 2010) Log Message: ----------- added start mapping for non-CPU devices CPU devices will always be visible to users non-CPU devices will be mapped based on its order of appearance Modified Paths: -------------- trunk/opencl_wrapper.c Modified: trunk/opencl_wrapper.c =================================================================== --- trunk/opencl_wrapper.c 2010-05-23 19:21:12 UTC (rev 69) +++ trunk/opencl_wrapper.c 2010-05-24 20:45:27 UTC (rev 70) @@ -176,6 +176,7 @@ } } //find out the devices that matches the target type +#if 0 //dumb device mapping int output_n = 0; for(i=0;i < wrapper_info->nGPU; i++){ int index = wrapper_info->physGPU[i]; @@ -191,7 +192,56 @@ output_n++; } } +#else /*the cpu device is always visible to users, the gpu devices is mapped + according to the order of its appearannce*/ + + int non_cpu_devices[MAX_DEVICES_PER_NODE]; + int output_n = 0; + int ncd_n=0; + for(i=0;i < n; i++){ + cl_device_type type; + rc = clGetDeviceInfo(all_devices[i], CL_DEVICE_TYPE, sizeof(cl_device_type), &type, NULL); + if (rc != CL_SUCCESS){ + goto out; + } + + if( ((target_device_type == CL_DEVICE_TYPE_ALL) || (target_device_type==CL_DEVICE_TYPE_CPU)) + && type == CL_DEVICE_TYPE_CPU){ + if (devices){ + devices[output_n] = all_devices[i]; + } + output_n++; + } + + if (type !=CL_DEVICE_TYPE_CPU){ + non_cpu_devices[ncd_n] = i; + ncd_n ++; + } + } + + //add non-cpu device + if (target_device_type != CL_DEVICE_TYPE_CPU){ + for (i =0;i < wrapper_info->nGPU;i++){ + int index = wrapper_info->physGPU[i]; + cl_device_type type; + rc = clGetDeviceInfo(all_devices[non_cpu_devices[index]], CL_DEVICE_TYPE, sizeof(cl_device_type), &type, NULL); + if (rc != CL_SUCCESS){ + goto out; + } + if( (target_device_type == CL_DEVICE_TYPE_ALL) || (target_device_type == type)){ + if (devices){ + devices[output_n] = all_devices[index]; + } + output_n++; 
+ + } + } + } + +#endif + + if (num_devices){ *num_devices = output_n; This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <gs...@us...> - 2010-05-23 19:21:18
|
Revision: 69 http://cudawrapper.svn.sourceforge.net/cudawrapper/?rev=69&view=rev Author: gshi Date: 2010-05-23 19:21:12 +0000 (Sun, 23 May 2010) Log Message: ----------- bug fix: the devices were not accessible to opencl when in magic mode Modified Paths: -------------- trunk/opencl_wrapper.c Modified: trunk/opencl_wrapper.c =================================================================== --- trunk/opencl_wrapper.c 2010-05-22 22:05:57 UTC (rev 68) +++ trunk/opencl_wrapper.c 2010-05-23 19:21:12 UTC (rev 69) @@ -8,6 +8,7 @@ static int passThru = 0; static int verbose = 0; static int available = 1; +extern int cuda_wrapper_magic_job_enabled; extern int wrapper_print_mapping(FILE*); @@ -49,7 +50,7 @@ } /* do not allow to modify shared memory if it does not belong to this user */ - if (!wrapper_info || wrapper_info->userID != userID){ + if (!wrapper_info || (wrapper_info->userID != userID && !cuda_wrapper_magic_job_enabled)){ passThru = 0; available = 0; return; @@ -144,7 +145,6 @@ goto out; } - wrapper_info->cuda_exec_type = OPENCL_EXEC; int cpu_device_available =0; @@ -175,14 +175,6 @@ target_device_type = CL_DEVICE_TYPE_GPU; } } - printf("CPU=%d, gpu=%d\n", CL_DEVICE_TYPE_CPU, CL_DEVICE_TYPE_GPU); - - if (device_type == CL_DEVICE_TYPE_CPU){ - printf("searching for cpu devices\n"); - }else if(device_type == CL_DEVICE_TYPE_GPU){ - printf("searching for gpu devices\n"); - } - //find out the devices that matches the target type int output_n = 0; for(i=0;i < wrapper_info->nGPU; i++){ This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <gs...@us...> - 2010-05-22 22:06:03
|
Revision: 68 http://cudawrapper.svn.sourceforge.net/cudawrapper/?rev=68&view=rev Author: gshi Date: 2010-05-22 22:05:57 +0000 (Sat, 22 May 2010) Log Message: ----------- make the devices visibility work with different device type for both amd and nvidia Modified Paths: -------------- trunk/Makefile trunk/cuda_wrapper.c trunk/cuda_wrapper.h trunk/opencl_wrapper.c trunk/wrapper_common.c Modified: trunk/Makefile =================================================================== --- trunk/Makefile 2010-05-20 21:48:47 UTC (rev 67) +++ trunk/Makefile 2010-05-22 22:05:57 UTC (rev 68) @@ -58,6 +58,7 @@ endif all: wrapper_init wrapper_terminate wrapper_query cuda_wrapper.so cuda_memscrubber cuda_affinity_test +opencl: wrapper_init wrapper_terminate wrapper_query opencl_wrapper.so wrapper_common.o: wrapper_common.c cuda_wrapper.h ${CC} -c ${CFLAGS} -I. -D_GNU_SOURCE wrapper_common.c @@ -76,6 +77,8 @@ cuda_wrapper.so: cuda_wrapper.o wrapper_common.o ${OPENCL_WRAPPER_OBJ} $(CC) -shared -I. -I$(CUDA_INC_DIR) cuda_wrapper.o wrapper_common.o ${OPENCL_WRAPPER_OBJ} -o cuda_wrapper.so -ldl ${OPENCL_LINK_LIB} +opencl_wrapper.so: wrapper_common.o opencl_wrapper.o + $(CC) -shared wrapper_common.o opencl_wrapper.o -o opencl_wrapper.so -ldl ${OPENCL_LINK_LIB} cuda_wrapper.o: cuda_wrapper.c $(CC) -g -I. 
-I$(CUDA_INC_DIR) ${CFLAGS} -D_GNU_SOURCE -c cuda_wrapper.c Modified: trunk/cuda_wrapper.c =================================================================== --- trunk/cuda_wrapper.c 2010-05-20 21:48:47 UTC (rev 67) +++ trunk/cuda_wrapper.c 2010-05-22 22:05:57 UTC (rev 68) @@ -98,17 +98,17 @@ static void* (*real_dlopen)(const char* filename, int flag); -cuda_wrapper_t wrapper_info = NULL; -cuda_wrapper_t wrapper_attach_shmem_segment(void); +extern cuda_wrapper_t wrapper_info; +extern cuda_wrapper_t wrapper_attach_shmem_segment(void); int wrapper_print_mapping(FILE*); static int passThru = 0; -static int verbose = 0; +extern int cuda_wrapper_verbose; static int available = 1; static int cuda_virt_disabled= 0; static int nGPU; static int cuda_exec = 0; -static int magic_job_enabled= 0; +extern int cuda_wrapper_magic_job_enabled; static __thread cudaEvent_t start_event, stop_event; static __thread double gpu_kernel_time =0; @@ -127,23 +127,10 @@ void* real_arg; }pthread_arg_t; -int -wrapper_print_mapping(FILE* out) -{ - static int firsttime = 1; - if (!firsttime){ - return 0; - } - int i; - fprintf(out, "[%s]: GPU mapping:\n", MSG_PREFIX); - for (i=0; i < wrapper_info->nGPU; i++){ - fprintf(out, "[%s]:\tVirtual-->Physical: %d --> %d\n", MSG_PREFIX,i, wrapper_info->physGPU[i]); - } - firsttime=0; +extern int wrapper_print_mapping(FILE* out); - return 0; -} + static char* getexename(char* buf, size_t size) { @@ -264,7 +251,7 @@ uid_t userID; if (getenv("CUDA_WRAPPER_VERBOSE")){ - verbose = 1; + cuda_wrapper_verbose = 1; } if (getenv("CUDA_VIRT_DISABLE")){ cuda_virt_disabled= 1; @@ -321,7 +308,7 @@ return 0; } - if (!wrapper_info || (wrapper_info->userID != userID && !magic_job_enabled)){ + if (!wrapper_info || (wrapper_info->userID != userID && !cuda_wrapper_magic_job_enabled)){ passThru = 0; available = 0; return 0; @@ -337,10 +324,10 @@ } } - if (passThru && verbose){ + if (passThru && cuda_wrapper_verbose){ PRINTF("passThru is enabled\n"); } - if (verbose && 
passThru == 0 && cuda_virt_disabled ==0){ + if (cuda_wrapper_verbose && passThru == 0 && cuda_virt_disabled ==0){ wrapper_print_mapping(stdout); } @@ -1301,72 +1288,3 @@ return ret; } - - - -/* - * This function is only called by wrapper_get_real_funcs() - * it sets wrapper_info if it founds PBS_JOBID-related shared memory - * availability and passThru is set in the wrapper_get_real_funcs() - */ - -cuda_wrapper_t -wrapper_attach_shmem_segment(void) -{ - - key_t key; - int shmid; - char *data; - char *JOBID; - int jobID; - - //test the magic jobid - key = MAGIC_JOBID; - shmid = shmget(key, sizeof(struct cuda_wrapper), WRAPPER_SHMEM_PERM); - if (shmid != -1){ //shared memory with magic jobid is present - data = (char*)shmat(shmid, (void*)0, 0); - if (data != (char*)-1){//found the shared memory segment - magic_job_enabled=1; - if (verbose){ - FPRINTF(" Magic job mode enabled!\n"); - } - return (cuda_wrapper_t)data; - } - } - - - JOBID = getenv("PBS_JOBID"); - if (!JOBID){ - /* Not running in a PBS environment. 
No shared memory */ - return NULL; - } - - - jobID = wrapper_atoi(JOBID); - if (jobID <= 0){ - /*invalid job id*/ - return NULL; - } - - - key = jobID; - shmid = shmget(key, sizeof(struct cuda_wrapper), WRAPPER_SHMEM_PERM); - if (shmid == -1){ - //perror("shmget:"); - //fprintf(stderr, "ERROR: No shared memory found with this jobID (%d)\n", jobID); - //exit(errno); - return NULL; //means someone has fake JOBID set, we should not disturb the normal operation by exit - } - - data = (char*) shmat(shmid, (void*) 0, 0); - if (data == (char*) -1){ - perror("shmat:"); - //exit(errno); - return NULL; - } - - return (cuda_wrapper_t)data; -} - - - Modified: trunk/cuda_wrapper.h =================================================================== --- trunk/cuda_wrapper.h 2010-05-20 21:48:47 UTC (rev 67) +++ trunk/cuda_wrapper.h 2010-05-22 22:05:57 UTC (rev 68) @@ -45,6 +45,7 @@ #define MAGIC_JOBID 0x7a3a4b5c #define MAX_GPUS_PER_NODE 8 +#define MAX_DEVICES_PER_NODE 8 #define WRAPPER_SHMEM_PERM 0666 #define MAX_KEY_ENV_VAR 32 Modified: trunk/opencl_wrapper.c =================================================================== --- trunk/opencl_wrapper.c 2010-05-20 21:48:47 UTC (rev 67) +++ trunk/opencl_wrapper.c 2010-05-22 22:05:57 UTC (rev 68) @@ -95,6 +95,15 @@ } +/**************************** + * CL_DEVICE_TYPE_DEFAULT is for GPU devices with nvidia + * but with AMD it means to get CPU devices + * Here we emulate the behavior by checking if there is CPU devices + * available; if there is, then the default is set to cpu + * otherwise the GPU devices are the default + * + *****************************/ + cl_int clGetDeviceIDs (cl_platform_id platform, cl_device_type device_type, @@ -102,65 +111,102 @@ cl_device_id *devices, cl_uint *num_devices) { - int i; - cl_int rc = CL_SUCCESS; - + int i; + cl_int rc = CL_SUCCESS; + - static int depth = 0; + static int depth = 0; - depth++; - if (depth > 10){ - FPRINTF("ERROR: We are likely in endless recursion (depth=%d), exitting\n", 
depth); - exit(1); - } + depth++; + if (depth > 10){ + FPRINTF("ERROR: We are likely in endless recursion (depth=%d), exitting\n", depth); + exit(1); + } - opencl_wrapper_get_real_funcs(); + opencl_wrapper_get_real_funcs(); - //PRINTF("calling clGetDeviceIDs\n"); - //if passThru, call the real function - if ( passThru){ + //PRINTF("calling clGetDeviceIDs\n"); + //if passThru, call the real function + if ( passThru){ - rc = real_clGetDeviceIDs(platform, device_type, num_entries, devices, num_devices); - goto out; - } + rc = real_clGetDeviceIDs(platform, device_type, num_entries, devices, num_devices); + goto out; + } - if ( ((devices == NULL) && (num_devices == NULL)) - || (devices && num_entries == 0)){ - rc = CL_INVALID_VALUE; - goto out; - } + if ( ((devices == NULL) && (num_devices == NULL)) + || (devices && num_entries == 0)){ + rc = CL_INVALID_VALUE; + goto out; + } - if (!available){ - rc= CL_DEVICE_NOT_FOUND; - goto out; - } + if (!available){ + rc= CL_DEVICE_NOT_FOUND; + goto out; + } - wrapper_info->cuda_exec_type = OPENCL_EXEC; + wrapper_info->cuda_exec_type = OPENCL_EXEC; - int output_n = wrapper_info->nGPU; - - if (devices){ - cl_device_id all_devices[MAX_GPUS_PER_NODE]; - cl_uint n; - rc = real_clGetDeviceIDs(platform, device_type, MAX_GPUS_PER_NODE, all_devices, &n); - assert(n >= wrapper_info->nGPU); - - output_n = (num_entries > wrapper_info->nGPU)? 
wrapper_info->nGPU:num_entries; - for(i=0;i < output_n; i++){ - int index = wrapper_info->physGPU[i]; - devices[i] = all_devices[index]; - } - - + int cpu_device_available =0; + cl_device_id all_devices[MAX_DEVICES_PER_NODE]; + cl_uint n; + rc = real_clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, MAX_DEVICES_PER_NODE, all_devices, &n); + assert(n >= wrapper_info->nGPU); + + //find out if there is any cpu device + for(i=0;i < n; i++){ + cl_device_type type; + rc = clGetDeviceInfo(all_devices[i], CL_DEVICE_TYPE, sizeof(cl_device_type), &type, NULL); + if (rc != CL_SUCCESS){ + goto out; } - - if (num_devices){ - *num_devices = output_n; + if (type == CL_DEVICE_TYPE_CPU){ + cpu_device_available =1; + break; } + } + + //find out what the default device is + cl_device_type target_device_type = device_type; + if (device_type == CL_DEVICE_TYPE_DEFAULT){ + if (cpu_device_available){ + target_device_type = CL_DEVICE_TYPE_CPU; + }else{ + target_device_type = CL_DEVICE_TYPE_GPU; + } + } + printf("CPU=%d, gpu=%d\n", CL_DEVICE_TYPE_CPU, CL_DEVICE_TYPE_GPU); + + if (device_type == CL_DEVICE_TYPE_CPU){ + printf("searching for cpu devices\n"); + }else if(device_type == CL_DEVICE_TYPE_GPU){ + printf("searching for gpu devices\n"); + } + + //find out the devices that matches the target type + int output_n = 0; + for(i=0;i < wrapper_info->nGPU; i++){ + int index = wrapper_info->physGPU[i]; + cl_device_type type; + rc = clGetDeviceInfo(all_devices[index], CL_DEVICE_TYPE, sizeof(cl_device_type), &type, NULL); + if (rc != CL_SUCCESS){ + goto out; + } + if( (target_device_type == CL_DEVICE_TYPE_ALL) || (type == target_device_type) ){ + if (devices){ + devices[output_n] = all_devices[index]; + } + output_n++; + } + } + + + if (num_devices){ + *num_devices = output_n; + } out: - depth -- ; - return rc; + depth -- ; + return rc; } Modified: trunk/wrapper_common.c =================================================================== --- trunk/wrapper_common.c 2010-05-20 21:48:47 UTC (rev 67) +++ 
trunk/wrapper_common.c 2010-05-22 22:05:57 UTC (rev 68) @@ -41,6 +41,9 @@ #include "cuda_wrapper.h" #define MAXSTRLEN 64 +cuda_wrapper_t wrapper_info = NULL; +int cuda_wrapper_magic_job_enabled= 0; +int cuda_wrapper_verbose = 0; void wrapper_print_out(cuda_wrapper_t wp) @@ -111,3 +114,87 @@ return ret; } + + +/* + * This function is only called by wrapper_get_real_funcs() + * it sets wrapper_info if it founds PBS_JOBID-related shared memory + * availability and passThru is set in the wrapper_get_real_funcs() + */ + +cuda_wrapper_t +wrapper_attach_shmem_segment(void) +{ + + key_t key; + int shmid; + char *data; + char *JOBID; + int jobID; + + //test the magic jobid + key = MAGIC_JOBID; + shmid = shmget(key, sizeof(struct cuda_wrapper), WRAPPER_SHMEM_PERM); + if (shmid != -1){ //shared memory with magic jobid is present + data = (char*)shmat(shmid, (void*)0, 0); + if (data != (char*)-1){//found the shared memory segment + cuda_wrapper_magic_job_enabled=1; + if (cuda_wrapper_verbose){ + FPRINTF(" Magic job mode enabled!\n"); + } + return (cuda_wrapper_t)data; + } + } + + + JOBID = getenv("PBS_JOBID"); + if (!JOBID){ + /* Not running in a PBS environment. 
No shared memory */ + return NULL; + } + + + jobID = wrapper_atoi(JOBID); + if (jobID <= 0){ + /*invalid job id*/ + return NULL; + } + + + key = jobID; + shmid = shmget(key, sizeof(struct cuda_wrapper), WRAPPER_SHMEM_PERM); + if (shmid == -1){ + //perror("shmget:"); + //fprintf(stderr, "ERROR: No shared memory found with this jobID (%d)\n", jobID); + //exit(errno); + return NULL; //means someone has fake JOBID set, we should not disturb the normal operation by exit + } + + data = (char*) shmat(shmid, (void*) 0, 0); + if (data == (char*) -1){ + perror("shmat:"); + //exit(errno); + return NULL; + } + + return (cuda_wrapper_t)data; +} + + + +int +wrapper_print_mapping(FILE* out) +{ + static int firsttime = 1; + if (!firsttime){ + return 0; + } + int i; + fprintf(out, "[%s]: GPU mapping:\n", MSG_PREFIX); + for (i=0; i < wrapper_info->nGPU; i++){ + fprintf(out, "[%s]:\tVirtual-->Physical: %d --> %d\n", MSG_PREFIX,i, wrapper_info->physGPU[i]); + } + firsttime=0; + + return 0; +} This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <gs...@us...> - 2010-05-20 21:48:53
|
Revision: 67 http://cudawrapper.svn.sourceforge.net/cudawrapper/?rev=67&view=rev Author: gshi Date: 2010-05-20 21:48:47 +0000 (Thu, 20 May 2010) Log Message: ----------- add a temporary measure that can disable the device constraint Modified Paths: -------------- trunk/cuda_wrapper.c Modified: trunk/cuda_wrapper.c =================================================================== --- trunk/cuda_wrapper.c 2010-05-19 23:56:30 UTC (rev 66) +++ trunk/cuda_wrapper.c 2010-05-20 21:48:47 UTC (rev 67) @@ -117,6 +117,7 @@ static struct timeval prog_start_time, prog_stop_time; static pthread_mutex_t total_gpu_kernel_time_lock = PTHREAD_MUTEX_INITIALIZER; static int cuda_wrapper_num_affinity_disabled = 0; +static int cuda_device_constrain_disabled = 0; #define CUDA_GDB_ENV_STRING "__CUDA_GDB_RUNNING__" @@ -271,6 +272,9 @@ if (getenv("CUDA_WRAPPER_NUMA_AFFINITY_DISABLED")){ cuda_wrapper_num_affinity_disabled=1; } + if (getenv("CUDA_WRAPPER_DEVICE_CONSTRAIN_DISABLED")){ + cuda_device_constrain_disabled=1; + } /* Attach to the shared memory segment for this node */ wrapper_info = wrapper_attach_shmem_segment(); if (!real_dlopen){ @@ -448,11 +452,12 @@ for(i=0;i<nGPU;i++){ logical_devices[i] = i; } - - rc = cudaSetValidDevices(logical_devices, nGPU); - if (rc != cudaSuccess){ + if (!cuda_device_constrain_disabled){ + rc = cudaSetValidDevices(logical_devices, nGPU); + if (rc != cudaSuccess){ FPRINTF("Warning: cudaSetValidDevices() call in %s failed\n", __FUNCTION__); FPRINTF("The cuda error is: %s, nGPU=%d\n", cudaGetErrorString(cudaGetLastError()), nGPU); + } } } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <gs...@us...> - 2010-05-19 23:56:36
|
Revision: 66 http://cudawrapper.svn.sourceforge.net/cudawrapper/?rev=66&view=rev Author: gshi Date: 2010-05-19 23:56:30 +0000 (Wed, 19 May 2010) Log Message: ----------- Fixed the wrong value of the OPENCL_EXEC macro Modified Paths: -------------- trunk/cuda_wrapper.h Modified: trunk/cuda_wrapper.h =================================================================== --- trunk/cuda_wrapper.h 2010-05-19 22:41:04 UTC (rev 65) +++ trunk/cuda_wrapper.h 2010-05-19 23:56:30 UTC (rev 66) @@ -69,7 +69,7 @@ #define CUDA_EXEC_RUNTIME 1 #define CUDA_EXEC_DRIVER 2 -#define OPENCL_EXEC 3 +#define OPENCL_EXEC 4 struct cuda_wrapper { int version; This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <gs...@us...> - 2010-05-19 22:41:10
|
Revision: 65 http://cudawrapper.svn.sourceforge.net/cudawrapper/?rev=65&view=rev Author: gshi Date: 2010-05-19 22:41:04 +0000 (Wed, 19 May 2010) Log Message: ----------- add opencl printout when opencl is used Modified Paths: -------------- trunk/cuda_wrapper.h trunk/opencl_wrapper.c trunk/wrapper_common.c Modified: trunk/cuda_wrapper.h =================================================================== --- trunk/cuda_wrapper.h 2010-05-19 21:18:25 UTC (rev 64) +++ trunk/cuda_wrapper.h 2010-05-19 22:41:04 UTC (rev 65) @@ -69,6 +69,7 @@ #define CUDA_EXEC_RUNTIME 1 #define CUDA_EXEC_DRIVER 2 +#define OPENCL_EXEC 3 struct cuda_wrapper { int version; Modified: trunk/opencl_wrapper.c =================================================================== --- trunk/opencl_wrapper.c 2010-05-19 21:18:25 UTC (rev 64) +++ trunk/opencl_wrapper.c 2010-05-19 22:41:04 UTC (rev 65) @@ -135,7 +135,9 @@ goto out; } - + + wrapper_info->cuda_exec_type = OPENCL_EXEC; + int output_n = wrapper_info->nGPU; if (devices){ Modified: trunk/wrapper_common.c =================================================================== --- trunk/wrapper_common.c 2010-05-19 21:18:25 UTC (rev 64) +++ trunk/wrapper_common.c 2010-05-19 22:41:04 UTC (rev 65) @@ -78,6 +78,9 @@ if (wp->cuda_exec_type & CUDA_EXEC_DRIVER){ strcat(buf, "Driver_API "); } + if (wp->cuda_exec_type & OPENCL_EXEC){ + strcat(buf, "OpenCL "); + } if (wp->cuda_exec_type == 0){ strcat(buf, "Unknown"); } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <gs...@us...> - 2010-05-19 21:18:31
|
Revision: 64 http://cudawrapper.svn.sourceforge.net/cudawrapper/?rev=64&view=rev Author: gshi Date: 2010-05-19 21:18:25 +0000 (Wed, 19 May 2010) Log Message: ----------- add the option to disable numa affinity Modified Paths: -------------- trunk/cuda_wrapper.c Modified: trunk/cuda_wrapper.c =================================================================== --- trunk/cuda_wrapper.c 2010-05-19 17:31:43 UTC (rev 63) +++ trunk/cuda_wrapper.c 2010-05-19 21:18:25 UTC (rev 64) @@ -116,6 +116,7 @@ static double total_gpu_kernel_time =0; static struct timeval prog_start_time, prog_stop_time; static pthread_mutex_t total_gpu_kernel_time_lock = PTHREAD_MUTEX_INITIALIZER; +static int cuda_wrapper_num_affinity_disabled = 0; #define CUDA_GDB_ENV_STRING "__CUDA_GDB_RUNNING__" @@ -267,7 +268,9 @@ if (getenv("CUDA_VIRT_DISABLE")){ cuda_virt_disabled= 1; } - + if (getenv("CUDA_WRAPPER_NUMA_AFFINITY_DISABLED")){ + cuda_wrapper_num_affinity_disabled=1; + } /* Attach to the shared memory segment for this node */ wrapper_info = wrapper_attach_shmem_segment(); if (!real_dlopen){ @@ -392,6 +395,9 @@ static int cuda_wrapper_set_affinity(int device) { + if(cuda_wrapper_num_affinity_disabled){ + return 0; + } int ret; assert(device <= wrapper_info->nGPU); int phys_gpu = wrapper_virt_to_phys(device); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <gs...@us...> - 2010-05-19 17:31:51
|
Revision: 63 http://cudawrapper.svn.sourceforge.net/cudawrapper/?rev=63&view=rev Author: gshi Date: 2010-05-19 17:31:43 +0000 (Wed, 19 May 2010) Log Message: ----------- add one space in message Modified Paths: -------------- trunk/cuda_wrapper.c Modified: trunk/cuda_wrapper.c =================================================================== --- trunk/cuda_wrapper.c 2010-05-19 17:30:18 UTC (rev 62) +++ trunk/cuda_wrapper.c 2010-05-19 17:31:43 UTC (rev 63) @@ -1317,7 +1317,7 @@ if (data != (char*)-1){//found the shared memory segment magic_job_enabled=1; if (verbose){ - FPRINTF("Magic job mode enabled!\n"); + FPRINTF(" Magic job mode enabled!\n"); } return (cuda_wrapper_t)data; } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <gs...@us...> - 2010-05-19 17:30:24
|
Revision: 62 http://cudawrapper.svn.sourceforge.net/cudawrapper/?rev=62&view=rev Author: gshi Date: 2010-05-19 17:30:18 +0000 (Wed, 19 May 2010) Log Message: ----------- only print out the magic job message if the verbose is set Modified Paths: -------------- trunk/cuda_wrapper.c Modified: trunk/cuda_wrapper.c =================================================================== --- trunk/cuda_wrapper.c 2010-05-14 01:59:23 UTC (rev 61) +++ trunk/cuda_wrapper.c 2010-05-19 17:30:18 UTC (rev 62) @@ -1316,7 +1316,9 @@ data = (char*)shmat(shmid, (void*)0, 0); if (data != (char*)-1){//found the shared memory segment magic_job_enabled=1; - FPRINTF("Magic job mode enabled!\n"); + if (verbose){ + FPRINTF("Magic job mode enabled!\n"); + } return (cuda_wrapper_t)data; } } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <gs...@us...> - 2010-05-14 01:59:29
|
Revision: 61 http://cudawrapper.svn.sourceforge.net/cudawrapper/?rev=61&view=rev Author: gshi Date: 2010-05-14 01:59:23 +0000 (Fri, 14 May 2010) Log Message: ----------- use the magic jobid to search for the shared memory segment in wrapper_query when no jobid is specified or found within the PBS environment Modified Paths: -------------- trunk/wrapper_query.c Modified: trunk/wrapper_query.c =================================================================== --- trunk/wrapper_query.c 2010-05-13 22:04:41 UTC (rev 60) +++ trunk/wrapper_query.c 2010-05-14 01:59:23 UTC (rev 61) @@ -156,7 +156,7 @@ case 'k': jobID = wrapper_atoi(optarg); if (jobID <= 0){ - fprintf(stderr, "Error: invalid key\n"); + fprintf(stderr, "Error: invalid key\n"); print_usage(argv[0]); return -1; } @@ -178,16 +178,17 @@ if (jobID <= 0){ char* jobID_str = getenv("PBS_JOBID"); - if (jobID_str == NULL){ - print_usage(argv[0]); - return -1; - } - jobID = wrapper_atoi(jobID_str); - if (jobID <= 0){ + if (jobID_str != NULL){ + jobID = wrapper_atoi(jobID_str); + if (jobID <= 0){ fprintf(stderr, "Error: invalid jobid enviroment variable(%d)\n", jobID_str); print_usage(argv[0]); return -1; + } + }else{ + jobID = MAGIC_JOBID; } + } wrapper_query_shmem_segment(jobID); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <je...@us...> - 2010-05-13 22:04:47
|
Revision: 60 http://cudawrapper.svn.sourceforge.net/cudawrapper/?rev=60&view=rev Author: jenos Date: 2010-05-13 22:04:41 +0000 (Thu, 13 May 2010) Log Message: ----------- Updated admin readme for wrapper use w/o visibility scoping. Changed makefile to Makefile. Included OpenCL support enabled. Modified Paths: -------------- trunk/readme_admin.txt Added Paths: ----------- trunk/Makefile Removed Paths: ------------- trunk/makefile Added: trunk/Makefile =================================================================== --- trunk/Makefile (rev 0) +++ trunk/Makefile 2010-05-13 22:04:41 UTC (rev 60) @@ -0,0 +1,100 @@ +# Illinois Open Source License +# +# University of Illinois/NCSA +# Open Source License +# +# Copyright \xA9 2009, University of Illinois. All rights reserved. +# +# Developed by: +# +# Innovative Systems Lab +# National Center for Supercomputing Applications +# http://www.ncsa.uiuc.edu/AboutUs/Directorates/ISL.html +# +# Permission is hereby granted, free of charge, to any person obtaining a copy of +# this software and associated documentation files (the "Software"), to deal with +# the Software without restriction, including without limitation the rights to use, +# copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the +# Software, and to permit persons to whom the Software is furnished to do so, subject +# to the following conditions: +# +# * Redistributions of source code must retain the above copyright notice, this list +# of conditions and the following disclaimers. +# +# * Redistributions in binary form must reproduce the above copyright notice, this list +# of conditions and the following disclaimers in the documentation and/or other materials +# provided with the distribution. 
+# +# * Neither the names of the Innovative Systems Lab, the National Center for Supercomputing +# Applications, nor the names of its contributors may be used to endorse or promote products +# derived from this Software without specific prior written permission. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, +# INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR +# PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE +# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT +# OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS WITH THE SOFTWARE. +# + +COMPILE_CUDA_OCL_WRAPPER=1 + +CUDA_HOME ?=/usr/local/cuda +OPENCL_INC_DIR ?=/usr/local/opencl/include +OPENCL_LIB_DIR ?=/usr/local/opencl/lib + +CC=gcc +CFLAGS = -fPIC -Wall +CUDA_INC_DIR = ${CUDA_HOME}/include +CUDA_LIB_DIR =${CUDA_HOME}/lib64 + + +ifeq (${COMPILE_CUDA_OCL_WRAPPER}, 1) + OPENCL_WRAPPER_OBJ=opencl_wrapper.o + OPENCL_LINK_LIB=-L${OPENCL_LIB_DIR} -lOpenCL +else + OPENCL_WRAPPER_OBJ= + OPENCL_LINK_LIB= +endif + +all: wrapper_init wrapper_terminate wrapper_query cuda_wrapper.so cuda_memscrubber cuda_affinity_test + +wrapper_common.o: wrapper_common.c cuda_wrapper.h + ${CC} -c ${CFLAGS} -I. -D_GNU_SOURCE wrapper_common.c + +wrapper_init: wrapper_init.c cuda_wrapper.h wrapper_common.o + $(CC) -c -I. -D_GNU_SOURCE wrapper_init.c + $(CC) wrapper_init.o wrapper_common.o -o wrapper_init -lpthread + +wrapper_terminate: wrapper_terminate.c cuda_wrapper.h wrapper_common.o + $(CC) -c -I. -D_GNU_SOURCE wrapper_terminate.c + $(CC) wrapper_terminate.o wrapper_common.o -o wrapper_terminate -lpthread + +wrapper_query: wrapper_query.c cuda_wrapper.h wrapper_common.o + $(CC) -c -I. 
-D_GNU_SOURCE wrapper_query.c + $(CC) wrapper_query.o wrapper_common.o -o wrapper_query -lpthread + +cuda_wrapper.so: cuda_wrapper.o wrapper_common.o ${OPENCL_WRAPPER_OBJ} + $(CC) -shared -I. -I$(CUDA_INC_DIR) cuda_wrapper.o wrapper_common.o ${OPENCL_WRAPPER_OBJ} -o cuda_wrapper.so -ldl ${OPENCL_LINK_LIB} + +cuda_wrapper.o: cuda_wrapper.c + $(CC) -g -I. -I$(CUDA_INC_DIR) ${CFLAGS} -D_GNU_SOURCE -c cuda_wrapper.c + +opencl_wrapper.o: opencl_wrapper.c + ${CC} -g -I${OPENCL_INC_DIR} ${CFLAGS} -D_GNU_SOURCE -c opencl_wrapper.c + +cuda_memscrubber: cuda_memscrubber.c + $(CC) -O3 -funroll-all-loops -I. -I$(CUDA_INC_DIR) cuda_memscrubber.c -o cuda_memscrubber -L$(CUDA_LIB_DIR) -lcuda -lpthread + +cuda_affinity_test: cuda_affinity_test.c + ${CC} -O3 -I${CUDA_INC_DIR} -o $@ $< -L${CUDA_LIB_DIR} -lnuma -lcudart + +clean: + rm -f *.o cuda_wrapper.so wrapper_terminate wrapper_init wrapper_query cuda_memscrubber +install: + install -cD cuda_wrapper.so /usr/local/cuda_wrapper/lib64/cuda_wrapper.so + install -cD -m 700 wrapper_init /usr/local/cuda_wrapper/bin/wrapper_init + install -cD -m 700 wrapper_terminate /usr/local/cuda_wrapper/bin/wrapper_terminate + install -cD wrapper_query /usr/local/cuda_wrapper/bin/wrapper_query + install -cD cuda_memscrubber /usr/local/cuda_wrapper/bin/cuda_memscrubber + Deleted: trunk/makefile =================================================================== --- trunk/makefile 2010-05-13 14:14:19 UTC (rev 59) +++ trunk/makefile 2010-05-13 22:04:41 UTC (rev 60) @@ -1,100 +0,0 @@ -# Illinois Open Source License -# -# University of Illinois/NCSA -# Open Source License -# -# Copyright \xA9 2009, University of Illinois. All rights reserved. 
-# -# Developed by: -# -# Innovative Systems Lab -# National Center for Supercomputing Applications -# http://www.ncsa.uiuc.edu/AboutUs/Directorates/ISL.html -# -# Permission is hereby granted, free of charge, to any person obtaining a copy of -# this software and associated documentation files (the "Software"), to deal with -# the Software without restriction, including without limitation the rights to use, -# copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the -# Software, and to permit persons to whom the Software is furnished to do so, subject -# to the following conditions: -# -# * Redistributions of source code must retain the above copyright notice, this list -# of conditions and the following disclaimers. -# -# * Redistributions in binary form must reproduce the above copyright notice, this list -# of conditions and the following disclaimers in the documentation and/or other materials -# provided with the distribution. -# -# * Neither the names of the Innovative Systems Lab, the National Center for Supercomputing -# Applications, nor the names of its contributors may be used to endorse or promote products -# derived from this Software without specific prior written permission. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, -# INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR -# PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE -# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT -# OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -# DEALINGS WITH THE SOFTWARE. 
-# - -#COMPILE_CUDA_OCL_WRAPPER=1 - -CUDA_HOME ?=/usr/local/cuda -OPENCL_INC_DIR ?=/usr/local/opencl/include -OPENCL_LIB_DIR ?=/usr/local/opencl/lib - -CC=gcc -CFLAGS = -fPIC -Wall -CUDA_INC_DIR = ${CUDA_HOME}/include -CUDA_LIB_DIR =${CUDA_HOME}/lib64 - - -ifeq (${COMPILE_CUDA_OCL_WRAPPER}, 1) - OPENCL_WRAPPER_OBJ=opencl_wrapper.o - OPENCL_LINK_LIB=-L${OPENCL_LIB_DIR} -lOpenCL -else - OPENCL_WRAPPER_OBJ= - OPENCL_LINK_LIB= -endif - -all: wrapper_init wrapper_terminate wrapper_query cuda_wrapper.so cuda_memscrubber cuda_affinity_test - -wrapper_common.o: wrapper_common.c cuda_wrapper.h - ${CC} -c ${CFLAGS} -I. -D_GNU_SOURCE wrapper_common.c - -wrapper_init: wrapper_init.c cuda_wrapper.h wrapper_common.o - $(CC) -c -I. -D_GNU_SOURCE wrapper_init.c - $(CC) wrapper_init.o wrapper_common.o -o wrapper_init -lpthread - -wrapper_terminate: wrapper_terminate.c cuda_wrapper.h wrapper_common.o - $(CC) -c -I. -D_GNU_SOURCE wrapper_terminate.c - $(CC) wrapper_terminate.o wrapper_common.o -o wrapper_terminate -lpthread - -wrapper_query: wrapper_query.c cuda_wrapper.h wrapper_common.o - $(CC) -c -I. -D_GNU_SOURCE wrapper_query.c - $(CC) wrapper_query.o wrapper_common.o -o wrapper_query -lpthread - -cuda_wrapper.so: cuda_wrapper.o wrapper_common.o ${OPENCL_WRAPPER_OBJ} - $(CC) -shared -I. -I$(CUDA_INC_DIR) cuda_wrapper.o wrapper_common.o ${OPENCL_WRAPPER_OBJ} -o cuda_wrapper.so -ldl ${OPENCL_LINK_LIB} - -cuda_wrapper.o: cuda_wrapper.c - $(CC) -g -I. -I$(CUDA_INC_DIR) ${CFLAGS} -D_GNU_SOURCE -c cuda_wrapper.c - -opencl_wrapper.o: opencl_wrapper.c - ${CC} -g -I${OPENCL_INC_DIR} ${CFLAGS} -D_GNU_SOURCE -c opencl_wrapper.c - -cuda_memscrubber: cuda_memscrubber.c - $(CC) -O3 -funroll-all-loops -I. 
-I$(CUDA_INC_DIR) cuda_memscrubber.c -o cuda_memscrubber -L$(CUDA_LIB_DIR) -lcuda -lpthread - -cuda_affinity_test: cuda_affinity_test.c - ${CC} -O3 -I${CUDA_INC_DIR} -o $@ $< -L${CUDA_LIB_DIR} -lnuma -lcudart - -clean: - rm -f *.o cuda_wrapper.so wrapper_terminate wrapper_init wrapper_query cuda_memscrubber -install: - install -cD cuda_wrapper.so /usr/local/cuda_wrapper/lib64/cuda_wrapper.so - install -cD -m 700 wrapper_init /usr/local/cuda_wrapper/bin/wrapper_init - install -cD -m 700 wrapper_terminate /usr/local/cuda_wrapper/bin/wrapper_terminate - install -cD wrapper_query /usr/local/cuda_wrapper/bin/wrapper_query - install -cD cuda_memscrubber /usr/local/cuda_wrapper/bin/cuda_memscrubber - Modified: trunk/readme_admin.txt =================================================================== --- trunk/readme_admin.txt 2010-05-13 14:14:19 UTC (rev 59) +++ trunk/readme_admin.txt 2010-05-13 22:04:41 UTC (rev 60) @@ -61,11 +61,11 @@ ############################################################################### # Short Utility Description ############################################################################### -wrapper_init: Create shared memory segment in the machine. It is used by - the admin when creating GPU allocations for a user +wrapper_init: Create shared memory segment in the machine. It is used + by the admin when creating GPU allocations for a user -wrapper_query: Query the existing shared GPU allocations. It can be used by - both the admin and users +wrapper_query: Query the existing shared GPU allocations. It can be + used by both the admin and users wrapper_terminate: Terminate a shared memory segment in the machine. It is used by the admin to de-allocate GPUs. @@ -136,8 +136,10 @@ socket has an affiliation with a given PCI-E slot. It is essentially a list of GPUs by device number, followed by a comma delimited list of the closest CPU cores in terms of NUMA. Setting these options on NUMA systems -can improve memory bandwidth by as much as 25%. 
-Examples are shown in the "Implementation Suggestions" section below. +can improve memory bandwidth by as much as 25%. The cuda_affinity_test +utility can generate these lines for you based on a benchmark if needed, and is +described further below in addition to examples in "Implementation Suggestions" +below. The CUDA wrapper library depends on a shared memory initialization prior to user application requests for GPU devices. Essentially, this is needed to @@ -159,9 +161,12 @@ options: -c <config file> -g <GPU allocated file> # required for instantiation - -u <user id> # required - -k <unique instance key> # required, e.g. $PBS_JOBID + -u <user id> # required (unless -m is used) + -k <unique instance key> # required, e.g. $PBS_JOBID (unless -m is used) + -m # use magic jobid (mutually exclusive with + -k option) -l # list info from shared memory (if it exists) + -d # keep the physical GPU order in the file -v # verbose -h # print out this message @@ -209,6 +214,16 @@ process by accident. (some may be in use by another job on a shared host) ############################################################################### +Virtualize Only Use (no device visibility scoping) +############################################################################### +Some scenarios have multi-GPU systems which need to maintain all GPU device +visibility to all users, yet still would benefit from other wrapper features +like device virtualization, affinity mapping, or usage statistics. This can +be accomplished with the magic jobid parameter (-m) to wrapper_init. This use +maintains all GPU access and visibility, but all other wrapper features are +available.
+ +############################################################################### Implementation Suggestions: ############################################################################### A job scheduler like Moab from Cluster Resources may support definition of This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <gs...@us...> - 2010-05-13 14:14:25
|
Revision: 59 http://cudawrapper.svn.sourceforge.net/cudawrapper/?rev=59&view=rev Author: gshi Date: 2010-05-13 14:14:19 +0000 (Thu, 13 May 2010) Log Message: ----------- print out a message about magic job mode when the magic job id is used Modified Paths: -------------- trunk/cuda_wrapper.c Modified: trunk/cuda_wrapper.c =================================================================== --- trunk/cuda_wrapper.c 2010-05-13 01:48:59 UTC (rev 58) +++ trunk/cuda_wrapper.c 2010-05-13 14:14:19 UTC (rev 59) @@ -1316,6 +1316,7 @@ data = (char*)shmat(shmid, (void*)0, 0); if (data != (char*)-1){//found the shared memory segment magic_job_enabled=1; + FPRINTF("Magic job mode enabled!\n"); return (cuda_wrapper_t)data; } } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <gs...@us...> - 2010-05-13 01:49:08
|
Revision: 58 http://cudawrapper.svn.sourceforge.net/cudawrapper/?rev=58&view=rev Author: gshi Date: 2010-05-13 01:48:59 +0000 (Thu, 13 May 2010) Log Message: ----------- add a mode to support global GPU map change Modified Paths: -------------- trunk/cuda_wrapper.c trunk/cuda_wrapper.h trunk/wrapper_init.c trunk/wrapper_terminate.c Modified: trunk/cuda_wrapper.c =================================================================== --- trunk/cuda_wrapper.c 2010-05-03 16:56:53 UTC (rev 57) +++ trunk/cuda_wrapper.c 2010-05-13 01:48:59 UTC (rev 58) @@ -59,6 +59,7 @@ #include <pthread.h> #include <unistd.h> #include <sys/time.h> +#include <wrapper_common.h> /* There are two possible ways to determine if the program is using cuda API * The first one is checking the executible in the contructor @@ -107,6 +108,7 @@ static int cuda_virt_disabled= 0; static int nGPU; static int cuda_exec = 0; +static int magic_job_enabled= 0; static __thread cudaEvent_t start_event, stop_event; static __thread double gpu_kernel_time =0; @@ -312,7 +314,7 @@ return 0; } - if (!wrapper_info || wrapper_info->userID != userID){ + if (!wrapper_info || (wrapper_info->userID != userID && !magic_job_enabled)){ passThru = 0; available = 0; return 0; @@ -1307,6 +1309,18 @@ char *JOBID; int jobID; + //test the magic jobid + key = MAGIC_JOBID; + shmid = shmget(key, sizeof(struct cuda_wrapper), WRAPPER_SHMEM_PERM); + if (shmid != -1){ //shared memory with magic jobid is present + data = (char*)shmat(shmid, (void*)0, 0); + if (data != (char*)-1){//found the shared memory segment + magic_job_enabled=1; + return (cuda_wrapper_t)data; + } + } + + JOBID = getenv("PBS_JOBID"); if (!JOBID){ /* Not running in a PBS environment. 
No shared memory */ Modified: trunk/cuda_wrapper.h =================================================================== --- trunk/cuda_wrapper.h 2010-05-03 16:56:53 UTC (rev 57) +++ trunk/cuda_wrapper.h 2010-05-13 01:48:59 UTC (rev 58) @@ -43,6 +43,7 @@ #ifndef _CUDA_WRAPPER_H_ #define _CUDA_WRAPPER_H_ +#define MAGIC_JOBID 0x7a3a4b5c #define MAX_GPUS_PER_NODE 8 #define WRAPPER_SHMEM_PERM 0666 #define MAX_KEY_ENV_VAR 32 Modified: trunk/wrapper_init.c =================================================================== --- trunk/wrapper_init.c 2010-05-03 16:56:53 UTC (rev 57) +++ trunk/wrapper_init.c 2010-05-13 01:48:59 UTC (rev 58) @@ -60,6 +60,8 @@ {"<unique key>", 1, 0, 'k'}, {"verbose", 0, 0, 'v'}, {"check only", 0, 0, 'l'}, + {"keep GPU order", 0, 0, 'd'}, + {"usee magic jobid", 0, 0, 'm'}, {"help", 0, 0, 'h'}, {0, 0, 0, 0} }; @@ -71,9 +73,10 @@ static int userID = -1; static cuda_wrapper_t wrapper; static unsigned int verbose = 0; -static signed int jobID = 0; +static signed int jobID = -1; static unsigned int checkonly = 0; static char* configFile = NULL; +static int keeporder =0; #define DEFAULT_CONFIG_FILE "/etc/cuda_wrapper.conf" #define MAXLINE 256 @@ -99,9 +102,11 @@ printf("\t-c <config file>\n"); printf("\t-g <GPU allocated file> # required for instantiation\n"); - printf("\t-u <user id> # required\n"); - printf("\t-k <unique instance key> # required, e.g. $PBS_JOBID\n"); + printf("\t-u <user id> # required (unless -m is used)\n"); + printf("\t-k <unique instance key> # required, e.g. 
$PBS_JOBID (unless -m is used)\n"); + printf("\t-m # use magic jobid (mutually exlusive with -k option)\n"); printf("\t-l # list info from shared memory (if it exists)\n"); + printf("\t-d # keep the physical GPU order in the file\n"); printf("\t-v # verbose\n"); printf("\t-h # print out this message\n"); @@ -315,7 +320,9 @@ /*sort the physical GPUs*/ - qsort(wrapper->physGPU, wrapper->nGPU, sizeof(int), compare_int); + if (!keeporder){ + qsort(wrapper->physGPU, wrapper->nGPU, sizeof(int), compare_int); + } //fprintf(stdout, "Total # of GPUs allocated for job: %d\n", wrapper->nGPU); @@ -337,7 +344,7 @@ int ret = 0; optind = 1; - while ((retval = getopt_long(argc, argv, "g:u:vk:lhc:", + while ((retval = getopt_long(argc, argv, "g:u:vk:lhc:dm", long_options, &option_index)) != -1) { switch(retval){ case 'g': @@ -349,15 +356,26 @@ break; case 'k': - jobID = wrapper_atoi(optarg); - if (jobID <= 0){ - printf("ERROR: invalid unique key"); - print_usage(argv[0]); - return -1; - } - - break; - + if (jobID == MAGIC_JOBID){ + printf("ERROR: setting jobid is mutually exclusive with magic jobid(-m)\n"); + print_usage(argv[0]); + return -1; + } + jobID = wrapper_atoi(optarg); + if (jobID <= 0){ + printf("ERROR: invalid unique key"); + print_usage(argv[0]); + return -1; + } + + break; + case 'm': + if (jobID > 0){ + printf("ERROR: setting jobid is mutually exclusive with magic jobid(-m)\n"); + print_usage(argv[0]); + } + jobID=MAGIC_JOBID; + break; case 'l': checkonly = 1; break; @@ -365,7 +383,10 @@ case 'c': configFile=strdup(optarg); break; - + case 'd': + keeporder=1; + break; + case 'v': verbose = 1; break; @@ -394,8 +415,8 @@ - /* Make sure we atleast have a GPU allocation map and user ID specified. */ - if (!gpuMapFile || userID < 0 || jobID == 0){ + /* Make sure we atleast have a GPU allocation map and user ID specified (unless magic jobid is used). 
*/ + if (!gpuMapFile || (userID < 0 && jobID != MAGIC_JOBID) || jobID == 0){ print_usage(argv[0]); return -1; } Modified: trunk/wrapper_terminate.c =================================================================== --- trunk/wrapper_terminate.c 2010-05-03 16:56:53 UTC (rev 57) +++ trunk/wrapper_terminate.c 2010-05-13 01:48:59 UTC (rev 58) @@ -55,7 +55,8 @@ static struct option long_options[] = { - {"<unique key> # required, e.g. $PBS_JOBID", 1, 0, 'k'}, + {"<unique key> #e.g. $PBS_JOBID", 1, 0, 'k'}, + {"<use magic jobid> ", 1, 0, 'm'}, {"help", 0, 0, 'h'}, {"print GPU processid", 0, 0, 'p'}, {0, 0, 0, 0} @@ -135,17 +136,30 @@ /* Parse argument options */ optind = 1; - while ((retval = getopt_long(argc, argv, "k:hp", + while ((retval = getopt_long(argc, argv, "k:hpm", long_options, &option_index)) != -1) { switch(retval){ case 'k': - jobID = wrapper_atoi(optarg); - if (jobID <= 0){ - fprintf(stderr, "Error: invalid key\n"); + if (jobID == MAGIC_JOBID){ + printf("ERROR: setting jobid is mutually exclusive with magic jobid(-m)\n"); + print_usage(argv[0]); + return -1; + } + jobID = wrapper_atoi(optarg); + if (jobID <= 0){ + fprintf(stderr, "Error: invalid key\n"); print_usage(argv[0]); return -1; } break; + case 'm': + if (jobID > 0){ + printf("ERROR: setting jobid is mutually exclusive with magic jobid(-m)\n"); + print_usage(argv[0]); + return -1; + } + jobID = MAGIC_JOBID; + break; case 'p': print_gpu_processid = 1; break; This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |