From: <gs...@us...> - 2012-02-28 21:40:01
|
Revision: 80 http://cudawrapper.svn.sourceforge.net/cudawrapper/?rev=80&view=rev Author: gshi Date: 2012-02-28 21:39:55 +0000 (Tue, 28 Feb 2012) Log Message: ----------- added Galen's affinity CPU code. Now the GPU process will try to find affinity cpu cores from proc FS. If that fails, then it will use the affinity info from the config file Modified Paths: -------------- trunk/Makefile trunk/cuda_wrapper.c trunk/cuda_wrapper.h Added Paths: ----------- trunk/affinity_for_nvidia.c Modified: trunk/Makefile =================================================================== --- trunk/Makefile 2012-02-27 16:55:09 UTC (rev 79) +++ trunk/Makefile 2012-02-28 21:39:55 UTC (rev 80) @@ -3,7 +3,7 @@ # University of Illinois/NCSA # Open Source License # -# Copyright \xA9 2009, University of Illinois. All rights reserved. +# Copyright © 2009, University of Illinois. All rights reserved. # # Developed by: # @@ -44,7 +44,7 @@ OPENCL_LIB_DIR ?=/usr/local/opencl/lib CC=gcc -CFLAGS = -fPIC -Wall +CFLAGS = -c -D_GNU_SOURCE -fPIC -Wall CUDA_INC_DIR = ${CUDA_HOME}/include CUDA_LIB_DIR =${CUDA_HOME}/lib64 @@ -57,6 +57,8 @@ OPENCL_LINK_LIB= endif +CUDA_WRAPPER_OBJS=cuda_wrapper.o wrapper_common.o affinity_for_nvidia.o ${OPENCL_WRAPPER_OBJ} + all: wrapper_init wrapper_terminate wrapper_query cuda_wrapper.so cuda_memscrubber cuda_affinity_test opencl: wrapper_init wrapper_terminate wrapper_query opencl_wrapper.so @@ -75,8 +77,8 @@ $(CC) -c -I. -D_GNU_SOURCE wrapper_query.c $(CC) wrapper_query.o wrapper_common.o -o wrapper_query -lpthread -cuda_wrapper.so: cuda_wrapper.o wrapper_common.o ${OPENCL_WRAPPER_OBJ} - $(CC) -shared -I. -I$(CUDA_INC_DIR) cuda_wrapper.o wrapper_common.o ${OPENCL_WRAPPER_OBJ} -o cuda_wrapper.so -ldl -lpthread ${OPENCL_LINK_LIB} +cuda_wrapper.so: ${CUDA_WRAPPER_OBJS} + $(CC) -shared -I. 
-I$(CUDA_INC_DIR) ${CUDA_WRAPPER_OBJS} -o $@ -ldl -lpthread ${OPENCL_LINK_LIB} opencl_wrapper.so: wrapper_common.o opencl_wrapper.o $(CC) -shared wrapper_common.o opencl_wrapper.o -o opencl_wrapper.so -ldl ${OPENCL_LINK_LIB} @@ -89,6 +91,9 @@ cuda_memscrubber: cuda_memscrubber.c $(CC) -O3 -funroll-all-loops -I. -I$(CUDA_INC_DIR) cuda_memscrubber.c -o cuda_memscrubber -L$(CUDA_LIB_DIR) -lcuda -lpthread +affinity_for_nvidia.o: affinity_for_nvidia.c + ${CC} ${CFLAGS} affinity_for_nvidia.c + cuda_affinity_test: cuda_affinity_test.c ${CC} -O3 -I${CUDA_INC_DIR} -o $@ $< -L${CUDA_LIB_DIR} -lnuma -lcudart Added: trunk/affinity_for_nvidia.c =================================================================== --- trunk/affinity_for_nvidia.c (rev 0) +++ trunk/affinity_for_nvidia.c 2012-02-28 21:39:55 UTC (rev 80) @@ -0,0 +1,170 @@ + +// Galen Arnold , ar...@nc... # for support and bug reports +// Guochun Shi +// Jan., 2012 +// NCSA +#include <stdio.h> +#include <stdlib.h> +#include <sched.h> +#include <unistd.h> +#include <string.h> + +int process_core_string_list(const char* _str, int* list, int* ncores); +int process_core_string_item(const char* str, int* sub_list, int* sub_ncores); + +// cpulistaffinity() makes the association between the numbered gpu device "my_gpu" and +// the cpu cores associated with it by following the path name of the pci_bus listed +// under the nvidia driver in /proc . 
+int cpulistaffinity(int my_gpu, int *cpu_cores, int* ncores) +{ + FILE *nvidia_info, *pci_bus_info; + size_t nbytes = 255; + //int core3, core4; // fillers for sscanf() + char *my_line; + char nvidia_info_path[255], pci_bus_info_path[255]; + char bus_info[255]; + + // the nvidia driver populates this path for each gpu + sprintf(nvidia_info_path,"/proc/driver/nvidia/gpus/%d/information", my_gpu); + nvidia_info= fopen(nvidia_info_path,"r"); + if (nvidia_info == NULL){ + return -1; + } + + my_line= (char *) malloc(nbytes +1); + if (my_line == NULL){ + printf("Error: allocating memory for my_line failed"); + exit(-1); + } + + while (!feof(nvidia_info)){ + if ( -1 == getline(&my_line, &nbytes, nvidia_info)){ + break; + }else{ + // the first 7 char of the Bus Location will lead to the corresponding + // path under /sys/class/pci_bus/ , cpulistaffinity showing cores on that + // bus is located there + if ( 1 == sscanf(my_line,"Bus Location: %s", bus_info )){ + sprintf(pci_bus_info_path,"/sys/class/pci_bus/%.7s/cpulistaffinity", + bus_info); + } + } + } + // open the cpulistaffinity file on the pci_bus for "my_gpu" + pci_bus_info= fopen(pci_bus_info_path,"r"); + if (pci_bus_info == NULL){ + free(my_line); + fclose(nvidia_info); + return -1; + } + + while (!feof(pci_bus_info)){ + if ( -1 == getline(&my_line, &nbytes, pci_bus_info)){ + break; + } else{ + int rc = process_core_string_list(my_line, cpu_cores, ncores); + if(rc < 0){ + printf("Warning:%s: processing the line (%s) failed\n", __FUNCTION__, my_line); + free(my_line); + fclose(nvidia_info); + return -1; + } + } + } + + free(my_line); + return(0); +} + + +int process_core_string_list(const char* _str, int* list, int* ncores) +{ + /* The input string @str should be separated by comma, and each item can be + * either a number or a range (see the comments in process_core_string_item + * function) + * + */ + + if(_str == NULL || list == NULL || ncores == NULL + || *ncores <= 0){ + printf("ERROR: Invalid arguments in 
function %s\n", __FUNCTION__ ); + return -1; + } + + char str[256]; + strncpy(str, _str, sizeof(str)); + + int left_space = *ncores; + int tot_cores = 0; + + char* item = strtok(str, ","); + if(item == NULL){ + printf("ERROR: Invalid string format(%s)\n", str); + return -1; + } + + do { + int sub_ncores = left_space; + int* sub_list = list + tot_cores; + + int rc = process_core_string_item(item, sub_list, &sub_ncores); + if(rc <0){ + printf("ERROR: processing item(%s) failed\n", item); + return -1; + } + + tot_cores += sub_ncores; + left_space -= sub_ncores; + + item = strtok(NULL, ","); + }while( item != NULL); + + *ncores = tot_cores; + return 0; +} + + +int process_core_string_item(const char* str, int* sub_list, int* sub_ncores) +{ + /* assume the input format is one of the following two + * 1. a number only, e.g. 5 + * 2. a range, e.g 4-6, which means three numbers 4,5,6 + * return a list of numbers in @sub_list and and the total numbers + * in @sub_ncores + */ + int i; + if(str == NULL || sub_list == NULL || sub_ncores == NULL || + *sub_ncores <= 0){ + printf("ERROR: Wrong parameters in function %s!\n", __FUNCTION__); + return -1; + } + + if(strstr(str, "-") != NULL){ + //a range + int low_core, high_core; + if (sscanf(str,"%d-%d",&low_core, &high_core) != 2){ + printf("ERROR: range scan failed\n"); + return -1; + } + if(*sub_ncores < high_core-low_core +1){ + printf("ERROR: not enough space in sub_list\n"); + return -1; + } + + for(i = 0; i < high_core-low_core +1; i++){ + sub_list[i] = i + low_core; + } + *sub_ncores = high_core - low_core +1; + + }else{ + //a number + int core; + if (sscanf(str, "%d", &core) != 1){ + printf("ERROR: wrong format for core number\n"); + return -1; + } + sub_list[0] = core; + *sub_ncores =1; + } + return 0; +} Modified: trunk/cuda_wrapper.c =================================================================== --- trunk/cuda_wrapper.c 2012-02-27 16:55:09 UTC (rev 79) +++ trunk/cuda_wrapper.c 2012-02-28 21:39:55 UTC (rev 80) @@ 
-96,6 +96,7 @@ static __host__ cudaError_t CUDARTAPI (*real_cudaEventRecord) (cudaEvent_t event, cudaStream_t stream); +extern int cpulistaffinity(int my_gpu, int *cpu_cores, int* ncores); static void* (*real_dlopen)(const char* filename, int flag); @@ -397,24 +398,76 @@ if(cuda_wrapper_num_affinity_disabled){ return 0; } - int ret; - assert(device <= wrapper_info->nGPU); - int phys_gpu = wrapper_virt_to_phys(device); + + assert(device <= wrapper_info->nGPU); + int phys_gpu = wrapper_virt_to_phys(device); + + /*Go to proc filesystem for cpu affnity directly (nvidida only) + *otherwise, use the recorded ones + */ + cpu_set_t cpu_set; + CPU_ZERO(&cpu_set); + int cpu_set_valid = 0; + + int i; + const char* msg; + int cpu_cores[256]; + int ncores=256; + int rc = cpulistaffinity(phys_gpu, cpu_cores, &ncores); + if(rc == 0){ + for(i=0;i < ncores;i++){ + CPU_SET(cpu_cores[i], &cpu_set); + } + cpu_set_valid =1; + msg="proc"; + + }else{ if (wrapper_info->GPUHasAffinity[phys_gpu]) { - /* - * Install affinity mask for the calling thread. For good measure call - * sched_yield() to enforce the mask, though I am not sure if this even - * works (may turn out to be a no-op). - */ - ret = sched_setaffinity(cuda_wrapper_getid(), + /* + * Install affinity mask for the calling thread. For good measure call + * sched_yield() to enforce the mask, though I am not sure if this even + * works (may turn out to be a no-op). 
+ */ + cpu_set = wrapper_info->GPUAffinity[phys_gpu]; + cpu_set_valid = 1; + + if(cuda_wrapper_verbose){ + msg= "config"; + ncores = CPU_COUNT(&cpu_set); + int j = 0; + for(i=0;i< MAX_CPU_CORES_PER_NODE;i++){ + if(CPU_ISSET(i, &cpu_set)){ + cpu_cores[j]=i; + j++; + } + } + if(j != ncores ){ + FPRINTF("Warning: number of cpu cores does not match (%s)\n", __FUNCTION__); + } + } + + } + } + + if(cpu_set_valid){ + + if(cuda_wrapper_verbose){ + printf("setting GPU device %d to affinity CPU cores with info from %s:", phys_gpu, msg); + for(i=0;i < ncores;i++){ + printf("%d ", cpu_cores[i]); + } + printf("\n"); + } + + int ret = sched_setaffinity(cuda_wrapper_getid(), sizeof(cpu_set_t), &wrapper_info->GPUAffinity[phys_gpu]); - if (ret){ - FPRINTF("Warning!! Unable to setup processor affinity for virtual device %d, Physical device: %d\n", device, phys_gpu); - } + if (ret){ + FPRINTF("Warning!! Unable to setup processor affinity for virtual device %d, Physical device: %d\n", device, phys_gpu); } - - return 0; + } + + return 0; } Modified: trunk/cuda_wrapper.h =================================================================== --- trunk/cuda_wrapper.h 2012-02-27 16:55:09 UTC (rev 79) +++ trunk/cuda_wrapper.h 2012-02-28 21:39:55 UTC (rev 80) @@ -46,6 +46,7 @@ #define MAGIC_JOBID 0x7a3a4b5c #define MAX_GPUS_PER_NODE 8 #define MAX_DEVICES_PER_NODE 8 +#define MAX_CPU_CORES_PER_NODE 64 #define WRAPPER_SHMEM_PERM 0666 #define MAX_KEY_ENV_VAR 32 This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |