save
This commit is contained in:
@@ -38,7 +38,7 @@ foreach(PROG blas overhead)
|
|||||||
cuda_add_cublas_to_target(${PROG}-bench)
|
cuda_add_cublas_to_target(${PROG}-bench)
|
||||||
else()
|
else()
|
||||||
add_executable(${PROG}-bench ${PROG}.cpp)
|
add_executable(${PROG}-bench ${PROG}.cpp)
|
||||||
set_target_properties(${PROG}-bench PROPERTIES COMPILE_FLAGS "-Wall -Wextra ${BLAS_DEF_STR}")
|
set_target_properties(${PROG}-bench PROPERTIES COMPILE_FLAGS "-Wall -Wextra ${BLAS_DEF_STR} -std=c++11")
|
||||||
endif()
|
endif()
|
||||||
target_link_libraries(${PROG}-bench ${BLAS_LIBS})
|
target_link_libraries(${PROG}-bench ${BLAS_LIBS})
|
||||||
endforeach(PROG)
|
endforeach(PROG)
|
||||||
|
@@ -32,7 +32,7 @@ void bench(ad::numeric_type dtype)
|
|||||||
total_time = 0;\
|
total_time = 0;\
|
||||||
OP;\
|
OP;\
|
||||||
ad::cl_ext::synchronize(ad::cl_ext::default_context());\
|
ad::cl_ext::synchronize(ad::cl_ext::default_context());\
|
||||||
while(total_time < 1e-2){\
|
while(total_time < 1e-1){\
|
||||||
timer.start(); \
|
timer.start(); \
|
||||||
OP;\
|
OP;\
|
||||||
SYNC;\
|
SYNC;\
|
||||||
|
@@ -1,9 +1,10 @@
|
|||||||
file(GLOB CLAMDBLAS_ROOT /opt/clAmdBlas*)
|
file(GLOB CLAMDBLAS_ROOT /opt/clBlas*)
|
||||||
|
|
||||||
set(CLAMDBLAS_INCLUDE_HINTS "${CLAMDBLAS_ROOT}/include")
|
set(CLAMDBLAS_INCLUDE_HINTS "${CLAMDBLAS_ROOT}/include")
|
||||||
set(CLAMDBLAS_LIBRARIES_HINTS "${CLAMDBLAS_ROOT}/lib64")
|
set(CLAMDBLAS_LIBRARIES_HINTS "${CLAMDBLAS_ROOT}/lib64")
|
||||||
|
|
||||||
find_path(CLAMDBLAS_INCLUDE_DIR clAmdBlas.h HINTS ${CLAMDBLAS_INCLUDE_HINTS})
|
find_path(CLAMDBLAS_INCLUDE_DIR clAmdBlas.h HINTS ${CLAMDBLAS_INCLUDE_HINTS})
|
||||||
find_library(CLAMDBLAS_LIBRARIES NAMES clAmdBlas HINTS ${CLAMDBLAS_LIBRARIES_HINTS})
|
find_library(CLAMDBLAS_LIBRARIES NAMES clBLAS HINTS ${CLAMDBLAS_LIBRARIES_HINTS})
|
||||||
|
|
||||||
if(CLAMDBLAS_LIBRARIES)
|
if(CLAMDBLAS_LIBRARIES)
|
||||||
set(CLAMDBLAS_LIBRARIES ${CLAMDBLAS_LIBRARIES})
|
set(CLAMDBLAS_LIBRARIES ${CLAMDBLAS_LIBRARIES})
|
||||||
|
@@ -6,7 +6,7 @@ include_directories(${CMAKE_CURRENT_SOURCE_DIR})
|
|||||||
|
|
||||||
add_library(atidlas SHARED ${LIBATIDLAS_SRC})
|
add_library(atidlas SHARED ${LIBATIDLAS_SRC})
|
||||||
set_target_properties(atidlas PROPERTIES
|
set_target_properties(atidlas PROPERTIES
|
||||||
COMPILE_FLAGS "-Wno-sign-compare -D__CL_ENABLE_EXCEPTIONS -Wall -Wextra -pedantic")
|
COMPILE_FLAGS "-Wno-sign-compare -D__CL_ENABLE_EXCEPTIONS -Wall -Wextra -pedantic -std=c++11")
|
||||||
|
|
||||||
|
|
||||||
#install(TARGETS atidlas LIBRARY DESTINATION lib)
|
#install(TARGETS atidlas LIBRARY DESTINATION lib)
|
||||||
|
@@ -256,8 +256,8 @@ model_map_t init_models(cl::CommandQueue & queue)
|
|||||||
res[std::make_pair(MATRIX_PRODUCT_NT_TYPE, DTYPE)] = ptr_t(new model(mproduct_nt(1, 8, 8, 8, 4, 1, 4, FETCH_FROM_LOCAL, FETCH_FROM_LOCAL, 8, 8), queue));
|
res[std::make_pair(MATRIX_PRODUCT_NT_TYPE, DTYPE)] = ptr_t(new model(mproduct_nt(1, 8, 8, 8, 4, 1, 4, FETCH_FROM_LOCAL, FETCH_FROM_LOCAL, 8, 8), queue));
|
||||||
res[std::make_pair(MATRIX_PRODUCT_TT_TYPE, DTYPE)] = ptr_t(new model(mproduct_tt(1, 8, 8, 8, 4, 1, 4, FETCH_FROM_LOCAL, FETCH_FROM_LOCAL, 8, 8), queue));
|
res[std::make_pair(MATRIX_PRODUCT_TT_TYPE, DTYPE)] = ptr_t(new model(mproduct_tt(1, 8, 8, 8, 4, 1, 4, FETCH_FROM_LOCAL, FETCH_FROM_LOCAL, 8, 8), queue));
|
||||||
}
|
}
|
||||||
if(const char * cmodel_file = std::getenv("ATIDLAS_MODEL_DEVICE_0"))
|
if(const char * homepath = std::getenv("HOME"))
|
||||||
import(std::string(cmodel_file), queue, res);
|
import(std::string(homepath) + "/.atidlas/devices/device0.json", queue, res);
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -1,4 +1,4 @@
|
|||||||
import sys
|
import os, sys
|
||||||
|
|
||||||
def add_input(help, default):
|
def add_input(help, default):
|
||||||
sys.stdout.write(help + "[" + default + "] : ")
|
sys.stdout.write(help + "[" + default + "] : ")
|
||||||
@@ -31,6 +31,6 @@ if retune=='y':
|
|||||||
print '----------------'
|
print '----------------'
|
||||||
opts += ['--blas3-size'] + [add_input('BLAS3 sizes (M,N,K)', '1024,1024,1024').split(',')]
|
opts += ['--blas3-size'] + [add_input('BLAS3 sizes (M,N,K)', '1024,1024,1024').split(',')]
|
||||||
print '----------------'
|
print '----------------'
|
||||||
subprocess.call(["${CMAKE_BINARY_DIR}/python/autotune/dist/autotune", "tune"] + opts +['--json', 'tmp.json'])
|
subprocess.call(["${CMAKE_BINARY_DIR}/python/autotune/dist/autotune", "tune"] + opts +['--json', os.environ['HOME'] + '/.atidlas/devices/device' + opts[1] + '.json'])
|
||||||
|
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user