Files
triton/python/src/bind/driver.cpp
Philippe Tillet feeb1e9862 Feature: Merged kernel-fusion branch
* Fuses multiple AXPY kernel
* Possibility to add thread-wise for loops in AXPY-like kernels
2015-09-30 15:31:41 -04:00

174 lines
6.6 KiB
C++

#include <memory>
#include <stdexcept>
#include <boost/python/suite/indexing/vector_indexing_suite.hpp>
#include "isaac/driver/device.h"
#include "isaac/symbolic/execute.h"
#include "common.hpp"
#include "driver.h"
namespace detail
{
// Returns the compute capability of `device` as a two-element Python list.
// The elements are the `first` and `second` members of the pair returned by
// Device::nv_compute_capability(), in that order (presumably major, minor).
bp::list nv_compute_capability(sc::driver::Device const & device)
{
    const std::pair<unsigned int, unsigned int> capability = device.nv_compute_capability();

    bp::list result;
    result.append(capability.first);
    result.append(capability.second);
    return result;
}
// Queries the driver backend for its platforms and returns them as a
// Python list, in the order the backend filled them in.
bp::list get_platforms()
{
    std::vector<sc::driver::Platform> found;
    sc::driver::backend::platforms(found);   // fills `found` in place

    return tools::to_list(found.begin(), found.end());
}
// Returns the devices that `platform` reports, converted to a Python list.
bp::list get_devices(sc::driver::Platform const & platform)
{
    std::vector<sc::driver::Device> found;
    platform.devices(found);   // fills `found` in place

    return tools::to_list(found.begin(), found.end());
}
// Returns the command queues the backend associates with `context`
// as a Python list.
bp::list get_queues(sc::driver::Context const & context)
{
    // The backend hands the queues back as raw pointers.
    std::vector<sc::driver::CommandQueue*> handles;
    sc::driver::backend::queues::get(context, handles);

    bp::list result;
    for(auto it = handles.begin(); it != handles.end(); ++it)
    {
        // Append the pointed-to queue object, not the pointer itself.
        result.append(**it);
    }
    return result;
}
// Creates a command queue for `device` inside `context` and returns it
// under shared ownership.
//
// std::make_shared replaces the former `shared_ptr(new ...)` so the queue and
// its control block are allocated together and no naked `new` remains.
std::shared_ptr<sc::driver::CommandQueue> create_queue(sc::driver::Context const & context, sc::driver::Device const & device)
{
    return std::make_shared<sc::driver::CommandQueue>(context, device);
}
// Converts a device type to its textual name: "CPU", "GPU" or "ACCELERATOR".
//
// Throws std::invalid_argument for any other value. The previous bare
// `throw;` had no active exception to rethrow and therefore called
// std::terminate, aborting the whole (Python) process instead of reporting
// an error to the caller.
std::string to_string(sc::driver::Device::Type type)
{
    switch(type)
    {
        case sc::driver::Device::Type::CPU:         return "CPU";
        case sc::driver::Device::Type::GPU:         return "GPU";
        case sc::driver::Device::Type::ACCELERATOR: return "ACCELERATOR";
        default: break;
    }
    throw std::invalid_argument("Unknown device type");
}
// Factory used as the Python-side constructor of `context`: builds a
// Context for `dev` and returns it under shared ownership.
//
// std::make_shared replaces the former `shared_ptr(new ...)` so the context
// and its control block are allocated together and no naked `new` remains.
std::shared_ptr<sc::driver::Context> make_context(sc::driver::Device const & dev)
{
    return std::make_shared<sc::driver::Context>(dev);
}
// Enqueues the evaluation of `expression` and returns a Python tuple
// (array, events).
//
//   expression      expression tree to evaluate
//   queue_id        forwarded to the execution options together with the
//                   event list and the dependency vector
//   dependencies    Python list converted to a vector of driver events
//   tune, label     forwarded to the dispatcher options
//   program_name,
//   force_recompile forwarded to the compilation options
//
// When the root node of the expression is an assignment, the expression is
// executed and the first tuple element wraps the pointer `root.lhs.array`.
// Otherwise a new sc::array is constructed from the execution handler and
// returned. The second tuple element is always the list of events collected
// in the local event list (presumably filled in by the execution - the list
// is only handed over by pointer here).
bp::object enqueue(sc::math_expression const & expression, unsigned int queue_id, bp::list dependencies, bool tune, int label, std::string const & program_name, bool force_recompile)
{
    // Storage referenced (by pointer) from the execution options below.
    std::list<sc::driver::Event> produced_events;
    std::vector<sc::driver::Event> wait_list = tools::to_vector<sc::driver::Event>(dependencies);

    sc::execution_options_type exec_opts(queue_id, &produced_events, &wait_list);
    sc::dispatcher_options_type dispatch_opts(tune, label);
    sc::compilation_options_type compile_opts(program_name, force_recompile);

    // Copy of the root node of the expression tree.
    sc::math_expression::container_type::value_type root = expression.tree()[expression.root()];

    if(!sc::detail::is_assignment(root.op))
    {
        // Not an assignment: materialize the result into a fresh array.
        std::shared_ptr<sc::array> parray(new sc::array(sc::execution_handler(expression, exec_opts, dispatch_opts, compile_opts)));
        return bp::make_tuple(parray, tools::to_list(produced_events.begin(), produced_events.end()));
    }

    // Assignment: execute it and hand back the array on its left-hand side.
    sc::execute(sc::execution_handler(expression, exec_opts, dispatch_opts, compile_opts), isaac::profiles::get(exec_opts.queue(expression.context())));
    return bp::make_tuple(bp::ptr(root.lhs.array), tools::to_list(produced_events.begin(), produced_events.end()));
}
}
// Empty tag type. export_driver() registers it with Python as "default_type"
// and attaches the backend-wide defaults to it as properties.
struct default_driver_values_type
{
};

// The one instance that export_driver() publishes as the `default` attribute.
default_driver_values_type default_driver_parameters;
// Registers the `isaac.driver` Python submodule and everything it exposes:
// the queue container, the backend/device/vendor enums, the platform,
// device, context, command_queue and event classes, the free functions
// (device_type_to_string, get_platforms, enqueue) and the `default` object
// carrying the backend-wide default settings.
void export_driver()
{
    typedef std::vector<sc::driver::CommandQueue> queues_t;

    // Create (or fetch) the submodule, attach it to the enclosing scope as
    // `driver`, then make it the current scope: every definition below lands
    // in isaac.driver for as long as `driver_scope` is alive.
    bp::object driver_module(bp::handle<>(bp::borrowed(PyImport_AddModule("isaac.driver"))));
    bp::scope().attr("driver") = driver_module;
    bp::scope driver_scope = driver_module;

    // Container of command queues.
    bp::class_<queues_t>("queues")
        .def("__len__", &queues_t::size)
        .def("__getitem__", &bp::vector_indexing_suite<queues_t>::get_item, bp::return_internal_reference<>())
        .def("__setitem__", &bp::vector_indexing_suite<queues_t>::set_item, bp::with_custodian_and_ward<1,2>())
        .def("append", &bp::vector_indexing_suite<queues_t>::append)
        ;

    bp::enum_<sc::driver::backend_type>
        ("backend_type")
        .value("OPENCL", sc::driver::OPENCL)
        .value("CUDA", sc::driver::CUDA)
        ;

    // ACCELERATOR is exported as well so that every device type handled by
    // detail::to_string has a named Python counterpart.
    bp::enum_<sc::driver::Device::Type>
        ("device_type")
        .value("DEVICE_TYPE_GPU", sc::driver::Device::Type::GPU)
        .value("DEVICE_TYPE_CPU", sc::driver::Device::Type::CPU)
        .value("DEVICE_TYPE_ACCELERATOR", sc::driver::Device::Type::ACCELERATOR)
        ;

    bp::class_<sc::driver::Platform>("platform", bp::no_init)
        .def("get_devices", &detail::get_devices)
        .add_property("name",&sc::driver::Platform::name)
        ;

    bp::enum_<isaac::driver::Device::Vendor>
        ("vendor")
        .value("AMD", sc::driver::Device::Vendor::AMD)
        .value("INTEL", sc::driver::Device::Vendor::INTEL)
        .value("NVIDIA", sc::driver::Device::Vendor::NVIDIA)
        .value("UNKNOWN", sc::driver::Device::Vendor::UNKNOWN)
        ;

    // Devices cannot be constructed from Python (no_init); they are obtained
    // through platform.get_devices().
    bp::class_<sc::driver::Device>("device", bp::no_init)
        .add_property("clock_rate", &sc::driver::Device::clock_rate)
        .add_property("name", &sc::driver::Device::name)
        .add_property("type", &sc::driver::Device::type)
        .add_property("platform", &sc::driver::Device::platform)
        .add_property("vendor", &sc::driver::Device::vendor)
        .add_property("nv_compute_capability", &detail::nv_compute_capability)
        .add_property("infos", &sc::driver::Device::infos)
        ;

    // Contexts are non-copyable; Python constructs them via detail::make_context.
    bp::class_<sc::driver::Context, boost::noncopyable>("context", bp::no_init)
        .def("__init__", bp::make_constructor(&detail::make_context))
        .def("synchronize", &sc::driver::backend::synchronize)
        .add_property("queues", &detail::get_queues)
        .add_property("backend", &sc::driver::Context::backend)
        ;

    bp::class_<sc::driver::CommandQueue>("command_queue", bp::init<sc::driver::Context const &, sc::driver::Device const &>())
        .def("synchronize", &sc::driver::CommandQueue::synchronize)
        .add_property("profiles", bp::make_function(&sc::profiles::get, bp::return_internal_reference<>()))
        .add_property("device", bp::make_function(&sc::driver::CommandQueue::device, bp::return_internal_reference<>()))
        ;

    bp::class_<sc::driver::Event>("event", bp::init<sc::driver::backend_type>())
        .add_property("elapsed_time", &sc::driver::Event::elapsed_time)
        ;

    // Module-level functions.
    bp::def("device_type_to_string", &detail::to_string);
    bp::def("get_platforms", &detail::get_platforms);
    bp::def("enqueue", &detail::enqueue, (bp::arg("expression"), bp::arg("queue_id") = 0, bp::arg("dependencies")=bp::list(), bp::arg("tune") = false, bp::arg("label")=-1, bp::arg("program_name")="", bp::arg("recompile") = false));

    // `default` exposes the backend-wide default queue properties and device.
    bp::class_<default_driver_values_type>("default_type")
        .def_readwrite("queue_properties",&sc::driver::backend::default_queue_properties)
        .def_readwrite("device", &sc::driver::backend::default_device)
        ;
    bp::scope().attr("default") = bp::object(bp::ptr(&default_driver_parameters));
    bp::scope().attr("PROFILING_ENABLE") = CL_QUEUE_PROFILING_ENABLE;
}