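Fixed bug in tools.py for NVIDIA GPUs: init_nvidia now uses PhysicalLimitsNV instead of PhysicalLimitsAMD, and device vendor strings are matched case-insensitively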

This commit is contained in:
Philippe Tillet
2014-10-14 13:58:28 -04:00
parent 6313f060cb
commit f60adab3dc

View File

@@ -1,4 +1,5 @@
from __future__ import division from __future__ import division
import pyopencl import pyopencl
import time import time
import os import os
@@ -118,7 +119,7 @@ def _int_ceiling(value, multiple_of=1):
class OccupancyRecord: class OccupancyRecord:
def init_nvidia(self, dev, threads, shared_mem, registers): def init_nvidia(self, dev, threads, shared_mem, registers):
pl = PhysicalLimitsAMD(dev) pl = PhysicalLimitsNV(dev)
limits = [] limits = []
allocated_warps = max(1,_int_ceiling(threads/pl.threads_per_warp)) allocated_warps = max(1,_int_ceiling(threads/pl.threads_per_warp))
max_warps_per_mp = pl.warps_per_mp max_warps_per_mp = pl.warps_per_mp
@@ -168,9 +169,9 @@ class OccupancyRecord:
def __init__(self, dev, threads, shared_mem=0, registers=0): def __init__(self, dev, threads, shared_mem=0, registers=0):
if 'Advanced Micro Devices' in dev.vendor: if 'advanced micro devices' in dev.vendor.lower():
self.init_amd(dev, threads, shared_mem, registers) self.init_amd(dev, threads, shared_mem, registers)
elif 'NVidia' in dev.vendor: elif 'nvidia' in dev.vendor.lower():
self.init_nvidia(dev, threads, shared_mem, registers) self.init_nvidia(dev, threads, shared_mem, registers)