[DRIVER] Fix typos (#939)
This commit is contained in:
46
include/triton/external/CUDA/cuda.h
vendored
46
include/triton/external/CUDA/cuda.h
vendored
@@ -818,7 +818,7 @@ typedef enum CUcomputemode_enum {
|
|||||||
* Memory advise values
|
* Memory advise values
|
||||||
*/
|
*/
|
||||||
typedef enum CUmem_advise_enum {
|
typedef enum CUmem_advise_enum {
|
||||||
CU_MEM_ADVISE_SET_READ_MOSTLY = 1, /**< Data will mostly be read and only occassionally be written to */
|
CU_MEM_ADVISE_SET_READ_MOSTLY = 1, /**< Data will mostly be read and only occasionally be written to */
|
||||||
CU_MEM_ADVISE_UNSET_READ_MOSTLY = 2, /**< Undo the effect of ::CU_MEM_ADVISE_SET_READ_MOSTLY */
|
CU_MEM_ADVISE_UNSET_READ_MOSTLY = 2, /**< Undo the effect of ::CU_MEM_ADVISE_SET_READ_MOSTLY */
|
||||||
CU_MEM_ADVISE_SET_PREFERRED_LOCATION = 3, /**< Set the preferred location for the data as the specified device */
|
CU_MEM_ADVISE_SET_PREFERRED_LOCATION = 3, /**< Set the preferred location for the data as the specified device */
|
||||||
CU_MEM_ADVISE_UNSET_PREFERRED_LOCATION = 4, /**< Clear the preferred location for the data */
|
CU_MEM_ADVISE_UNSET_PREFERRED_LOCATION = 4, /**< Clear the preferred location for the data */
|
||||||
@@ -827,7 +827,7 @@ typedef enum CUmem_advise_enum {
|
|||||||
} CUmem_advise;
|
} CUmem_advise;
|
||||||
|
|
||||||
typedef enum CUmem_range_attribute_enum {
|
typedef enum CUmem_range_attribute_enum {
|
||||||
CU_MEM_RANGE_ATTRIBUTE_READ_MOSTLY = 1, /**< Whether the range will mostly be read and only occassionally be written to */
|
CU_MEM_RANGE_ATTRIBUTE_READ_MOSTLY = 1, /**< Whether the range will mostly be read and only occasionally be written to */
|
||||||
CU_MEM_RANGE_ATTRIBUTE_PREFERRED_LOCATION = 2, /**< The preferred location of the range */
|
CU_MEM_RANGE_ATTRIBUTE_PREFERRED_LOCATION = 2, /**< The preferred location of the range */
|
||||||
CU_MEM_RANGE_ATTRIBUTE_ACCESSED_BY = 3, /**< Memory range has ::CU_MEM_ADVISE_SET_ACCESSED_BY set for specified device */
|
CU_MEM_RANGE_ATTRIBUTE_ACCESSED_BY = 3, /**< Memory range has ::CU_MEM_ADVISE_SET_ACCESSED_BY set for specified device */
|
||||||
CU_MEM_RANGE_ATTRIBUTE_LAST_PREFETCH_LOCATION = 4 /**< The last location to which the range was prefetched */
|
CU_MEM_RANGE_ATTRIBUTE_LAST_PREFETCH_LOCATION = 4 /**< The last location to which the range was prefetched */
|
||||||
@@ -849,7 +849,7 @@ typedef enum CUjit_option_enum
|
|||||||
* IN: Specifies minimum number of threads per block to target compilation
|
* IN: Specifies minimum number of threads per block to target compilation
|
||||||
* for\n
|
* for\n
|
||||||
* OUT: Returns the number of threads the compiler actually targeted.
|
* OUT: Returns the number of threads the compiler actually targeted.
|
||||||
* This restricts the resource utilization fo the compiler (e.g. max
|
* This restricts the resource utilization of the compiler (e.g. max
|
||||||
* registers) such that a block with the given number of threads should be
|
* registers) such that a block with the given number of threads should be
|
||||||
* able to launch based on register limitations. Note, this option does not
|
* able to launch based on register limitations. Note, this option does not
|
||||||
* currently take into account any other resource limitations, such as
|
* currently take into account any other resource limitations, such as
|
||||||
@@ -974,10 +974,10 @@ typedef enum CUjit_option_enum
|
|||||||
CU_JIT_FAST_COMPILE,
|
CU_JIT_FAST_COMPILE,
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Array of device symbol names that will be relocated to the corresponing
|
* Array of device symbol names that will be relocated to the corresponding
|
||||||
* host addresses stored in ::CU_JIT_GLOBAL_SYMBOL_ADDRESSES.\n
|
* host addresses stored in ::CU_JIT_GLOBAL_SYMBOL_ADDRESSES.\n
|
||||||
* Must contain ::CU_JIT_GLOBAL_SYMBOL_COUNT entries.\n
|
* Must contain ::CU_JIT_GLOBAL_SYMBOL_COUNT entries.\n
|
||||||
* When loding a device module, driver will relocate all encountered
|
* When loading a device module, driver will relocate all encountered
|
||||||
* unresolved symbols to the host addresses.\n
|
* unresolved symbols to the host addresses.\n
|
||||||
* It is only allowed to register symbols that correspond to unresolved
|
* It is only allowed to register symbols that correspond to unresolved
|
||||||
* global variables.\n
|
* global variables.\n
|
||||||
@@ -1194,7 +1194,7 @@ typedef enum CUlimit_enum {
|
|||||||
* Resource types
|
* Resource types
|
||||||
*/
|
*/
|
||||||
typedef enum CUresourcetype_enum {
|
typedef enum CUresourcetype_enum {
|
||||||
CU_RESOURCE_TYPE_ARRAY = 0x00, /**< Array resoure */
|
CU_RESOURCE_TYPE_ARRAY = 0x00, /**< Array resource */
|
||||||
CU_RESOURCE_TYPE_MIPMAPPED_ARRAY = 0x01, /**< Mipmapped array resource */
|
CU_RESOURCE_TYPE_MIPMAPPED_ARRAY = 0x01, /**< Mipmapped array resource */
|
||||||
CU_RESOURCE_TYPE_LINEAR = 0x02, /**< Linear resource */
|
CU_RESOURCE_TYPE_LINEAR = 0x02, /**< Linear resource */
|
||||||
CU_RESOURCE_TYPE_PITCH2D = 0x03 /**< Pitch 2D resource */
|
CU_RESOURCE_TYPE_PITCH2D = 0x03 /**< Pitch 2D resource */
|
||||||
@@ -2914,9 +2914,9 @@ typedef struct CUmemAllocationProp_st {
|
|||||||
CUmemLocation location;
|
CUmemLocation location;
|
||||||
/**
|
/**
|
||||||
* Windows-specific POBJECT_ATTRIBUTES required when
|
* Windows-specific POBJECT_ATTRIBUTES required when
|
||||||
* ::CU_MEM_HANDLE_TYPE_WIN32 is specified. This object atributes structure
|
* ::CU_MEM_HANDLE_TYPE_WIN32 is specified. This object attributes structure
|
||||||
* includes security attributes that define
|
* includes security attributes that define
|
||||||
* the scope of which exported allocations may be tranferred to other
|
* the scope of which exported allocations may be transferred to other
|
||||||
* processes. In all other cases, this field is required to be zero.
|
* processes. In all other cases, this field is required to be zero.
|
||||||
*/
|
*/
|
||||||
void *win32HandleMetaData;
|
void *win32HandleMetaData;
|
||||||
@@ -3036,7 +3036,7 @@ typedef struct CUmemPoolProps_st {
|
|||||||
/**
|
/**
|
||||||
* Windows-specific LPSECURITYATTRIBUTES required when
|
* Windows-specific LPSECURITYATTRIBUTES required when
|
||||||
* ::CU_MEM_HANDLE_TYPE_WIN32 is specified. This security attribute defines
|
* ::CU_MEM_HANDLE_TYPE_WIN32 is specified. This security attribute defines
|
||||||
* the scope of which exported allocations may be tranferred to other
|
* the scope of which exported allocations may be transferred to other
|
||||||
* processes. In all other cases, this field is required to be zero.
|
* processes. In all other cases, this field is required to be zero.
|
||||||
*/
|
*/
|
||||||
void *win32SecurityAttributes;
|
void *win32SecurityAttributes;
|
||||||
@@ -3519,7 +3519,7 @@ CUresult CUDAAPI cuDeviceGet(CUdevice *device, int ordinal);
|
|||||||
CUresult CUDAAPI cuDeviceGetCount(int *count);
|
CUresult CUDAAPI cuDeviceGetCount(int *count);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* \brief Returns an identifer string for the device
|
* \brief Returns an identifier string for the device
|
||||||
*
|
*
|
||||||
* Returns an ASCII string identifying the device \p dev in the NULL-terminated
|
* Returns an ASCII string identifying the device \p dev in the NULL-terminated
|
||||||
* string pointed to by \p name. \p len specifies the maximum length of the
|
* string pointed to by \p name. \p len specifies the maximum length of the
|
||||||
@@ -3556,7 +3556,7 @@ CUresult CUDAAPI cuDeviceGetName(char *name, int len, CUdevice dev);
|
|||||||
* Note there is a later version of this API, ::cuDeviceGetUuid_v2. It will
|
* Note there is a later version of this API, ::cuDeviceGetUuid_v2. It will
|
||||||
* supplant this version in 12.0, which is retained for minor version compatibility.
|
* supplant this version in 12.0, which is retained for minor version compatibility.
|
||||||
*
|
*
|
||||||
* Returns 16-octets identifing the device \p dev in the structure
|
* Returns 16-octets identifying the device \p dev in the structure
|
||||||
* pointed by the \p uuid.
|
* pointed by the \p uuid.
|
||||||
*
|
*
|
||||||
* \param uuid - Returned UUID
|
* \param uuid - Returned UUID
|
||||||
@@ -3586,7 +3586,7 @@ CUresult CUDAAPI cuDeviceGetUuid(CUuuid *uuid, CUdevice dev);
|
|||||||
/**
|
/**
|
||||||
* \brief Return a UUID for the device (11.4+)
|
* \brief Return a UUID for the device (11.4+)
|
||||||
*
|
*
|
||||||
* Returns 16-octets identifing the device \p dev in the structure
|
* Returns 16-octets identifying the device \p dev in the structure
|
||||||
* pointed by the \p uuid. If the device is in MIG mode, returns its
|
* pointed by the \p uuid. If the device is in MIG mode, returns its
|
||||||
* MIG UUID which uniquely identifies the subscribed MIG compute instance.
|
* MIG UUID which uniquely identifies the subscribed MIG compute instance.
|
||||||
*
|
*
|
||||||
@@ -3867,7 +3867,7 @@ CUresult CUDAAPI cuDeviceGetTexture1DLinearMaxWidth(size_t *maxWidthInElements,
|
|||||||
* supports native atomic operations.
|
* supports native atomic operations.
|
||||||
* - ::CU_DEVICE_ATTRIBUTE_SINGLE_TO_DOUBLE_PRECISION_PERF_RATIO: Ratio of single precision performance
|
* - ::CU_DEVICE_ATTRIBUTE_SINGLE_TO_DOUBLE_PRECISION_PERF_RATIO: Ratio of single precision performance
|
||||||
* (in floating-point operations per second) to double precision performance.
|
* (in floating-point operations per second) to double precision performance.
|
||||||
* - ::CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS: Device suppports coherently accessing
|
* - ::CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS: Device supports coherently accessing
|
||||||
* pageable memory without calling cudaHostRegister on it.
|
* pageable memory without calling cudaHostRegister on it.
|
||||||
* - ::CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS: Device can coherently access managed memory
|
* - ::CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS: Device can coherently access managed memory
|
||||||
* concurrently with the CPU.
|
* concurrently with the CPU.
|
||||||
@@ -3875,7 +3875,7 @@ CUresult CUDAAPI cuDeviceGetTexture1DLinearMaxWidth(size_t *maxWidthInElements,
|
|||||||
* - ::CU_DEVICE_ATTRIBUTE_CAN_USE_HOST_POINTER_FOR_REGISTERED_MEM: Device can access host registered
|
* - ::CU_DEVICE_ATTRIBUTE_CAN_USE_HOST_POINTER_FOR_REGISTERED_MEM: Device can access host registered
|
||||||
* memory at the same virtual address as the CPU.
|
* memory at the same virtual address as the CPU.
|
||||||
* - ::CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK_OPTIN: The maximum per block shared memory size
|
* - ::CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK_OPTIN: The maximum per block shared memory size
|
||||||
* suported on this device. This is the maximum value that can be opted into when using the cuFuncSetAttribute() call.
|
* supported on this device. This is the maximum value that can be opted into when using the cuFuncSetAttribute() call.
|
||||||
* For more details see ::CU_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES
|
* For more details see ::CU_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES
|
||||||
* - ::CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS_USES_HOST_PAGE_TABLES: Device accesses pageable memory via the host's
|
* - ::CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS_USES_HOST_PAGE_TABLES: Device accesses pageable memory via the host's
|
||||||
* page tables.
|
* page tables.
|
||||||
@@ -4132,7 +4132,7 @@ __CUDA_DEPRECATED CUresult CUDAAPI cuDeviceGetProperties(CUdevprop *prop, CUdevi
|
|||||||
*
|
*
|
||||||
* \deprecated
|
* \deprecated
|
||||||
*
|
*
|
||||||
* This function was deprecated as of CUDA 5.0 and its functionality superceded
|
* This function was deprecated as of CUDA 5.0 and its functionality superseded
|
||||||
* by ::cuDeviceGetAttribute().
|
* by ::cuDeviceGetAttribute().
|
||||||
*
|
*
|
||||||
* Returns in \p *major and \p *minor the major and minor revision numbers that
|
* Returns in \p *major and \p *minor the major and minor revision numbers that
|
||||||
@@ -4962,10 +4962,10 @@ CUresult CUDAAPI cuCtxSynchronize(void);
|
|||||||
* returned.
|
* returned.
|
||||||
*
|
*
|
||||||
* - ::CU_LIMIT_MAX_L2_FETCH_GRANULARITY controls the L2 cache fetch granularity.
|
* - ::CU_LIMIT_MAX_L2_FETCH_GRANULARITY controls the L2 cache fetch granularity.
|
||||||
* Values can range from 0B to 128B. This is purely a performence hint and
|
* Values can range from 0B to 128B. This is purely a performance hint and
|
||||||
* it can be ignored or clamped depending on the platform.
|
* it can be ignored or clamped depending on the platform.
|
||||||
*
|
*
|
||||||
* - ::CU_LIMIT_PERSISTING_L2_CACHE_SIZE controls size in bytes availabe for
|
* - ::CU_LIMIT_PERSISTING_L2_CACHE_SIZE controls size in bytes available for
|
||||||
* persisting L2 cache. This is purely a performance hint and it can be
|
* persisting L2 cache. This is purely a performance hint and it can be
|
||||||
* ignored or clamped depending on the platform.
|
* ignored or clamped depending on the platform.
|
||||||
*
|
*
|
||||||
@@ -6398,7 +6398,7 @@ CUresult CUDAAPI cuMemHostGetFlags(unsigned int *pFlags, void *p);
|
|||||||
* ::cuStreamAttachMemAsync will be required to enable access on such devices.
|
* ::cuStreamAttachMemAsync will be required to enable access on such devices.
|
||||||
*
|
*
|
||||||
* If the association is later changed via ::cuStreamAttachMemAsync to
|
* If the association is later changed via ::cuStreamAttachMemAsync to
|
||||||
* a single stream, the default association as specifed during ::cuMemAllocManaged
|
* a single stream, the default association as specified during ::cuMemAllocManaged
|
||||||
* is restored when that stream is destroyed. For __managed__ variables, the
|
* is restored when that stream is destroyed. For __managed__ variables, the
|
||||||
* default association is always ::CU_MEM_ATTACH_GLOBAL. Note that destroying a
|
* default association is always ::CU_MEM_ATTACH_GLOBAL. Note that destroying a
|
||||||
* stream is an asynchronous operation, and as a result, the change to default
|
* stream is an asynchronous operation, and as a result, the change to default
|
||||||
@@ -9616,13 +9616,13 @@ CUresult CUDAAPI cuMemAddressFree(CUdeviceptr ptr, size_t size);
|
|||||||
* \brief Create a CUDA memory handle representing a memory allocation of a given size described by the given properties
|
* \brief Create a CUDA memory handle representing a memory allocation of a given size described by the given properties
|
||||||
*
|
*
|
||||||
* This creates a memory allocation on the target device specified through the
|
* This creates a memory allocation on the target device specified through the
|
||||||
* \p prop strcuture. The created allocation will not have any device or host
|
* \p prop structure. The created allocation will not have any device or host
|
||||||
* mappings. The generic memory \p handle for the allocation can be
|
* mappings. The generic memory \p handle for the allocation can be
|
||||||
* mapped to the address space of calling process via ::cuMemMap. This handle
|
* mapped to the address space of calling process via ::cuMemMap. This handle
|
||||||
* cannot be transmitted directly to other processes (see
|
* cannot be transmitted directly to other processes (see
|
||||||
* ::cuMemExportToShareableHandle). On Windows, the caller must also pass
|
* ::cuMemExportToShareableHandle). On Windows, the caller must also pass
|
||||||
* an LPSECURITYATTRIBUTE in \p prop to be associated with this handle which
|
* an LPSECURITYATTRIBUTE in \p prop to be associated with this handle which
|
||||||
* limits or allows access to this handle for a recepient process (see
|
* limits or allows access to this handle for a recipient process (see
|
||||||
* ::CUmemAllocationProp::win32HandleMetaData for more). The \p size of this
|
* ::CUmemAllocationProp::win32HandleMetaData for more). The \p size of this
|
||||||
* allocation must be a multiple of the value given via
|
* allocation must be a multiple of the value given via
|
||||||
* ::cuMemGetAllocationGranularity with the ::CU_MEM_ALLOC_GRANULARITY_MINIMUM
|
* ::cuMemGetAllocationGranularity with the ::CU_MEM_ALLOC_GRANULARITY_MINIMUM
|
||||||
@@ -9660,7 +9660,7 @@ CUresult CUDAAPI cuMemCreate(CUmemGenericAllocationHandle *handle, size_t size,
|
|||||||
* are unmapped and when all outstanding references to the handle (including its
|
* are unmapped and when all outstanding references to the handle (including its
|
||||||
* shareable counterparts) are also released. The generic memory handle can be
|
* shareable counterparts) are also released. The generic memory handle can be
|
||||||
* freed when there are still outstanding mappings made with this handle. Each
|
* freed when there are still outstanding mappings made with this handle. Each
|
||||||
* time a recepient process imports a shareable handle, it needs to pair it with
|
* time a recipient process imports a shareable handle, it needs to pair it with
|
||||||
* ::cuMemRelease for the handle to be freed. If \p handle is not a valid handle
|
* ::cuMemRelease for the handle to be freed. If \p handle is not a valid handle
|
||||||
* the behavior is undefined.
|
* the behavior is undefined.
|
||||||
*
|
*
|
||||||
@@ -10975,7 +10975,7 @@ CUresult CUDAAPI cuMemAdvise(CUdeviceptr devPtr, size_t count, CUmem_advise advi
|
|||||||
* a GPU id or CU_DEVICE_CPU depending on whether the last location for prefetch was a GPU or the CPU
|
* a GPU id or CU_DEVICE_CPU depending on whether the last location for prefetch was a GPU or the CPU
|
||||||
* respectively. If any page in the memory range was never explicitly prefetched or if all pages were not
|
* respectively. If any page in the memory range was never explicitly prefetched or if all pages were not
|
||||||
* prefetched to the same location, CU_DEVICE_INVALID will be returned. Note that this simply returns the
|
* prefetched to the same location, CU_DEVICE_INVALID will be returned. Note that this simply returns the
|
||||||
* last location that the applicaton requested to prefetch the memory range to. It gives no indication as to
|
* last location that the application requested to prefetch the memory range to. It gives no indication as to
|
||||||
* whether the prefetch operation to that location has completed or even begun.
|
* whether the prefetch operation to that location has completed or even begun.
|
||||||
*
|
*
|
||||||
* \param data - A pointer to a memory location where the result
|
* \param data - A pointer to a memory location where the result
|
||||||
@@ -13561,7 +13561,7 @@ CUresult CUDAAPI cuLaunchCooperativeKernel(CUfunction f,
|
|||||||
* All kernels launched must be identical with respect to the compiled code. Note that
|
* All kernels launched must be identical with respect to the compiled code. Note that
|
||||||
* any __device__, __constant__ or __managed__ variables present in the module that owns
|
* any __device__, __constant__ or __managed__ variables present in the module that owns
|
||||||
* the kernel launched on each device, are independently instantiated on every device.
|
* the kernel launched on each device, are independently instantiated on every device.
|
||||||
* It is the application's responsiblity to ensure these variables are initialized and
|
* It is the application's responsibility to ensure these variables are initialized and
|
||||||
* used appropriately.
|
* used appropriately.
|
||||||
*
|
*
|
||||||
* The size of the grids as specified in blocks, the size of the blocks themselves
|
* The size of the grids as specified in blocks, the size of the blocks themselves
|
||||||
|
46
include/triton/external/CUDA/nvml.h
vendored
46
include/triton/external/CUDA/nvml.h
vendored
@@ -328,7 +328,7 @@ typedef enum nvmlGpuLevel_enum
|
|||||||
typedef enum nvmlGpuP2PStatus_enum
|
typedef enum nvmlGpuP2PStatus_enum
|
||||||
{
|
{
|
||||||
NVML_P2P_STATUS_OK = 0,
|
NVML_P2P_STATUS_OK = 0,
|
||||||
NVML_P2P_STATUS_CHIPSET_NOT_SUPPORED,
|
NVML_P2P_STATUS_CHIPSET_NOT_SUPPORTED,
|
||||||
NVML_P2P_STATUS_GPU_NOT_SUPPORTED,
|
NVML_P2P_STATUS_GPU_NOT_SUPPORTED,
|
||||||
NVML_P2P_STATUS_IOH_TOPOLOGY_NOT_SUPPORTED,
|
NVML_P2P_STATUS_IOH_TOPOLOGY_NOT_SUPPORTED,
|
||||||
NVML_P2P_STATUS_DISABLED_BY_REGKEY,
|
NVML_P2P_STATUS_DISABLED_BY_REGKEY,
|
||||||
@@ -736,7 +736,7 @@ typedef enum nvmlReturn_enum
|
|||||||
NVML_ERROR_IN_USE = 19, //!< An operation cannot be performed because the GPU is currently in use
|
NVML_ERROR_IN_USE = 19, //!< An operation cannot be performed because the GPU is currently in use
|
||||||
NVML_ERROR_MEMORY = 20, //!< Insufficient memory
|
NVML_ERROR_MEMORY = 20, //!< Insufficient memory
|
||||||
NVML_ERROR_NO_DATA = 21, //!<No data
|
NVML_ERROR_NO_DATA = 21, //!<No data
|
||||||
NVML_ERROR_VGPU_ECC_NOT_SUPPORTED = 22, //!< The requested vgpu operation is not available on target device, becasue ECC is enabled
|
NVML_ERROR_VGPU_ECC_NOT_SUPPORTED = 22, //!< The requested vgpu operation is not available on target device, because ECC is enabled
|
||||||
NVML_ERROR_UNKNOWN = 999 //!< An internal driver error occurred
|
NVML_ERROR_UNKNOWN = 999 //!< An internal driver error occurred
|
||||||
} nvmlReturn_t;
|
} nvmlReturn_t;
|
||||||
|
|
||||||
@@ -1463,7 +1463,7 @@ typedef struct nvmlEncoderSessionInfo_st
|
|||||||
*/
|
*/
|
||||||
typedef enum nvmlFBCSessionType_enum
|
typedef enum nvmlFBCSessionType_enum
|
||||||
{
|
{
|
||||||
NVML_FBC_SESSION_TYPE_UNKNOWN = 0, //!< Unknwon
|
NVML_FBC_SESSION_TYPE_UNKNOWN = 0, //!< Unknown
|
||||||
NVML_FBC_SESSION_TYPE_TOSYS, //!< ToSys
|
NVML_FBC_SESSION_TYPE_TOSYS, //!< ToSys
|
||||||
NVML_FBC_SESSION_TYPE_CUDA, //!< Cuda
|
NVML_FBC_SESSION_TYPE_CUDA, //!< Cuda
|
||||||
NVML_FBC_SESSION_TYPE_VID, //!< Vid
|
NVML_FBC_SESSION_TYPE_VID, //!< Vid
|
||||||
@@ -3678,10 +3678,10 @@ nvmlReturn_t DECLDIR nvmlDeviceGetEncoderStats (nvmlDevice_t device, unsigned in
|
|||||||
* Retrieves information about active encoder sessions on a target device.
|
* Retrieves information about active encoder sessions on a target device.
|
||||||
*
|
*
|
||||||
* An array of active encoder sessions is returned in the caller-supplied buffer pointed at by \a sessionInfos. The
|
* An array of active encoder sessions is returned in the caller-supplied buffer pointed at by \a sessionInfos. The
|
||||||
* array elememt count is passed in \a sessionCount, and \a sessionCount is used to return the number of sessions
|
* array element count is passed in \a sessionCount, and \a sessionCount is used to return the number of sessions
|
||||||
* written to the buffer.
|
* written to the buffer.
|
||||||
*
|
*
|
||||||
* If the supplied buffer is not large enough to accomodate the active session array, the function returns
|
* If the supplied buffer is not large enough to accommodate the active session array, the function returns
|
||||||
* NVML_ERROR_INSUFFICIENT_SIZE, with the element count of nvmlEncoderSessionInfo_t array required in \a sessionCount.
|
* NVML_ERROR_INSUFFICIENT_SIZE, with the element count of nvmlEncoderSessionInfo_t array required in \a sessionCount.
|
||||||
* To query the number of active encoder sessions, call this function with *sessionCount = 0. The code will return
|
* To query the number of active encoder sessions, call this function with *sessionCount = 0. The code will return
|
||||||
* NVML_SUCCESS with number of active encoder sessions updated in *sessionCount.
|
* NVML_SUCCESS with number of active encoder sessions updated in *sessionCount.
|
||||||
@@ -3727,7 +3727,7 @@ nvmlReturn_t DECLDIR nvmlDeviceGetDecoderUtilization(nvmlDevice_t device, unsign
|
|||||||
* For Maxwell &tm; or newer fully supported devices.
|
* For Maxwell &tm; or newer fully supported devices.
|
||||||
*
|
*
|
||||||
* @param device The identifier of the target device
|
* @param device The identifier of the target device
|
||||||
* @param fbcStats Reference to nvmlFBCStats_t structure contianing NvFBC stats
|
* @param fbcStats Reference to nvmlFBCStats_t structure containing NvFBC stats
|
||||||
*
|
*
|
||||||
* @return
|
* @return
|
||||||
* - \ref NVML_SUCCESS if \a fbcStats is fetched
|
* - \ref NVML_SUCCESS if \a fbcStats is fetched
|
||||||
@@ -3742,10 +3742,10 @@ nvmlReturn_t DECLDIR nvmlDeviceGetFBCStats(nvmlDevice_t device, nvmlFBCStats_t *
|
|||||||
* Retrieves information about active frame buffer capture sessions on a target device.
|
* Retrieves information about active frame buffer capture sessions on a target device.
|
||||||
*
|
*
|
||||||
* An array of active encoder sessions is returned in the caller-supplied buffer pointed at by \a sessionInfo. The
|
* An array of active encoder sessions is returned in the caller-supplied buffer pointed at by \a sessionInfo. The
|
||||||
* array elememt count is passed in \a sessionCount, and \a sessionCount is used to return the number of sessions
|
* array element count is passed in \a sessionCount, and \a sessionCount is used to return the number of sessions
|
||||||
* written to the buffer.
|
* written to the buffer.
|
||||||
*
|
*
|
||||||
* If the supplied buffer is not large enough to accomodate the active session array, the function returns
|
* If the supplied buffer is not large enough to accommodate the active session array, the function returns
|
||||||
* NVML_ERROR_INSUFFICIENT_SIZE, with the element count of nvmlFBCSessionInfo_t array required in \a sessionCount.
|
* NVML_ERROR_INSUFFICIENT_SIZE, with the element count of nvmlFBCSessionInfo_t array required in \a sessionCount.
|
||||||
* To query the number of active FBC sessions, call this function with *sessionCount = 0. The code will return
|
* To query the number of active FBC sessions, call this function with *sessionCount = 0. The code will return
|
||||||
* NVML_SUCCESS with number of active FBC sessions updated in *sessionCount.
|
* NVML_SUCCESS with number of active FBC sessions updated in *sessionCount.
|
||||||
@@ -4208,7 +4208,7 @@ nvmlReturn_t DECLDIR nvmlDeviceGetRetiredPages(nvmlDevice_t device, nvmlPageReti
|
|||||||
* The address information provided from this API is the hardware address of the page that was retired. Note
|
* The address information provided from this API is the hardware address of the page that was retired. Note
|
||||||
* that this does not match the virtual address used in CUDA, but will match the address information in XID 63
|
* that this does not match the virtual address used in CUDA, but will match the address information in XID 63
|
||||||
*
|
*
|
||||||
* \note nvmlDeviceGetRetiredPages_v2 adds an additional timestamps paramter to return the time of each page's
|
* \note nvmlDeviceGetRetiredPages_v2 adds an additional timestamps parameter to return the time of each page's
|
||||||
* retirement.
|
* retirement.
|
||||||
*
|
*
|
||||||
* For Kepler &tm; or newer fully supported devices.
|
* For Kepler &tm; or newer fully supported devices.
|
||||||
@@ -4476,7 +4476,7 @@ nvmlReturn_t DECLDIR nvmlDeviceSetDriverModel(nvmlDevice_t device, nvmlDriverMod
|
|||||||
* Set clocks that device will lock to.
|
* Set clocks that device will lock to.
|
||||||
*
|
*
|
||||||
* Sets the clocks that the device will be running at to the value in the range of minGpuClockMHz to maxGpuClockMHz.
|
* Sets the clocks that the device will be running at to the value in the range of minGpuClockMHz to maxGpuClockMHz.
|
||||||
* Setting this will supercede application clock values and take effect regardless if a cuda app is running.
|
* Setting this will supersede application clock values and take effect regardless if a cuda app is running.
|
||||||
* See \ref nvmlDeviceSetApplicationsClocks
|
* See \ref nvmlDeviceSetApplicationsClocks
|
||||||
*
|
*
|
||||||
* Can be used as a setting to request constant performance.
|
* Can be used as a setting to request constant performance.
|
||||||
@@ -5297,7 +5297,7 @@ nvmlReturn_t DECLDIR nvmlDeviceSetVirtualizationMode(nvmlDevice_t device, nvmlGp
|
|||||||
* pointed at by \a vgpuTypeIds. The element count of nvmlVgpuTypeId_t array is passed in \a vgpuCount, and \a vgpuCount
|
* pointed at by \a vgpuTypeIds. The element count of nvmlVgpuTypeId_t array is passed in \a vgpuCount, and \a vgpuCount
|
||||||
* is used to return the number of vGPU types written to the buffer.
|
* is used to return the number of vGPU types written to the buffer.
|
||||||
*
|
*
|
||||||
* If the supplied buffer is not large enough to accomodate the vGPU type array, the function returns
|
* If the supplied buffer is not large enough to accommodate the vGPU type array, the function returns
|
||||||
* NVML_ERROR_INSUFFICIENT_SIZE, with the element count of nvmlVgpuTypeId_t array required in \a vgpuCount.
|
* NVML_ERROR_INSUFFICIENT_SIZE, with the element count of nvmlVgpuTypeId_t array required in \a vgpuCount.
|
||||||
* To query the number of vGPU types supported for the GPU, call this function with *vgpuCount = 0.
|
* To query the number of vGPU types supported for the GPU, call this function with *vgpuCount = 0.
|
||||||
* The code will return NVML_ERROR_INSUFFICIENT_SIZE, or NVML_SUCCESS if no vGPU types are supported.
|
* The code will return NVML_ERROR_INSUFFICIENT_SIZE, or NVML_SUCCESS if no vGPU types are supported.
|
||||||
@@ -5327,9 +5327,9 @@ nvmlReturn_t DECLDIR nvmlDeviceGetSupportedVgpus(nvmlDevice_t device, unsigned i
|
|||||||
* can concurrently run on a device. For example, if only one vGPU type is allowed at a time on a device, then the creatable
|
* can concurrently run on a device. For example, if only one vGPU type is allowed at a time on a device, then the creatable
|
||||||
* list will be restricted to whatever vGPU type is already running on the device.
|
* list will be restricted to whatever vGPU type is already running on the device.
|
||||||
*
|
*
|
||||||
* If the supplied buffer is not large enough to accomodate the vGPU type array, the function returns
|
* If the supplied buffer is not large enough to accommodate the vGPU type array, the function returns
|
||||||
* NVML_ERROR_INSUFFICIENT_SIZE, with the element count of nvmlVgpuTypeId_t array required in \a vgpuCount.
|
* NVML_ERROR_INSUFFICIENT_SIZE, with the element count of nvmlVgpuTypeId_t array required in \a vgpuCount.
|
||||||
* To query the number of vGPU types createable for the GPU, call this function with *vgpuCount = 0.
|
* To query the number of vGPU types creatable for the GPU, call this function with *vgpuCount = 0.
|
||||||
* The code will return NVML_ERROR_INSUFFICIENT_SIZE, or NVML_SUCCESS if no vGPU types are creatable.
|
* The code will return NVML_ERROR_INSUFFICIENT_SIZE, or NVML_SUCCESS if no vGPU types are creatable.
|
||||||
*
|
*
|
||||||
* @param device The identifier of the target device
|
* @param device The identifier of the target device
|
||||||
@@ -5392,7 +5392,7 @@ nvmlReturn_t DECLDIR nvmlVgpuTypeGetName(nvmlVgpuTypeId_t vgpuTypeId, char *vgpu
|
|||||||
*
|
*
|
||||||
* @param vgpuTypeId Handle to vGPU type
|
* @param vgpuTypeId Handle to vGPU type
|
||||||
* @param deviceID Device ID and vendor ID of the device contained in single 32 bit value
|
* @param deviceID Device ID and vendor ID of the device contained in single 32 bit value
|
||||||
* @param subsystemID Subsytem ID and subsytem vendor ID of the device contained in single 32 bit value
|
* @param subsystemID Subsystem ID and subsystem vendor ID of the device contained in single 32 bit value
|
||||||
*
|
*
|
||||||
* @return
|
* @return
|
||||||
* - \ref NVML_SUCCESS successful completion
|
* - \ref NVML_SUCCESS successful completion
|
||||||
@@ -5516,10 +5516,10 @@ nvmlReturn_t DECLDIR nvmlVgpuTypeGetMaxInstances(nvmlDevice_t device, nvmlVgpuTy
|
|||||||
* Retrieve the active vGPU instances on a device.
|
* Retrieve the active vGPU instances on a device.
|
||||||
*
|
*
|
||||||
* An array of active vGPU instances is returned in the caller-supplied buffer pointed at by \a vgpuInstances. The
|
* An array of active vGPU instances is returned in the caller-supplied buffer pointed at by \a vgpuInstances. The
|
||||||
* array elememt count is passed in \a vgpuCount, and \a vgpuCount is used to return the number of vGPU instances
|
* array element count is passed in \a vgpuCount, and \a vgpuCount is used to return the number of vGPU instances
|
||||||
* written to the buffer.
|
* written to the buffer.
|
||||||
*
|
*
|
||||||
* If the supplied buffer is not large enough to accomodate the vGPU instance array, the function returns
|
* If the supplied buffer is not large enough to accommodate the vGPU instance array, the function returns
|
||||||
* NVML_ERROR_INSUFFICIENT_SIZE, with the element count of nvmlVgpuInstance_t array required in \a vgpuCount.
|
* NVML_ERROR_INSUFFICIENT_SIZE, with the element count of nvmlVgpuInstance_t array required in \a vgpuCount.
|
||||||
* To query the number of active vGPU instances, call this function with *vgpuCount = 0. The code will return
|
* To query the number of active vGPU instances, call this function with *vgpuCount = 0. The code will return
|
||||||
* NVML_ERROR_INSUFFICIENT_SIZE, or NVML_SUCCESS if no vGPU Types are supported.
|
* NVML_ERROR_INSUFFICIENT_SIZE, or NVML_SUCCESS if no vGPU Types are supported.
|
||||||
@@ -5702,7 +5702,7 @@ nvmlReturn_t DECLDIR nvmlVgpuInstanceGetFrameRateLimit(nvmlVgpuInstance_t vgpuIn
|
|||||||
* @param encoderCapacity Reference to an unsigned int for the encoder capacity
|
* @param encoderCapacity Reference to an unsigned int for the encoder capacity
|
||||||
*
|
*
|
||||||
* @return
|
* @return
|
||||||
* - \ref NVML_SUCCESS if \a encoderCapacity has been retrived
|
* - \ref NVML_SUCCESS if \a encoderCapacity has been retrieved
|
||||||
* - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized
|
* - \ref NVML_ERROR_UNINITIALIZED if the library has not been successfully initialized
|
||||||
* - \ref NVML_ERROR_INVALID_ARGUMENT if \a vgpuInstance is 0, or \a encoderQueryType is invalid
|
* - \ref NVML_ERROR_INVALID_ARGUMENT if \a vgpuInstance is 0, or \a encoderQueryType is invalid
|
||||||
* - \ref NVML_ERROR_NOT_FOUND if \a vgpuInstance does not match a valid active vGPU instance on the system
|
* - \ref NVML_ERROR_NOT_FOUND if \a vgpuInstance does not match a valid active vGPU instance on the system
|
||||||
@@ -5863,10 +5863,10 @@ nvmlReturn_t DECLDIR nvmlVgpuInstanceGetEncoderStats(nvmlVgpuInstance_t vgpuInst
|
|||||||
* Retrieves information about all active encoder sessions on a vGPU Instance.
|
* Retrieves information about all active encoder sessions on a vGPU Instance.
|
||||||
*
|
*
|
||||||
* An array of active encoder sessions is returned in the caller-supplied buffer pointed at by \a sessionInfo. The
|
* An array of active encoder sessions is returned in the caller-supplied buffer pointed at by \a sessionInfo. The
|
||||||
* array elememt count is passed in \a sessionCount, and \a sessionCount is used to return the number of sessions
|
* array element count is passed in \a sessionCount, and \a sessionCount is used to return the number of sessions
|
||||||
* written to the buffer.
|
* written to the buffer.
|
||||||
*
|
*
|
||||||
* If the supplied buffer is not large enough to accomodate the active session array, the function returns
|
* If the supplied buffer is not large enough to accommodate the active session array, the function returns
|
||||||
* NVML_ERROR_INSUFFICIENT_SIZE, with the element count of nvmlEncoderSessionInfo_t array required in \a sessionCount.
|
* NVML_ERROR_INSUFFICIENT_SIZE, with the element count of nvmlEncoderSessionInfo_t array required in \a sessionCount.
|
||||||
* To query the number of active encoder sessions, call this function with *sessionCount = 0. The code will return
|
* To query the number of active encoder sessions, call this function with *sessionCount = 0. The code will return
|
||||||
* NVML_SUCCESS with number of active encoder sessions updated in *sessionCount.
|
* NVML_SUCCESS with number of active encoder sessions updated in *sessionCount.
|
||||||
@@ -5896,7 +5896,7 @@ nvmlReturn_t DECLDIR nvmlVgpuInstanceGetEncoderSessions(nvmlVgpuInstance_t vgpuI
|
|||||||
* For Maxwell &tm; or newer fully supported devices.
|
* For Maxwell &tm; or newer fully supported devices.
|
||||||
*
|
*
|
||||||
* @param vgpuInstance Identifier of the target vGPU instance
|
* @param vgpuInstance Identifier of the target vGPU instance
|
||||||
* @param fbcStats Reference to nvmlFBCStats_t structure contianing NvFBC stats
|
* @param fbcStats Reference to nvmlFBCStats_t structure containing NvFBC stats
|
||||||
*
|
*
|
||||||
* @return
|
* @return
|
||||||
* - \ref NVML_SUCCESS if \a fbcStats is fetched
|
* - \ref NVML_SUCCESS if \a fbcStats is fetched
|
||||||
@@ -5914,7 +5914,7 @@ nvmlReturn_t DECLDIR nvmlVgpuInstanceGetFBCStats(nvmlVgpuInstance_t vgpuInstance
|
|||||||
* array element count is passed in \a sessionCount, and \a sessionCount is used to return the number of sessions
|
* array element count is passed in \a sessionCount, and \a sessionCount is used to return the number of sessions
|
||||||
* written to the buffer.
|
* written to the buffer.
|
||||||
*
|
*
|
||||||
* If the supplied buffer is not large enough to accomodate the active session array, the function returns
|
* If the supplied buffer is not large enough to accommodate the active session array, the function returns
|
||||||
* NVML_ERROR_INSUFFICIENT_SIZE, with the element count of nvmlFBCSessionInfo_t array required in \a sessionCount.
|
* NVML_ERROR_INSUFFICIENT_SIZE, with the element count of nvmlFBCSessionInfo_t array required in \a sessionCount.
|
||||||
* To query the number of active FBC sessions, call this function with *sessionCount = 0. The code will return
|
* To query the number of active FBC sessions, call this function with *sessionCount = 0. The code will return
|
||||||
* NVML_SUCCESS with number of active FBC sessions updated in *sessionCount.
|
* NVML_SUCCESS with number of active FBC sessions updated in *sessionCount.
|
||||||
@@ -6094,7 +6094,7 @@ typedef struct nvmlVgpuPgpuMetadata_st
|
|||||||
unsigned int version; //!< Current version of the structure
|
unsigned int version; //!< Current version of the structure
|
||||||
unsigned int revision; //!< Current revision of the structure
|
unsigned int revision; //!< Current revision of the structure
|
||||||
char hostDriverVersion[NVML_SYSTEM_DRIVER_VERSION_BUFFER_SIZE]; //!< Host driver version
|
char hostDriverVersion[NVML_SYSTEM_DRIVER_VERSION_BUFFER_SIZE]; //!< Host driver version
|
||||||
unsigned int pgpuVirtualizationCaps; //!< Pgpu virtualizaion capabilities bitfileld
|
unsigned int pgpuVirtualizationCaps; //!< Pgpu virtualization capabilities bitfield
|
||||||
unsigned int reserved[7]; //!< Reserved for internal use
|
unsigned int reserved[7]; //!< Reserved for internal use
|
||||||
unsigned int opaqueDataSize; //!< Size of opaque data field in bytes
|
unsigned int opaqueDataSize; //!< Size of opaque data field in bytes
|
||||||
char opaqueData[4]; //!< Opaque data
|
char opaqueData[4]; //!< Opaque data
|
||||||
@@ -6191,7 +6191,7 @@ nvmlReturn_t DECLDIR nvmlDeviceGetVgpuMetadata(nvmlDevice_t device, nvmlVgpuPgpu
|
|||||||
*
|
*
|
||||||
* The caller passes in a buffer via \a compatibilityInfo, into which a compatibility information structure is written. The
|
* The caller passes in a buffer via \a compatibilityInfo, into which a compatibility information structure is written. The
|
||||||
* structure defines the states in which the vGPU / VM may be booted on the physical GPU. If the vGPU / VM compatibility
|
* structure defines the states in which the vGPU / VM may be booted on the physical GPU. If the vGPU / VM compatibility
|
||||||
* with the physical GPU is limited, a limit code indicates the factor limiting compability.
|
* with the physical GPU is limited, a limit code indicates the factor limiting compatibility.
|
||||||
* (see \ref nvmlVgpuPgpuCompatibilityLimitCode_t for details).
|
* (see \ref nvmlVgpuPgpuCompatibilityLimitCode_t for details).
|
||||||
*
|
*
|
||||||
* Note: vGPU compatibility does not take into account dynamic capacity conditions that may limit a system's ability to
|
* Note: vGPU compatibility does not take into account dynamic capacity conditions that may limit a system's ability to
|
||||||
|
18
include/triton/external/half.hpp
vendored
18
include/triton/external/half.hpp
vendored
@@ -950,7 +950,7 @@ namespace half_float
|
|||||||
/// Convert half-precision floating point to integer.
|
/// Convert half-precision floating point to integer.
|
||||||
/// \tparam R rounding mode to use, `std::round_indeterminate` for fastest rounding
|
/// \tparam R rounding mode to use, `std::round_indeterminate` for fastest rounding
|
||||||
/// \tparam E `true` for round to even, `false` for round away from zero
|
/// \tparam E `true` for round to even, `false` for round away from zero
|
||||||
/// \tparam T type to convert to (buitlin integer type with at least 16 bits precision, excluding any implicit sign bits)
|
/// \tparam T type to convert to (builtin integer type with at least 16 bits precision, excluding any implicit sign bits)
|
||||||
/// \param value binary representation of half-precision value
|
/// \param value binary representation of half-precision value
|
||||||
/// \return integral value
|
/// \return integral value
|
||||||
template<std::float_round_style R,bool E,typename T> T half2int_impl(uint16 value)
|
template<std::float_round_style R,bool E,typename T> T half2int_impl(uint16 value)
|
||||||
@@ -988,13 +988,13 @@ namespace half_float
|
|||||||
|
|
||||||
/// Convert half-precision floating point to integer.
|
/// Convert half-precision floating point to integer.
|
||||||
/// \tparam R rounding mode to use, `std::round_indeterminate` for fastest rounding
|
/// \tparam R rounding mode to use, `std::round_indeterminate` for fastest rounding
|
||||||
/// \tparam T type to convert to (buitlin integer type with at least 16 bits precision, excluding any implicit sign bits)
|
/// \tparam T type to convert to (builtin integer type with at least 16 bits precision, excluding any implicit sign bits)
|
||||||
/// \param value binary representation of half-precision value
|
/// \param value binary representation of half-precision value
|
||||||
/// \return integral value
|
/// \return integral value
|
||||||
template<std::float_round_style R,typename T> T half2int(uint16 value) { return half2int_impl<R,HALF_ROUND_TIES_TO_EVEN,T>(value); }
|
template<std::float_round_style R,typename T> T half2int(uint16 value) { return half2int_impl<R,HALF_ROUND_TIES_TO_EVEN,T>(value); }
|
||||||
|
|
||||||
/// Convert half-precision floating point to integer using round-to-nearest-away-from-zero.
|
/// Convert half-precision floating point to integer using round-to-nearest-away-from-zero.
|
||||||
/// \tparam T type to convert to (buitlin integer type with at least 16 bits precision, excluding any implicit sign bits)
|
/// \tparam T type to convert to (builtin integer type with at least 16 bits precision, excluding any implicit sign bits)
|
||||||
/// \param value binary representation of half-precision value
|
/// \param value binary representation of half-precision value
|
||||||
/// \return integral value
|
/// \return integral value
|
||||||
template<typename T> T half2int_up(uint16 value) { return half2int_impl<std::round_to_nearest,0,T>(value); }
|
template<typename T> T half2int_up(uint16 value) { return half2int_impl<std::round_to_nearest,0,T>(value); }
|
||||||
@@ -1053,7 +1053,7 @@ namespace half_float
|
|||||||
|
|
||||||
/// Half-precision floating point type.
|
/// Half-precision floating point type.
|
||||||
/// This class implements an IEEE-conformant half-precision floating point type with the usual arithmetic operators and
|
/// This class implements an IEEE-conformant half-precision floating point type with the usual arithmetic operators and
|
||||||
/// conversions. It is implicitly convertible to single-precision floating point, which makes artihmetic expressions and
|
/// conversions. It is implicitly convertible to single-precision floating point, which makes arithmetic expressions and
|
||||||
/// functions with mixed-type operands to be of the most precise operand type. Additionally all arithmetic operations
|
/// functions with mixed-type operands to be of the most precise operand type. Additionally all arithmetic operations
|
||||||
/// (and many mathematical functions) are carried out in single-precision internally. All conversions from single- to
|
/// (and many mathematical functions) are carried out in single-precision internally. All conversions from single- to
|
||||||
/// half-precision are done using the library's default rounding mode, but temporary results inside chained arithmetic
|
/// half-precision are done using the library's default rounding mode, but temporary results inside chained arithmetic
|
||||||
@@ -1062,7 +1062,7 @@ namespace half_float
|
|||||||
/// According to the C++98/03 definition, the half type is not a POD type. But according to C++11's less strict and
|
/// According to the C++98/03 definition, the half type is not a POD type. But according to C++11's less strict and
|
||||||
/// extended definitions it is both a standard layout type and a trivially copyable type (even if not a POD type), which
|
/// extended definitions it is both a standard layout type and a trivially copyable type (even if not a POD type), which
|
||||||
/// means it can be standard-conformantly copied using raw binary copies. But in this context some more words about the
|
/// means it can be standard-conformantly copied using raw binary copies. But in this context some more words about the
|
||||||
/// actual size of the type. Although the half is representing an IEEE 16-bit type, it does not neccessarily have to be of
|
/// actual size of the type. Although the half is representing an IEEE 16-bit type, it does not necessarily have to be of
|
||||||
/// exactly 16-bits size. But on any reasonable implementation the actual binary representation of this type will most
|
/// exactly 16-bits size. But on any reasonable implementation the actual binary representation of this type will most
|
||||||
/// probably not involve any additional "magic" or padding beyond the simple binary representation of the underlying 16-bit
|
/// probably not involve any additional "magic" or padding beyond the simple binary representation of the underlying 16-bit
|
||||||
/// IEEE number, even if not strictly guaranteed by the standard. But even then it only has an actual size of 16 bits if
|
/// IEEE number, even if not strictly guaranteed by the standard. But even then it only has an actual size of 16 bits if
|
||||||
@@ -2181,7 +2181,7 @@ namespace half_float
|
|||||||
|
|
||||||
/// Identity.
|
/// Identity.
|
||||||
/// \param arg operand
|
/// \param arg operand
|
||||||
/// \return uncahnged operand
|
/// \return unchanged operand
|
||||||
template<typename T> HALF_CONSTEXPR typename enable<T,T>::type operator+(T arg) { return arg; }
|
template<typename T> HALF_CONSTEXPR typename enable<T,T>::type operator+(T arg) { return arg; }
|
||||||
|
|
||||||
/// Negation.
|
/// Negation.
|
||||||
@@ -2620,7 +2620,7 @@ namespace half_float
|
|||||||
/// Multiply by power of two.
|
/// Multiply by power of two.
|
||||||
/// \param arg number to modify
|
/// \param arg number to modify
|
||||||
/// \param exp power of two to multiply with
|
/// \param exp power of two to multiply with
|
||||||
/// \return \a arg multplied by 2 raised to \a exp
|
/// \return \a arg multiplied by 2 raised to \a exp
|
||||||
// template<typename T> typename enable<half,T>::type ldexp(T arg, int exp) { return functions::scalbln(arg, exp); }
|
// template<typename T> typename enable<half,T>::type ldexp(T arg, int exp) { return functions::scalbln(arg, exp); }
|
||||||
inline half ldexp(half arg, int exp) { return functions::scalbln(arg, exp); }
|
inline half ldexp(half arg, int exp) { return functions::scalbln(arg, exp); }
|
||||||
inline half ldexp(expr arg, int exp) { return functions::scalbln(arg, exp); }
|
inline half ldexp(expr arg, int exp) { return functions::scalbln(arg, exp); }
|
||||||
@@ -2636,7 +2636,7 @@ namespace half_float
|
|||||||
/// Multiply by power of two.
|
/// Multiply by power of two.
|
||||||
/// \param arg number to modify
|
/// \param arg number to modify
|
||||||
/// \param exp power of two to multiply with
|
/// \param exp power of two to multiply with
|
||||||
/// \return \a arg multplied by 2 raised to \a exp
|
/// \return \a arg multiplied by 2 raised to \a exp
|
||||||
// template<typename T> typename enable<half,T>::type scalbn(T arg, int exp) { return functions::scalbln(arg, exp); }
|
// template<typename T> typename enable<half,T>::type scalbn(T arg, int exp) { return functions::scalbln(arg, exp); }
|
||||||
inline half scalbn(half arg, int exp) { return functions::scalbln(arg, exp); }
|
inline half scalbn(half arg, int exp) { return functions::scalbln(arg, exp); }
|
||||||
inline half scalbn(expr arg, int exp) { return functions::scalbln(arg, exp); }
|
inline half scalbn(expr arg, int exp) { return functions::scalbln(arg, exp); }
|
||||||
@@ -2644,7 +2644,7 @@ namespace half_float
|
|||||||
/// Multiply by power of two.
|
/// Multiply by power of two.
|
||||||
/// \param arg number to modify
|
/// \param arg number to modify
|
||||||
/// \param exp power of two to multiply with
|
/// \param exp power of two to multiply with
|
||||||
/// \return \a arg multplied by 2 raised to \a exp
|
/// \return \a arg multiplied by 2 raised to \a exp
|
||||||
// template<typename T> typename enable<half,T>::type scalbln(T arg, long exp) { return functions::scalbln(arg, exp); }
|
// template<typename T> typename enable<half,T>::type scalbln(T arg, long exp) { return functions::scalbln(arg, exp); }
|
||||||
inline half scalbln(half arg, long exp) { return functions::scalbln(arg, exp); }
|
inline half scalbln(half arg, long exp) { return functions::scalbln(arg, exp); }
|
||||||
inline half scalbln(expr arg, long exp) { return functions::scalbln(arg, exp); }
|
inline half scalbln(expr arg, long exp) { return functions::scalbln(arg, exp); }
|
||||||
|
@@ -91,7 +91,7 @@ void inliner::do_inline(ir::function* fn, ir::call_inst* callsite, ir::builder&
|
|||||||
if(inst_map.find(inst_op) != inst_map.end())
|
if(inst_map.find(inst_op) != inst_map.end())
|
||||||
new_inst->set_operand(k, inst_map.at(inst_op));
|
new_inst->set_operand(k, inst_map.at(inst_op));
|
||||||
}
|
}
|
||||||
// handles a ret instruciton.
|
// handles a ret instruction.
|
||||||
// instead of returning we need to branch to after the function call
|
// instead of returning we need to branch to after the function call
|
||||||
if(ir::return_inst* ret = dynamic_cast<ir::return_inst*>(new_inst)) {
|
if(ir::return_inst* ret = dynamic_cast<ir::return_inst*>(new_inst)) {
|
||||||
if(ir::value* ret_val = ret->get_return_value())
|
if(ir::value* ret_val = ret->get_return_value())
|
||||||
|
@@ -51,7 +51,7 @@ def get_thirdparty_packages(triton_cache_path):
|
|||||||
Package("pybind11", "pybind11-2.10.0", "https://github.com/pybind/pybind11/archive/refs/tags/v2.10.0.tar.gz", "include/pybind11/pybind11.h", "PYBIND11_INCLUDE_DIR", "")
|
Package("pybind11", "pybind11-2.10.0", "https://github.com/pybind/pybind11/archive/refs/tags/v2.10.0.tar.gz", "include/pybind11/pybind11.h", "PYBIND11_INCLUDE_DIR", "")
|
||||||
]
|
]
|
||||||
if not use_system_llvm():
|
if not use_system_llvm():
|
||||||
# donwload LLVM if no suitable system LLVM is installed
|
# download LLVM if no suitable system LLVM is installed
|
||||||
packages.append(
|
packages.append(
|
||||||
Package("llvm", "clang+llvm-11.0.1-x86_64-linux-gnu-ubuntu-16.04", "https://github.com/llvm/llvm-project/releases/download/llvmorg-11.0.1/clang+llvm-11.0.1-x86_64-linux-gnu-ubuntu-16.04.tar.xz", "lib", "LLVM_INCLUDE_DIRS", "LLVM_LIBRARY_DIR")
|
Package("llvm", "clang+llvm-11.0.1-x86_64-linux-gnu-ubuntu-16.04", "https://github.com/llvm/llvm-project/releases/download/llvmorg-11.0.1/clang+llvm-11.0.1-x86_64-linux-gnu-ubuntu-16.04.tar.xz", "lib", "LLVM_INCLUDE_DIRS", "LLVM_LIBRARY_DIR")
|
||||||
)
|
)
|
||||||
|
@@ -232,7 +232,7 @@ ir::value *store(ir::value *ptr, ir::value *val, std::optional<ir::value *> _mas
|
|||||||
----------------------------------------------*/
|
----------------------------------------------*/
|
||||||
std::string dot_docstr = R"pbdoc(
|
std::string dot_docstr = R"pbdoc(
|
||||||
Returns the matrix product of two blocks.
|
Returns the matrix product of two blocks.
|
||||||
The two blocks must be two dimensionals and have compatible inner dimensions.
|
The two blocks must be two-dimensional and have compatible inner dimensions.
|
||||||
|
|
||||||
:param input: The first block to be multiplied.
|
:param input: The first block to be multiplied.
|
||||||
:type input: 2D block of scalar-type in {`float16`, `float32`}
|
:type input: 2D block of scalar-type in {`float16`, `float32`}
|
||||||
|
@@ -1126,7 +1126,7 @@ class CacheManager:
|
|||||||
os.rename(filepath + ".tmp", filepath)
|
os.rename(filepath + ".tmp", filepath)
|
||||||
|
|
||||||
|
|
||||||
# utilties for generating and compiling C wrappers
|
# utilities for generating and compiling C wrappers
|
||||||
|
|
||||||
|
|
||||||
@functools.lru_cache()
|
@functools.lru_cache()
|
||||||
|
@@ -768,7 +768,7 @@ def dot(input, other, trans_a=False, trans_b=False, allow_tf32=True, _builder=No
|
|||||||
"""
|
"""
|
||||||
Returns the matrix product of two blocks.
|
Returns the matrix product of two blocks.
|
||||||
|
|
||||||
The two blocks must be two dimensionals and have compatible inner dimensions.
|
The two blocks must be two-dimensional and have compatible inner dimensions.
|
||||||
|
|
||||||
:param input: The first tensor to be multiplied.
|
:param input: The first tensor to be multiplied.
|
||||||
:type input: 2D tensor of scalar-type in {:code:`float16`, :code:`bfloat16`, :code:`float32`}
|
:type input: 2D tensor of scalar-type in {:code:`float16`, :code:`bfloat16`, :code:`float32`}
|
||||||
|
Reference in New Issue
Block a user