#include "nvperfapi_common_types.h"
Go to the source code of this file.
Devices. | |
enum | NVPA_DeviceAttr { NVPA_DEVICE_ATTR_INVALID = 0, NVPA_DEVICE_ATTR_NUM_SM, NVPA_DEVICE_ATTR__COUNT } |
NVPA_Status | NVPA_GetNumDevices (size_t *pNumDevices) |
NVPA_Status | NVPA_Device_GetName (size_t deviceIndex, const char **ppDeviceName) |
NVPA_Status | NVPA_Device_GetChipName (size_t deviceIndex, const char **ppChipName) |
NVPA_Status | NVPA_Device_GetAttribute (size_t deviceIndex, NVPA_DeviceAttr attr, uint64_t *pValue) |
Initialization. | |
Calling any of the NVPA_*_LoadDriver() functions enables additional hooks in the driver needed for profiling; these hooks may degrade overall performance or framerate. | |
typedef void(* | NVPA_ErrorHandler )(NVPA_Status) |
NVPA_Status | NVPA_Init (void) |
NVPA_Status | NVPA_GetVersionNumber (int *major, int *minor, int *rev) |
NVPA_Status | NVPA_D3D11_LoadDriver (void) |
NVPA_Status | NVPA_D3D12_LoadDriver (void) |
NVPA_Status | NVPA_OpenGL_LoadDriver (void) |
NVPA_Status | NVPA_EGL_LoadDriver (void) |
NVPA_Status | NVPA_CUDA_LoadDriver (void) |
NVPA_LOCAL NVPA_Status | NVPA_SetErrorHandler (NVPA_ErrorHandler errorHandler) |
NVPA_LOCAL NVPA_Status | NVPA_SetLibraryLoadPaths (size_t numPaths, const char **ppPaths) |
NVPA_LOCAL NVPA_Status | NVPA_SetLibraryLoadPathsW (size_t numPaths, const wchar_t **ppwPaths) |
Graphics object registration. | |
A graphics/compute API object must be registered with NvPerfAPI before it is passed to any other functions that take a context argument of that type. The registration only needs to occur once per object, and is typically performed right after creation. | |
NVPA_Status | NVPA_D3D11_Register (struct ID3D11DeviceContext *pDeviceContext) |
NVPA_Status | NVPA_D3D11_Unregister (struct ID3D11DeviceContext *pDeviceContext) |
NVPA_Status | NVPA_D3D12_Queue_Register (struct ID3D12CommandQueue *pCommandQueue) |
NVPA_Status | NVPA_D3D12_Queue_Unregister (struct ID3D12CommandQueue *pCommandQueue) |
NVPA_Status | NVPA_D3D12_CommandList_Register (struct ID3D12GraphicsCommandList *pCommandList) |
NVPA_Status | NVPA_D3D12_CommandList_Unregister (struct ID3D12GraphicsCommandList *pCommandList) |
NVPA_Status | NVPA_OpenGL_Register (void) |
NVPA_Status | NVPA_OpenGL_Unregister (void) |
NVPA_Status | NVPA_EGL_Register (void) |
NVPA_Status | NVPA_EGL_Unregister (void) |
NVPA_Status | NVPA_CUDA_Register (struct CUctx_st *ctx) |
NVPA_Status | NVPA_CUDA_Unregister (struct CUctx_st *ctx) |
Context interaction. | |
NVPA_Status | NVPA_D3D11_GetConfig (struct ID3D11DeviceContext *pDeviceContext, const NVPA_Config **ppConfig) |
NVPA_Status | NVPA_D3D12_Queue_GetConfig (struct ID3D12CommandQueue *pCommandQueue, const NVPA_Config **ppConfig) |
NVPA_Status | NVPA_OpenGL_GetConfig (const NVPA_Config **ppConfig) |
NVPA_Status | NVPA_EGL_GetConfig (const NVPA_Config **ppConfig) |
NVPA_Status | NVPA_CUDA_GetConfig (const NVPA_Config **ppConfig, struct CUctx_st *ctx) |
NVPA_Status | NVPA_OpenGL_GetCurrentContext (void **ppGLContext) |
NVPA_Status | NVPA_D3D11_GetSliDeviceCount (struct ID3D11DeviceContext *pDeviceContext, size_t *pSliDeviceCount) |
NVPA_Status | NVPA_D3D12_Queue_GetSliDeviceCount (struct ID3D12CommandQueue *pCommandQueue, size_t *pSliDeviceCount) |
NVPA_Status | NVPA_OpenGL_GetSliDeviceCount (size_t *pSliDeviceCount) |
NVPA_Status | NVPA_EGL_GetSliDeviceCount (size_t *pSliDeviceCount) |
NVPA_Status | NVPA_CUDA_GetSliDeviceCount (size_t *pSliDeviceCount, struct CUctx_st *ctx) |
NVPA_Status | NVPA_D3D11_GetDeviceIndex (struct ID3D11DeviceContext *pDeviceContext, size_t sliIndex, size_t *pDeviceIndex) |
NVPA_Status | NVPA_D3D12_Queue_GetDeviceIndex (struct ID3D12CommandQueue *pCommandQueue, size_t sliIndex, size_t *pDeviceIndex) |
NVPA_Status | NVPA_OpenGL_GetDeviceIndex (size_t sliIndex, size_t *pDeviceIndex) |
NVPA_Status | NVPA_EGL_GetDeviceIndex (size_t sliIndex, size_t *pDeviceIndex) |
NVPA_Status | NVPA_CUDA_GetDeviceIndex (size_t sliIndex, size_t *pDeviceIndex, struct CUctx_st *ctx) |
NVPA_Status | NVPA_D3D11_Finish (struct ID3D11DeviceContext *pDeviceContext) |
NVPA_Status | NVPA_D3D12_Queue_Finish (struct ID3D12CommandQueue *pCommandQueue) |
NVPA_Status | NVPA_D3D12_CommandList_EnableAutoRangesDraw (struct ID3D12GraphicsCommandList *pCommandList, NVPA_Bool enable) |
NVPA_Status | NVPA_D3D12_Queue_HandleProfilerEvents (struct ID3D12CommandQueue *pCommandQueue, uint64_t timeout, size_t numPasses, NVPA_GpuEventHandlingResult *pResult) |
NVPA_Status | NVPA_OpenGL_Finish (void) |
NVPA_Status | NVPA_EGL_Finish (void) |
NVPA_Status | NVPA_CUDA_Finish (struct CUctx_st *ctx) |
typedef void(* NVPA_ErrorHandler)(NVPA_Status) |
The callback signature for NVPA_SetErrorHandler().
enum NVPA_DeviceAttr |
NVPA_Status NVPA_CUDA_Finish | ( | struct CUctx_st * | ctx | ) |
Blocks until all preceding work on the current CUDA context is complete.
[in] | ctx | if NULL, the current CUDA context is used |
This function guarantees that metric values for all preceding calls are subsequently available. You may also use it as a general-purpose synchronization primitive.
NVPA_Status NVPA_CUDA_GetConfig | ( | const NVPA_Config ** | ppConfig, | |
struct CUctx_st * | ctx | |||
) |
Returns the current CUDA context's active Config.
[out] | ppConfig | |
[in] | ctx | if NULL, the current CUDA context is used |
NVPA_Status NVPA_CUDA_GetDeviceIndex | ( | size_t | sliIndex, | |
size_t * | pDeviceIndex, | |||
struct CUctx_st * | ctx | |||
) |
Returns a system-wide device index corresponding to the current CUDA context.
[in] | sliIndex | |
[out] | pDeviceIndex | |
[in] | ctx | if NULL, the current CUDA context is used |
sliIndex must fall in the range [0, NVPA_CUDA_GetSliDeviceCount())
NVPA_Status NVPA_CUDA_GetSliDeviceCount | ( | size_t * | pSliDeviceCount, | |
struct CUctx_st * | ctx | |||
) |
Returns the number of SLI devices bound to the current CUDA context.
[out] | pSliDeviceCount | |
[in] | ctx | if NULL, the current CUDA context is used |
NVPA_Status NVPA_CUDA_LoadDriver | ( | void | ) |
Loads the underlying driver for the CUDA API.
To profile CUDA, this function must be called before the first CUDA function call. On Linux, you must call cuInit(0) before calling this function.
NVPA_Status NVPA_CUDA_Register | ( | struct CUctx_st * | ctx | ) |
Registers the current CUDA context for use with NvPerfAPI.
[in] | ctx | if NULL, the current CUDA context is used |
NVPA_Status NVPA_CUDA_Unregister | ( | struct CUctx_st * | ctx | ) |
Unregisters the current CUDA context from NvPerfAPI.
[in] | ctx | if NULL, the current CUDA context is used |
NVPA_Status NVPA_D3D11_Finish | ( | struct ID3D11DeviceContext * | pDeviceContext | ) |
Blocks until all preceding work on pDeviceContext is complete.
This function is the equivalent of `glFinish()`, but works on D3D11.
This function guarantees that metric values for all preceding calls are subsequently available. You may also use it as a general-purpose synchronization primitive.
NVPA_Status NVPA_D3D11_GetConfig | ( | struct ID3D11DeviceContext * | pDeviceContext, | |
const NVPA_Config ** | ppConfig | |||
) |
Returns pDeviceContext's active Config.
The returned *ppConfig has its refcount incremented by 1. You must release the refcount by calling NVPA_Config_Release().
NVPA_Status NVPA_D3D11_GetDeviceIndex | ( | struct ID3D11DeviceContext * | pDeviceContext, | |
size_t | sliIndex, | |||
size_t * | pDeviceIndex | |||
) |
Returns a system-wide device index corresponding to pDeviceContext.
For single-GPU or non-SLI systems, sliIndex must be 0.
sliIndex must fall in the range [0, NVPA_D3D11_GetSliDeviceCount())
NVPA_Status NVPA_D3D11_GetSliDeviceCount | ( | struct ID3D11DeviceContext * | pDeviceContext, | |
size_t * | pSliDeviceCount | |||
) |
Returns the number of SLI devices bound to pDeviceContext.
NVPA_Status NVPA_D3D11_LoadDriver | ( | void | ) |
Loads the underlying driver for the D3D11 API.
To profile D3D11, this function must be called before the first call to any D3D or DXGI function.
NVPA_Status NVPA_D3D11_Register | ( | struct ID3D11DeviceContext * | pDeviceContext | ) |
Registers pDeviceContext for use with NvPerfAPI.
NVPA_Status NVPA_D3D11_Unregister | ( | struct ID3D11DeviceContext * | pDeviceContext | ) |
Unregisters pDeviceContext from NvPerfAPI.
Make sure to call this before pDeviceContext is destroyed using the D3D11 API.
NVPA_Status NVPA_D3D12_CommandList_EnableAutoRangesDraw | ( | struct ID3D12GraphicsCommandList * | pCommandList, | |
NVPA_Bool | enable | |||
) |
Controls whether subsequent draw calls will be profiled in AutoRangesDraw mode.
By default:
This function may only be called during CommandList construction before pCommandList->Close() has been called.
NVPA_Status NVPA_D3D12_CommandList_Register | ( | struct ID3D12GraphicsCommandList * | pCommandList | ) |
Registers pCommandList for use with NvPerfAPI.
Make sure to call this after the associated call to ID3D12Device::CreateCommandList() that created pCommandList, but before any subsequent calls to ID3D12Device::CreateCommandList().
NVPA_Status NVPA_D3D12_CommandList_Unregister | ( | struct ID3D12GraphicsCommandList * | pCommandList | ) |
Unregisters pCommandList from NvPerfAPI.
Make sure to call this before pCommandList is destroyed using the D3D12 API.
NVPA_Status NVPA_D3D12_LoadDriver | ( | void | ) |
Loads the underlying driver for the D3D12 API.
To profile D3D12, this function must be called before the first call to any D3D or DXGI function.
NVPA_Status NVPA_D3D12_Queue_Finish | ( | struct ID3D12CommandQueue * | pCommandQueue | ) |
Blocks until all preceding work on pCommandQueue is complete.
This function is the equivalent of `glFinish()`, but works on D3D12.
This function guarantees that metric values for all preceding calls are subsequently available. You may also use it as a general-purpose synchronization primitive.
NVPA_Status NVPA_D3D12_Queue_GetConfig | ( | struct ID3D12CommandQueue * | pCommandQueue, | |
const NVPA_Config ** | ppConfig | |||
) |
Returns pCommandQueue's active Config.
The returned *ppConfig has its refcount incremented by 1. You must release the refcount by calling NVPA_Config_Release().
NVPA_Status NVPA_D3D12_Queue_GetDeviceIndex | ( | struct ID3D12CommandQueue * | pCommandQueue, | |
size_t | sliIndex, | |||
size_t * | pDeviceIndex | |||
) |
Returns a system-wide device index corresponding to pCommandQueue.
For single-GPU or non-SLI systems, sliIndex must be 0.
sliIndex must fall in the range [0, NVPA_D3D12_Queue_GetSliDeviceCount())
NVPA_Status NVPA_D3D12_Queue_GetSliDeviceCount | ( | struct ID3D12CommandQueue * | pCommandQueue, | |
size_t * | pSliDeviceCount | |||
) |
Returns the number of SLI devices bound to pCommandQueue.
NVPA_Status NVPA_D3D12_Queue_HandleProfilerEvents | ( | struct ID3D12CommandQueue * | pCommandQueue, | |
uint64_t | timeout, | |||
size_t | numPasses, | |||
NVPA_GpuEventHandlingResult * | pResult | |||
) |
Handles the profiler events generated by the GPU. The user must call this function when profiling D3D12.
[in] | pCommandQueue | |
[in] | timeout | time in nanoseconds, 0 for no timeout |
[in] | numPasses | number of passes, 0 for unlimited number of passes |
[out] | pResult |
NVPA_Status NVPA_D3D12_Queue_Register | ( | struct ID3D12CommandQueue * | pCommandQueue | ) |
Registers pCommandQueue for use with NvPerfAPI.
NVPA_Status NVPA_D3D12_Queue_Unregister | ( | struct ID3D12CommandQueue * | pCommandQueue | ) |
Unregisters pCommandQueue from NvPerfAPI.
Make sure to call this before pCommandQueue is destroyed using the D3D12 API.
NVPA_Status NVPA_Device_GetAttribute | ( | size_t | deviceIndex, | |
NVPA_DeviceAttr | attr, | |||
uint64_t * | pValue | |||
) |
Returns the specified device attribute.
NVPA_Status NVPA_Device_GetChipName | ( | size_t | deviceIndex, | |
const char ** | ppChipName | |||
) |
Returns the device's chip name.
[in] | deviceIndex | 0 <= deviceIndex < NVPA_GetNumDevices() |
[out] | ppChipName |
NVPA_Status NVPA_Device_GetName | ( | size_t | deviceIndex, | |
const char ** | ppDeviceName | |||
) |
Returns the device name.
NVPA_Status NVPA_EGL_Finish | ( | void | ) |
Blocks until all preceding work on the current EGL context is complete.
There must be a current EGL context at the time of this call.
This function is the equivalent of `glFinish()`.
This function guarantees that metric values for all preceding calls are subsequently available. You may also use it as a general-purpose synchronization primitive.
NVPA_Status NVPA_EGL_GetConfig | ( | const NVPA_Config ** | ppConfig | ) |
Returns the current EGL context's active Config.
There must be a current EGL context at the time of this call.
The returned *ppConfig has its refcount incremented by 1. You must release the refcount by calling NVPA_Config_Release().
NVPA_Status NVPA_EGL_GetDeviceIndex | ( | size_t | sliIndex, | |
size_t * | pDeviceIndex | |||
) |
Returns a system-wide device index corresponding to the current EGL context.
There must be a current EGL context at the time of this call.
For single-GPU or non-SLI systems, sliIndex must be 0.
sliIndex must fall in the range [0, NVPA_EGL_GetSliDeviceCount())
NVPA_Status NVPA_EGL_GetSliDeviceCount | ( | size_t * | pSliDeviceCount | ) |
Returns the number of SLI devices bound to the current EGL context.
There must be a current EGL context at the time of this call.
NVPA_Status NVPA_EGL_LoadDriver | ( | void | ) |
Loads the underlying driver for the EGL API.
To profile EGL, this function must be called before the first EGL function call.
NVPA_Status NVPA_EGL_Register | ( | void | ) |
Registers the current EGL context for use with NvPerfAPI.
There must be a current EGL context at the time of this call.
NVPA_Status NVPA_EGL_Unregister | ( | void | ) |
Unregisters the current EGL context from NvPerfAPI.
There must be a current EGL context at the time of this call.
NVPA_Status NVPA_GetNumDevices | ( | size_t * | pNumDevices | ) |
Returns the number of devices.
NVPA_Status NVPA_GetVersionNumber | ( | int * | major, | |
int * | minor, | |||
int * | rev | |||
) |
Gets the version number of NvPerfAPI.
NVPA_Status NVPA_Init | ( | void | ) |
Loads and initializes the NvPerfAPI library.
NVPA_Status NVPA_OpenGL_Finish | ( | void | ) |
Blocks until all preceding work on the current GL context is complete.
There must be a current GL context at the time of this call.
This function is the equivalent of `glFinish()`.
This function guarantees that metric values for all preceding calls are subsequently available. You may also use it as a general-purpose synchronization primitive.
NVPA_Status NVPA_OpenGL_GetConfig | ( | const NVPA_Config ** | ppConfig | ) |
Returns the current GL context's active Config.
There must be a current GL context at the time of this call.
The returned *ppConfig has its refcount incremented by 1. You must release the refcount by calling NVPA_Config_Release().
NVPA_Status NVPA_OpenGL_GetCurrentContext | ( | void ** | ppGLContext | ) |
Returns the current OpenGL context.
Equivalent to wglGetCurrentContext(), glXGetCurrentContext(), or similar for the current platform, provided for convenience.
Only applies to OpenGL. For EGL, please call eglGetCurrentContext().
[out] | ppGLContext |
NVPA_Status NVPA_OpenGL_GetDeviceIndex | ( | size_t | sliIndex, | |
size_t * | pDeviceIndex | |||
) |
Returns a system-wide device index corresponding to the current GL context.
There must be a current GL context at the time of this call.
For single-GPU or non-SLI systems, sliIndex must be 0.
sliIndex must fall in the range [0, NVPA_OpenGL_GetSliDeviceCount())
NVPA_Status NVPA_OpenGL_GetSliDeviceCount | ( | size_t * | pSliDeviceCount | ) |
Returns the number of SLI devices bound to the current GL context.
There must be a current GL context at the time of this call.
NVPA_Status NVPA_OpenGL_LoadDriver | ( | void | ) |
Loads the underlying driver for the OpenGL API.
To profile OpenGL, this function must be called before the first call into a GL context layer. That is, before the first WGL or GLX call.
NVPA_Status NVPA_OpenGL_Register | ( | void | ) |
Registers the current GL context for use with NvPerfAPI.
There must be a current GL context at the time of this call.
NVPA_Status NVPA_OpenGL_Unregister | ( | void | ) |
Unregisters the current GL context from NvPerfAPI.
There must be a current GL context at the time of this call.
NVPA_LOCAL NVPA_Status NVPA_SetErrorHandler | ( | NVPA_ErrorHandler | errorHandler | ) |
Sets a global error handler for every NvPerfAPI call.
Calling this function enables an extra "debug layer" that invokes your callback every time an NvPerfAPI function call fails (returns an NVPA_Status other than NVPA_STATUS_SUCCESS).
You may use this to uniformly handle errors, by e.g. asserting, throwing, logging.
Pass NULL to disable global error handling.
The ErrorHandler state is part of the static library "nvperfapi_user". Each of your executables/DLLs/DSOs that links against "nvperfapi_user" will have an independently settable ErrorHandler.
NVPA_LOCAL NVPA_Status NVPA_SetLibraryLoadPaths | ( | size_t | numPaths, | |
const char ** | ppPaths | |||
) |
Sets library search path for NVPA_Init().
NVPA_Init() loads the PerfWorks DLL/DSO. This function sets ordered paths that will be searched with a LoadLibrary() or dlopen() call.
If load paths are set by this function, the default set of load paths will not be attempted.
Each path must point at a directory (not a file name).
This function is not thread-safe. See nvperfapi_user.c for details.
Example Usage:
const char* paths[] = { "path1", "path2", etc }; NVPA_SetLibraryLoadPaths(sizeof(paths)/sizeof(const char*), paths); NVPA_Init(); NVPA_SetLibraryLoadPaths(0, NULL);
NVPA_LOCAL NVPA_Status NVPA_SetLibraryLoadPathsW | ( | size_t | numPaths, | |
const wchar_t ** | ppwPaths | |||
) |
Sets library search path for NVPA_Init().
NVPA_Init() loads the PerfWorks DLL/DSO. This function sets ordered paths that will be searched with a LoadLibrary() or dlopen() call.
This function is the wchar_t variant of NVPA_SetLibraryLoadPaths.
If load paths are set by this function, the default set of load paths will not be attempted.
Each path must point at a directory (not a file name).
This function is not thread-safe. See nvperfapi_user.c for details.
Example Usage:
const wchar_t* wpaths[] = { L"path1", L"path2", etc }; NVPA_SetLibraryLoadPathsW(sizeof(wpaths)/sizeof(const wchar_t*), wpaths); NVPA_Init(); NVPA_SetLibraryLoadPathsW(0, NULL);