3#if _MSC_VER >= 1900 && defined(_M_X64)
5#include "BackendBase.h"
// Forward declarations of types that this header only names through pointers
// or as a base class.
//
// CCuda_ComputationalGraphBatchNorm2D_Cudnn is the pointee type of the
// m_pCudnn member of CBackendBatchNorm2D.
13 class CCuda_ComputationalGraphBatchNorm2D_Cudnn;
// CTensor<T> is the tensor type passed by pointer to every forward/derivative
// entry point declared in CBackendBatchNorm2D.
17 class FL_EXPORT CTensor;
// CBackendBase<T> is the public base class of CBackendBatchNorm2D.
// NOTE(review): "BackendBase.h" is also included above, so this forward
// declaration may be redundant - confirm before removing.
20 class FL_EXPORT CBackendBase;
// CBackendBatchNorm2D<T>: backend class for a 2D batch-normalization
// operation, publicly derived from CBackendBase<T>.
//
// The members declared in this class fall into four groups:
//   - configuration accessors (epsilon, momentum, affine flag),
//   - the Forward / Derivative entry points,
//   - device-specific variants with _CUDA and _CPU suffixes,
//   - a pointer to a CCuda_ComputationalGraphBatchNorm2D_Cudnn<T> helper.
// NOTE(review): the "batch normalization" reading is taken from the class and
// parameter names only; the implementation is not visible in this header.
23 class FL_EXPORT CBackendBatchNorm2D :
public CBackendBase<T>
// Default constructor.
// NOTE(review): initial values of epsilon, momentum, the affine flag and
// m_pCudnn are set in the implementation file - not visible here.
27 CBackendBatchNorm2D();
// Copy constructor.
// @param bbn  Source object to copy from.
// NOTE(review): this class holds a raw pointer member (m_pCudnn); whether the
// copy shares, clones or resets that pointer is decided in the implementation
// - confirm there.
28 CBackendBatchNorm2D(
const CBackendBatchNorm2D<T>& bbn);
// Virtual destructor, so objects can be destroyed through a base-class pointer.
// NOTE(review): whether m_pCudnn is released here is not visible in this
// header - confirm in the implementation.
29 virtual ~CBackendBatchNorm2D();
// Sets the epsilon parameter.
// @param tEpsilon  New epsilon value, of the element type T.
// @return CResult describing the outcome (type declared elsewhere).
// NOTE(review): presumably the small constant added to the variance for
// numerical stability; any range validation is in the implementation - confirm.
31 virtual const CResult SetEpsilon(T tEpsilon);
// Sets the momentum parameter.
// @param tMomentum  New momentum value, of the element type T.
// @return CResult describing the outcome (type declared elsewhere).
// NOTE(review): presumably the factor used to update the running (inference)
// mean/variance; the exact update convention is not visible here - confirm.
32 virtual const CResult SetMomentum(T tMomentum);
// Enables or disables the affine option.
// @param bAffine  true to use affine mode, false otherwise.
// @return CResult describing the outcome (type declared elsewhere).
// NOTE(review): presumably controls whether the weight/bias tensors passed to
// Forward/Derivative are applied - confirm in the implementation.
33 virtual const CResult UseAffine(
bool bAffine);
// Returns the epsilon parameter as a value of type T (counterpart of
// SetEpsilon). Declared non-const.
34 virtual T GetEpsilon();
// Returns the momentum parameter as a value of type T (counterpart of
// SetMomentum). Declared non-const.
35 virtual T GetMomentum();
// Returns the affine flag as a bool (counterpart of UseAffine).
// Declared non-const.
36 virtual bool IsAffineUsed();
// Forward-pass entry point.
//
// Takes eight tensor pointers and a training-mode flag. The parameter list is
// identical to BatchNorm2D_CUDA and BatchNorm2D_CPU.
// NOTE(review): presumably this dispatches to one of those two depending on
// the active device - confirm in the implementation.
//
// Parameter roles below are inferred from the names only - TODO confirm:
// @param pTsrX                 presumably the input tensor.
// @param pTsrWeight            presumably the affine scale.
// @param pTsrBias              presumably the affine shift.
// @param pTsrY                 presumably the output tensor.
// @param pTsrVarBuffer         presumably the per-batch variance buffer.
// @param pTsrMeanBuffer        presumably the per-batch mean buffer.
// @param pTsrInferVar          presumably the variance used for inference.
// @param pTsrInferMean         presumably the mean used for inference.
// @param bTrainingModeEnabled  training-mode flag.
// @return CResult describing the outcome (type declared elsewhere).
// NOTE(review): which pointers may be null, and the expected tensor shapes,
// are not specified in this header.
38 virtual const CResult Forward(CTensor<T>* pTsrX, CTensor<T>* pTsrWeight, CTensor<T>* pTsrBias, CTensor<T>* pTsrY, CTensor<T>* pTsrVarBuffer, CTensor<T>* pTsrMeanBuffer, CTensor<T>* pTsrInferVar, CTensor<T>* pTsrInferMean,
bool bTrainingModeEnabled);
// Backward-pass (derivative) entry point.
//
// Takes the forward-pass tensors (without pTsrY), four gradient tensors, a
// training-mode flag, three add-gradient flags and three optional temporary
// tensors that default to nullptr.
//
// Parameter roles below are inferred from the names only - TODO confirm:
// @param pTsrX, pTsrWeight, pTsrBias      same-named tensors as in Forward.
// @param pTsrVarBuffer, pTsrMeanBuffer    same-named buffers as in Forward.
// @param pTsrInferVar, pTsrInferMean      same-named tensors as in Forward.
// @param pTsrDy   presumably the gradient with respect to the output.
// @param pTsrDx   presumably the gradient with respect to the input.
// @param pTsrDw   presumably the gradient with respect to the weight.
// @param pTsrDb   presumably the gradient with respect to the bias.
// @param bTrainingModeEnabled       training-mode flag.
// @param bAddGradient               presumably: accumulate into pTsrDx.
// @param bWeightAddGradient         presumably: accumulate into pTsrDw.
// @param bBiasAddGradient           presumably: accumulate into pTsrDb.
// @param pTsrAddGradientTemp        optional temporary tensor (default nullptr).
// @param pTsrWeightAddGradientTemp  optional temporary tensor (default nullptr).
// @param pTsrBiasAddGradientTemp    optional temporary tensor (default nullptr).
// @return CResult describing the outcome (type declared elsewhere).
//
// NOTE(review): this is a virtual function with default arguments. Defaults
// are bound by the static type at the call site, so any override should
// repeat the same defaults.
// NOTE(review): Derivative_CUDA and Derivative_CPU take none of the three
// temporary tensors; how they are used is decided in the implementation.
39 virtual const CResult Derivative(CTensor<T>* pTsrX, CTensor<T>* pTsrWeight, CTensor<T>* pTsrBias,
40 CTensor<T>* pTsrVarBuffer, CTensor<T>* pTsrMeanBuffer, CTensor<T>* pTsrInferVar, CTensor<T>* pTsrInferMean,
41 CTensor<T>* pTsrDy, CTensor<T>* pTsrDx, CTensor<T>* pTsrDw, CTensor<T>* pTsrDb,
bool bTrainingModeEnabled,
42 bool bAddGradient,
bool bWeightAddGradient,
bool bBiasAddGradient, CTensor<T>* pTsrAddGradientTemp =
nullptr, CTensor<T>* pTsrWeightAddGradientTemp =
nullptr, CTensor<T>* pTsrBiasAddGradientTemp =
nullptr);
// CUDA variant of the forward pass.
//
// The parameter list is identical to Forward: eight tensor pointers followed
// by the training-mode flag.
// @return CResult describing the outcome (type declared elsewhere).
// NOTE(review): presumably implemented through the cuDNN helper held in
// m_pCudnn, and presumably expects device-resident tensors - confirm in the
// implementation.
45 virtual const CResult BatchNorm2D_CUDA(CTensor<T>* pTsrX, CTensor<T>* pTsrWeight, CTensor<T>* pTsrBias, CTensor<T>* pTsrY, CTensor<T>* pTsrVarBuffer, CTensor<T>* pTsrMeanBuffer, CTensor<T>* pTsrInferVar, CTensor<T>* pTsrInferMean,
bool bTrainingModeEnabled);
// CPU variant of the forward pass.
//
// The parameter list is identical to Forward and BatchNorm2D_CUDA: eight
// tensor pointers followed by the training-mode flag.
// @return CResult describing the outcome (type declared elsewhere).
// NOTE(review): presumably expects host-resident tensors - confirm in the
// implementation.
46 virtual const CResult BatchNorm2D_CPU(CTensor<T>* pTsrX, CTensor<T>* pTsrWeight, CTensor<T>* pTsrBias, CTensor<T>* pTsrY, CTensor<T>* pTsrVarBuffer, CTensor<T>* pTsrMeanBuffer, CTensor<T>* pTsrInferVar, CTensor<T>* pTsrInferMean,
bool bTrainingModeEnabled);
// CUDA variant of the backward pass.
//
// Takes the same tensors and flags as Derivative, in the same order, but
// without the three optional temporary-tensor parameters.
// @param bTrainingModeEnabled  training-mode flag.
// @param bAddGradient, bWeightAddGradient, bBiasAddGradient
//        add-gradient flags; presumably select accumulation into
//        pTsrDx / pTsrDw / pTsrDb respectively - TODO confirm.
// @return CResult describing the outcome (type declared elsewhere).
// NOTE(review): presumably implemented through the cuDNN helper held in
// m_pCudnn - confirm in the implementation.
48 virtual const CResult Derivative_CUDA(CTensor<T>* pTsrX, CTensor<T>* pTsrWeight, CTensor<T>* pTsrBias,
49 CTensor<T>* pTsrVarBuffer, CTensor<T>* pTsrMeanBuffer, CTensor<T>* pTsrInferVar, CTensor<T>* pTsrInferMean,
50 CTensor<T>* pTsrDy, CTensor<T>* pTsrDx, CTensor<T>* pTsrDw, CTensor<T>* pTsrDb,
bool bTrainingModeEnabled,
bool bAddGradient,
bool bWeightAddGradient,
bool bBiasAddGradient);
// CPU variant of the backward pass.
//
// Takes the same tensor pointers as Derivative_CUDA plus the training-mode
// flag. Unlike Derivative and Derivative_CUDA, it has no bAddGradient,
// bWeightAddGradient or bBiasAddGradient parameters.
// @return CResult describing the outcome (type declared elsewhere).
// NOTE(review): with no add-gradient flags, any gradient accumulation for the
// CPU path has to happen outside this function; how that is done is not
// visible in this header - confirm in the implementation.
51 virtual const CResult Derivative_CPU(CTensor<T>* pTsrX, CTensor<T>* pTsrWeight, CTensor<T>* pTsrBias,
52 CTensor<T>* pTsrVarBuffer, CTensor<T>* pTsrMeanBuffer, CTensor<T>* pTsrInferVar, CTensor<T>* pTsrInferMean,
53 CTensor<T>* pTsrDy, CTensor<T>* pTsrDx, CTensor<T>* pTsrDw, CTensor<T>* pTsrDb,
bool bTrainingModeEnabled);
// Invocation of DeclareGetClassType, which is defined outside this file.
// NOTE(review): presumably a project macro that declares a class-type query
// member - confirm against its definition.
56 DeclareGetClassType();
// Invocation of SupportToDuplicateObjectWithoutCreateNewObject, defined
// outside this file, with this class type and *this as its arguments.
// NOTE(review): presumably a project macro that generates object-duplication
// support for this class - confirm against its definition.
57 SupportToDuplicateObjectWithoutCreateNewObject(CBackendBatchNorm2D<T>, *
this);
// Raw pointer to the cuDNN-named helper object for this backend.
// NOTE(review): ownership and lifetime (who allocates and who frees it) are
// not expressed by the type - confirm in the constructor and destructor.
65 CCuda_ComputationalGraphBatchNorm2D_Cudnn<T>* m_pCudnn;