// FLImaging 6.5.16.1
// BackendConv2D.h
#pragma once

// This backend is only built for 64-bit MSVC, Visual Studio 2015 (toolset 14.0) or newer.
#if _MSC_VER >= 1900 && defined(_M_X64)

#include "BackendBase.h"
#include "Parameters.h"

namespace FLImaging
{
	namespace AI
	{
	#ifdef CUDNN_MODE
		// Forward declaration of the cuDNN-accelerated implementation helper
		// owned by CBackendConv2D (see m_pCudnn below).
		template <typename T>
		class CCuda_Conv2D_Cudnn;
	#endif

		// Forward declaration only; full definition lives in the tensor header.
		template <typename T>
		class FL_EXPORT CTensor;

		/// @brief Computation backend for 2D convolution and transposed (de-)convolution.
		///
		/// Declares the public forward/backward entry points plus several protected
		/// CPU implementation strategies: im2col + GEMM, direct loops, and Winograd
		/// (4x4 and 6x6 tile transforms), with atrous (dilated) and grouped variants.
		/// NOTE(review): the exact dispatch policy between GEMM / direct / Winograd
		/// is not visible in this header — presumably chosen from m_convParams; confirm
		/// against the .cpp. T is the element type (presumably float/double).
		template <typename T>
		class FL_EXPORT CBackendConv2D : public CBackendBase<T>
		{
		public:
			CBackendConv2D();
			CBackendConv2D(const CBackendConv2D<T>& bc);
			virtual ~CBackendConv2D();

			/// @brief Set / get the convolution hyper-parameters (stored in m_convParams).
			virtual const CResult SetConvolutionParams(const CConvolutionParameters& convParams);
			virtual CConvolutionParameters GetConvolutionParams();
			/// @brief Set / get the transposed-convolution hyper-parameters.
			virtual const CResult SetTransConvolutionParams(const CTransConvolutionParameters& convParams);
			virtual CTransConvolutionParameters GetTransConvolutionParams();

			/// @brief Forward convolution: Y = conv(X, W); vctYShape gives the expected output shape.
			virtual const CResult Forward(CTensor<T>* pTsrX, CTensor<T>* pTsrW, CTensor<T>* pTsrY, const std::vector<int64_t>& vctYShape);
			/// @brief Input gradient dX from output gradient dY and kernel W.
			/// When bAddGradient is true the result is presumably accumulated into pTsrDx
			/// via pTsrAddGradientTemp — TODO confirm in the implementation.
			virtual const CResult DerivativeImage(CTensor<T>* pTsrDy, CTensor<T>* pTsrW, CTensor<T>* pTsrDx, const std::vector<int64_t>& vctDxShape, bool bAddGradient, CTensor<T>* pTsrAddGradientTemp = nullptr, CTensor<T>* pTsrKernelBuffer = nullptr);
			/// @brief Kernel gradient dW from output gradient dY and input X.
			virtual const CResult DerivativeKernel(CTensor<T>* pTsrDy, CTensor<T>* pTsrX, CTensor<T>* pTsrDw, const std::vector<int64_t>& vctDwShape, bool bAddGradient, CTensor<T>* pTsrAddGradientTemp = nullptr);

			/// @brief Forward transposed convolution; optional scratch tensors hold the
			/// transposed kernel and an intermediate kernel buffer.
			virtual const CResult TransConvForward(CTensor<T>* pTsrX, CTensor<T>* pTsrW, CTensor<T>* pTsrY, CTensor<T>* pTsrKernelTranspose = nullptr, CTensor<T>* pTsrKernelBuffer = nullptr);
			/// @brief Input gradient of the transposed convolution.
			virtual const CResult TransConvDerivativeImage(CTensor<T>* pTsrDy, CTensor<T>* pTsrW, CTensor<T>* pTsrDx, const std::vector<int64_t>& vctXShape, bool bAddGradient, CTensor<T>* pTsrKernelBuffer = nullptr, CTensor<T>* pTsrAddGradientTemp = nullptr);
			/// @brief Kernel gradient of the transposed convolution.
			virtual const CResult TransConvDerivativeKernel(CTensor<T>* pTsrDy, CTensor<T>* pTsrX, CTensor<T>* pTsrDw, const std::vector<int64_t>& vctDwShape, bool bAddGradient, CTensor<T>* pTsrInputTranspose = nullptr, CTensor<T>* pTsrKernelTranspose = nullptr, CTensor<T>* pTsrAddGradientTemp = nullptr);

			DeclareGetClassType();
			SupportToDuplicateObjectWithoutCreateNewObject(CBackendConv2D<T>, *this);

		protected:
			// --- im2col / GEMM lowering -------------------------------------------------
			/// @brief Forward convolution implemented as im2col + GEMM.
			virtual const CResult ForwardGEMM(CTensor<T>* pTsrOperand, CTensor<T>* pTsrKernel, CTensor<T>* pTsrResult);
			/// @brief Unfold image patches into columns (im2col) for GEMM-based convolution.
			virtual void Im2Col(const T* pTData_im, const int32_t i32Channels, const int32_t i32Height, const int32_t i32Width, const int32_t i32Kernel_h, const int32_t i32Kernel_w, const int32_t i32Pad_h, const int32_t i32Pad_w, const int32_t i32Stride_h, const int32_t i32Stride_w, const int32_t i32Dilation_h, const int32_t i32Dilation_w, T* pTData_col, const int32_t i32OutputH = 0, const int32_t i32OutputW = 0);
			/// @brief im2col variant used by the transposed-convolution paths.
			virtual void Im2ColTranspose(const T* pTData_im, const int32_t i32Channels, const int32_t i32Height, const int32_t i32Width, const int32_t i32Kernel_h, const int32_t i32Kernel_w, const int32_t i32Pad_h, const int32_t i32Pad_w, const int32_t i32Stride_h, const int32_t i32Stride_w, const int32_t i32Dilation_h, const int32_t i32Dilation_w, T* pTData_col, const int32_t i32OutputH = 0, const int32_t i32OutputW = 0);
			/// @brief Fold columns back into an image (col2im), the inverse of Im2Col.
			/// NOTE(review): parameter names `PTData_col` and `width` break the file's
			/// pT*/i32* convention (cf. Im2Col) — consider renaming in a coordinated change.
			virtual void Col2Im(const T* PTData_col, const int32_t i32Channels, const int32_t i32Height, const int32_t width, const int32_t i32Kernel_h, const int32_t i32Kernel_w, const int32_t i32Pad_h, const int32_t i32Pad_w, const int32_t i32Stride_h, const int32_t i32Stride_w, const int32_t i32Dilation_h, const int32_t i32Dilation_w, T* pTData_im, const int32_t i32OutputH = 0, const int32_t i32OutputW = 0);

			/// @brief General matrix multiply C = tAlpha*A*B + BETA*C with leading
			/// dimensions i32Lda/i32Ldb/i32Ldc (BLAS-style gemm signature).
			virtual void GEMM(int32_t i32M, int32_t i32N, int32_t i32K, T tAlpha, const T* pTA, int32_t i32Lda, const T* pTB, int32_t i32Ldb, T BETA, T* pTC, int32_t i32Ldc);

		protected:
			// --- CPU kernels ------------------------------------------------------------
			/// @brief CPU implicit-GEMM forward convolution.
			virtual const CResult ForwardConvImpGEMMCPU(CTensor<T>* pTsrX, CTensor<T>* pTsrW, CTensor<T>* pTsrY);
			// Tiled GEMM micro-kernels; the 16/36 unit sizes presumably match the
			// Winograd 4x4 (16-element) and 6x6 (36-element) tile transforms — TODO confirm.
			virtual void GEMMUnit16MostN(int32_t i32M, int32_t i32N, int32_t i32K, const T* pTA, int32_t i32Lda, const T* pTB, int32_t i32Ldb, T BETA, T* pTC, int32_t i32Ldc);
			virtual void GEMMUnit16MostM(int32_t i32M, int32_t i32N, int32_t i32K, const T* pTA, int32_t i32Lda, const T* pTB, int32_t i32Ldb, T BETA, T* pTC, int32_t i32Ldc);

			virtual void GEMMUnit36MostN(int32_t i32M, int32_t i32N, int32_t i32K, const T* pTA, int32_t i32Lda, const T* pTB, int32_t i32Ldb, T BETA, T* pTC, int32_t i32Ldc);
			virtual void GEMMUnit36MostM(int32_t i32M, int32_t i32N, int32_t i32K, const T* pTA, int32_t i32Lda, const T* pTB, int32_t i32Ldb, T BETA, T* pTC, int32_t i32Ldc);

			// --- Winograd domain transforms --------------------------------------------
			// dY -> Winograd transform domain (column / 4x4-row / 6x6-row layouts).
			virtual void DyToTransformBatchColumn(const T* pTDy, T* pTDyTransform, int64_t i64DyBatch, int64_t i64DyChannel, int64_t i64DyHeight, int64_t i64DyWidth, int64_t i64DyTFColumn);
			virtual void DyToTransform4x4BatchRow(const T* pTDy, T* pTDyTransform, int64_t i64DyBatch, int64_t i64DyChannel, int64_t i64DyHeight, int64_t i64DyWidth, int64_t i64DyTFColumn);
			virtual void DyToTransform6x6BatchRow(const T* pTDy, T* pTDyTransform, int64_t i64DyBatch, int64_t i64DyChannel, int64_t i64DyHeight, int64_t i64DyWidth, int64_t i64DyTFColumn);

			// Kernel -> Winograd transform domain (4x4 / 6x6 tiles).
			virtual void KernelToTransform4x4(const T* pTKernel, T* pTTransform, int64_t i64KernelBatch, int64_t i64Ch, int64_t i64KernelHeight, int64_t i64KernelWidth);
			virtual void KernelToTransform6x6(const T* pTKernel, T* pTTransform, int64_t i64KernelBatch, int64_t i64Ch, int64_t i64KernelHeight, int64_t i64KernelWidth);

			// Input -> Winograd transform domain, in several memory layouts
			// (channel-major row/column vs batch-major row).
			virtual void InputToTransform6x6ChRow(const T* pTInput, T* pTTransform, int64_t i64Ch, int64_t i64Height, int64_t i64Width, int64_t i64PadH, int64_t i64PadW);
			virtual void InputToTransformChRow(const T* pTInput, T* pTTransform, int64_t i64Ch, int64_t i64Height, int64_t i64Width, int64_t i64PadH, int64_t i64PadW);
			virtual void InputToTransform6x6BatchRow(const T* pTInput, T* pTTransform, int64_t i64Ch, int64_t i64Height, int64_t i64Width, int64_t i64PadH, int64_t i64PadW);
			virtual void InputToTransformBatchRow(const T* pTInput, T* pTTransform, int64_t i64Batch, int64_t i64Ch, int64_t i64Height, int64_t i64Width, int64_t i64PadH, int64_t i64PadW);
			virtual void InputToTransformChColumn(const T* pTInput, T* pTTransform, int64_t i64Ch, int64_t i64Height, int64_t i64Width, int64_t i64PadH, int64_t i64PadW);

			// Inverse transform: Winograd output domain -> spatial output tensor.
			virtual void YTransformToOutput4x4(const T* pTOutputTransform, T* pTOutput, int64_t i64OutputCh, int64_t i64InputCh, int64_t i64InputH, int64_t i64InputW, int64_t i64PadH, int64_t i64PadW);
			virtual void YTransformToOutput6x6(const T* pTOutputTransform, T* pTOutput, int64_t i64OutputCh, int64_t i64InputCh, int64_t i64InputH, int64_t i64InputW, int64_t i64PadH, int64_t i64PadW);

			// --- Strategy implementations: forward / dX / dW via Winograd, direct, GEMM -
			virtual const CResult Forward_ConvWinograd(CTensor<T>* pTsrX, CTensor<T>* pTsrW, CTensor<T>* pTsrY);
			virtual const CResult Forward_ConvDirect(CTensor<T>* pTsrX, CTensor<T>* pTsrW, CTensor<T>* pTsrY);
			virtual const CResult DerivativeImage_ConvGEMM(CTensor<T>* pTsrDy, CTensor<T>* pTsrW, CTensor<T>* pTsrDx, CTensor<T>* pTsrKernelBuffer);
			virtual const CResult DerivativeImage_Direct(CTensor<T>* pTsrDy, CTensor<T>* pTsrW, CTensor<T>* pTsrDx);
			virtual const CResult DerivativeImage_Winograd(CTensor<T>* pTsrDy, CTensor<T>* pTsrW, CTensor<T>* pTsrDx, CTensor<T>* pTsrKernelBuffer);
			virtual const CResult DerivativeKernel_ConvGEMM(CTensor<T>* pTsrDy, CTensor<T>* pTsrX, CTensor<T>* pTsrDw);
			virtual const CResult DerivativeKernel_Direct(CTensor<T>* pTsrDy, CTensor<T>* pTsrX, CTensor<T>* pTsrDw);
			virtual const CResult DerivativeKernel_Winograd(CTensor<T>* pTsrDy, CTensor<T>* pTsrX, CTensor<T>* pTsrDw);

			// --- Atrous (dilated) convolution variants ---------------------------------
			virtual const CResult Forward_AtrousConvDirect(CTensor<T>* pTsrOperand, CTensor<T>* pTsrKernel, CTensor<T>* pTsrResult);
			virtual const CResult DerivativeImage_Direct_Atrous(CTensor<T>* pTsrDy, CTensor<T>* pTsrW, CTensor<T>* pTsrDx);
			virtual const CResult DerivativeKernel_Direct_Atrous(CTensor<T>* pTsrDy, CTensor<T>* pTsrX, CTensor<T>* pTsrDw);

			// --- Grouped convolution variants ------------------------------------------
			virtual const CResult Forward_GroupConvDirect(CTensor<T>* pTsrOperand, CTensor<T>* pTsrKernel, CTensor<T>* pTsrResult);
			virtual const CResult DerivativeImage_Direct_Group(CTensor<T>* pTsrDy, CTensor<T>* pTsrW, CTensor<T>* pTsrDx);
			virtual const CResult DerivativeKernel_Direct_Group(CTensor<T>* pTsrDy, CTensor<T>* pTsrX, CTensor<T>* pTsrDw);

			virtual const CResult ForwardGEMM_Group(CTensor<T>* pTsrOperand, CTensor<T>* pTsrKernel, CTensor<T>* pTsrResult);
			virtual const CResult DerivativeImage_ConvGEMM_Group(CTensor<T>* pTsrDy, CTensor<T>* pTsrW, CTensor<T>* pTsrDx, CTensor<T>* pTsrKernelBuffer);
			virtual const CResult DerivativeKernel_ConvGEMM_Group(CTensor<T>* pTsrDy, CTensor<T>* pTsrX, CTensor<T>* pTsrDw);

			virtual const CResult Forward_ConvWinograd_Group(CTensor<T>* pTsrX, CTensor<T>* pTsrW, CTensor<T>* pTsrY);
			virtual const CResult DerivativeImage_Winograd_Group(CTensor<T>* pTsrDy, CTensor<T>* pTsrW, CTensor<T>* pTsrDx, CTensor<T>* pTsrKernelBuffer);
			virtual const CResult DerivativeKernel_Winograd_Group(CTensor<T>* pTsrDy, CTensor<T>* pTsrX, CTensor<T>* pTsrDw);

		protected:
			// Current convolution hyper-parameters (set via SetConvolutionParams).
			CConvolutionParameters m_convParams;
			// Extra output padding for transposed convolution (Y/X directions).
			// NOTE(review): presumably set by SetTransConvolutionParams — confirm in the .cpp.
			int64_t m_i64OutputPaddingY;
			int64_t m_i64OutputPaddingX;

		#ifdef CUDNN_MODE
			// cuDNN implementation helper; ownership/lifetime managed elsewhere — confirm in the .cpp.
			CCuda_Conv2D_Cudnn<T>* m_pCudnn;
		#endif
		};
	}
}

#endif