Subversion Repositories QNX 8.QNX8 LLVM/Clang compiler suite

Rev

Blame | Last modification | View Log | Download | RSS feed

  1. //===- MatmulOptimizer.h -------------------------------------------------===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8.  
  9. #ifndef POLLY_MATMULOPTIMIZER_H
  10. #define POLLY_MATMULOPTIMIZER_H
  11.  
  12. #include "isl/isl-noexceptions.h"
  13.  
  14. namespace llvm {
  15. class TargetTransformInfo;
  16. }
  17.  
  18. namespace polly {
  19. class Dependences;
  20.  
  21. /// Apply the BLIS matmul optimization pattern if possible.
  22. ///
  23. /// Make the loops containing the matrix multiplication be the innermost
  24. /// loops and apply the BLIS matmul optimization pattern. BLIS implements
  25. /// gemm as three nested loops around a macro-kernel, plus two packing
  26. /// routines. The macro-kernel is implemented in terms of two additional
  27. /// loops around a micro-kernel. The micro-kernel is a loop around a rank-1
  28. /// (i.e., outer product) update.
  29. ///
  30. /// For a detailed description please see [1].
  31. ///
  32. /// The order of the loops defines the data reused in the BLIS implementation
  33. /// of gemm ([1]). In particular, elements of the matrix B, the second
  34. /// operand of matrix multiplication, are reused between iterations of the
  35. /// innermost loop. To keep the reused data in cache, only elements of matrix
  36. /// A, the first operand of matrix multiplication, should be evicted during
  37. /// an iteration of the innermost loop. To provide such a cache replacement
  38. /// policy, elements of the matrix A can, in particular, be loaded first and,
  39. /// consequently, be least-recently-used.
  40. ///
  41. /// In our case matrices are stored in row-major order instead of
  42. /// column-major order used in the BLIS implementation ([1]). It affects only
  43. /// on the form of the BLIS micro kernel and the computation of its
  44. /// parameters. In particular, reused elements of the matrix B are
  45. /// successively multiplied by specific elements of the matrix A.
  46. ///
  47. /// Refs.:
  48. /// [1] - Analytical Modeling is Enough for High Performance BLIS
  49. /// Tze Meng Low, Francisco D Igual, Tyler M Smith, Enrique S Quintana-Orti
  50. /// Technical Report, 2014
  51. /// http://www.cs.utexas.edu/users/flame/pubs/TOMS-BLIS-Analytical.pdf
  52. ///
  53. /// @see ScheduleTreeOptimizer::createMicroKernel
  54. /// @see ScheduleTreeOptimizer::createMacroKernel
  55. /// @see getMicroKernelParams
  56. /// @see getMacroKernelParams
  57. ///
  58. /// TODO: Implement the packing transformation.
  59. ///
  60. /// @param Node The node that contains a band to be optimized. The node
  61. ///             is required to successfully pass
  62. ///             ScheduleTreeOptimizer::isMatrMultPattern.
  63. /// @param TTI  Target Transform Info.
  64. /// @param D    The dependencies.
  65. ///
  66. /// @returns    The transformed schedule or nullptr if the optimization
  67. ///             cannot be applied.
  68. isl::schedule_node
  69. tryOptimizeMatMulPattern(isl::schedule_node Node,
  70.                          const llvm::TargetTransformInfo *TTI,
  71.                          const Dependences *D);
  72.  
  73. } // namespace polly
  74. #endif // POLLY_MATMULOPTIMIZER_H
  75.