Details | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
14 | pmbaty | 1 | //===- MatmulOptimizer.h -------------------------------------------------===// |
2 | // |
||
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
||
4 | // See https://llvm.org/LICENSE.txt for license information. |
||
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
||
6 | // |
||
7 | //===----------------------------------------------------------------------===// |
||
8 | |||
9 | #ifndef POLLY_MATMULOPTIMIZER_H |
||
10 | #define POLLY_MATMULOPTIMIZER_H |
||
11 | |||
12 | #include "isl/isl-noexceptions.h" |
||
13 | |||
14 | namespace llvm { |
||
15 | class TargetTransformInfo; |
||
16 | } |
||
17 | |||
18 | namespace polly { |
||
19 | class Dependences; |
||
20 | |||
21 | /// Apply the BLIS matmul optimization pattern if possible. |
||
22 | /// |
||
23 | /// Make the loops containing the matrix multiplication be the innermost |
||
24 | /// loops and apply the BLIS matmul optimization pattern. BLIS implements |
||
25 | /// gemm as three nested loops around a macro-kernel, plus two packing |
||
26 | /// routines. The macro-kernel is implemented in terms of two additional |
||
27 | /// loops around a micro-kernel. The micro-kernel is a loop around a rank-1 |
||
28 | /// (i.e., outer product) update. |
||
29 | /// |
||
30 | /// For a detailed description please see [1]. |
||
31 | /// |
||
32 | /// The order of the loops defines the data reused in the BLIS implementation |
||
33 | /// of gemm ([1]). In particular, elements of the matrix B, the second |
||
34 | /// operand of matrix multiplication, are reused between iterations of the |
||
35 | /// innermost loop. To keep the reused data in cache, only elements of matrix |
||
36 | /// A, the first operand of matrix multiplication, should be evicted during |
||
37 | /// an iteration of the innermost loop. To provide such a cache replacement |
||
38 | /// policy, elements of the matrix A can, in particular, be loaded first and, |
||
39 | /// consequently, be least-recently-used. |
||
40 | /// |
||
41 | /// In our case matrices are stored in row-major order instead of |
||
42 | /// column-major order used in the BLIS implementation ([1]). It affects only |
||
43 | /// on the form of the BLIS micro kernel and the computation of its |
||
44 | /// parameters. In particular, reused elements of the matrix B are |
||
45 | /// successively multiplied by specific elements of the matrix A. |
||
46 | /// |
||
47 | /// Refs.: |
||
48 | /// [1] - Analytical Modeling is Enough for High Performance BLIS |
||
49 | /// Tze Meng Low, Francisco D Igual, Tyler M Smith, Enrique S Quintana-Orti |
||
50 | /// Technical Report, 2014 |
||
51 | /// http://www.cs.utexas.edu/users/flame/pubs/TOMS-BLIS-Analytical.pdf |
||
52 | /// |
||
53 | /// @see ScheduleTreeOptimizer::createMicroKernel |
||
54 | /// @see ScheduleTreeOptimizer::createMacroKernel |
||
55 | /// @see getMicroKernelParams |
||
56 | /// @see getMacroKernelParams |
||
57 | /// |
||
58 | /// TODO: Implement the packing transformation. |
||
59 | /// |
||
60 | /// @param Node The node that contains a band to be optimized. The node |
||
61 | /// is required to successfully pass |
||
62 | /// ScheduleTreeOptimizer::isMatrMultPattern. |
||
63 | /// @param TTI Target Transform Info. |
||
64 | /// @param D The dependencies. |
||
65 | /// |
||
66 | /// @returns The transformed schedule or nullptr if the optimization |
||
67 | /// cannot be applied. |
||
68 | isl::schedule_node |
||
69 | tryOptimizeMatMulPattern(isl::schedule_node Node, |
||
70 | const llvm::TargetTransformInfo *TTI, |
||
71 | const Dependences *D); |
||
72 | |||
73 | } // namespace polly |
||
74 | #endif // POLLY_MATMULOPTIMIZER_H |