//===- MatmulOptimizer.h -------------------------------------------------===//
 
//
 
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 
// See https://llvm.org/LICENSE.txt for license information.
 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 
//
 
//===----------------------------------------------------------------------===//
 
 
 
#ifndef POLLY_MATMULOPTIMIZER_H
 
#define POLLY_MATMULOPTIMIZER_H
 
 
 
#include "isl/isl-noexceptions.h"
 
 
 
// Forward declaration only: this header refers to TargetTransformInfo solely
 
// through a pointer, so the full LLVM definition is not needed here.
 
namespace llvm {
 
class TargetTransformInfo;
 
}
 
 
 
namespace polly {
 
// Forward declaration: this header only refers to Dependences through a
 
// pointer, so the full class definition is not required here.
 
class Dependences;
 
 
 
/// Apply the BLIS matmul optimization pattern if possible.
 
///
 
/// Make the loops containing the matrix multiplication be the innermost
 
/// loops and apply the BLIS matmul optimization pattern. BLIS implements
 
/// gemm as three nested loops around a macro-kernel, plus two packing
 
/// routines. The macro-kernel is implemented in terms of two additional
 
/// loops around a micro-kernel. The micro-kernel is a loop around a rank-1
 
/// (i.e., outer product) update.
 
///
 
/// For a detailed description please see [1].
 
///
 
/// The order of the loops defines the data reused in the BLIS implementation
 
/// of gemm ([1]). In particular, elements of the matrix B, the second
 
/// operand of matrix multiplication, are reused between iterations of the
 
/// innermost loop. To keep the reused data in cache, only elements of matrix
 
/// A, the first operand of matrix multiplication, should be evicted during
 
/// an iteration of the innermost loop. To provide such a cache replacement
 
/// policy, elements of the matrix A can, in particular, be loaded first and,
 
/// consequently, be least-recently-used.
 
///
 
/// In our case matrices are stored in row-major order instead of the
 
/// column-major order used in the BLIS implementation ([1]). This affects
 
/// only the form of the BLIS micro-kernel and the computation of its
 
/// parameters. In particular, reused elements of the matrix B are
 
/// successively multiplied by specific elements of the matrix A.
 
///
 
/// Refs.:
 
/// [1] - Analytical Modeling is Enough for High Performance BLIS
 
/// Tze Meng Low, Francisco D Igual, Tyler M Smith, Enrique S Quintana-Orti
 
/// Technical Report, 2014
 
/// http://www.cs.utexas.edu/users/flame/pubs/TOMS-BLIS-Analytical.pdf
 
///
 
/// @see ScheduleTreeOptimizer::createMicroKernel
 
/// @see ScheduleTreeOptimizer::createMacroKernel
 
/// @see getMicroKernelParams
 
/// @see getMacroKernelParams
 
///
 
/// NOTE(review): none of the entities named in the @see lines above, nor
 
/// ScheduleTreeOptimizer::isMatrMultPattern mentioned below, are declared in
 
/// this header; confirm they still exist under these names.
 
///
 
/// TODO: Implement the packing transformation.
 
///
 
/// @param Node The node that contains a band to be optimized. The node
 
///             is required to successfully pass
 
///             ScheduleTreeOptimizer::isMatrMultPattern.
 
/// @param TTI  Target Transform Info. NOTE(review): passed by pointer;
 
///             whether a null pointer is accepted is not visible from this
 
///             declaration - confirm against the definition.
 
/// @param D    The dependencies. NOTE(review): passed by pointer; null
 
///             handling is likewise not visible here - confirm.
 
///
 
/// @returns    The transformed schedule node, or a null node if the
 
///             optimization cannot be applied. The result is an
 
///             isl::schedule_node object rather than a raw pointer, so
 
///             "null" here presumably means a null isl object (checkable
 
///             with is_null()) - confirm against the isl bindings in use.
 
isl::schedule_node
 
tryOptimizeMatMulPattern(isl::schedule_node Node,
 
                         const llvm::TargetTransformInfo *TTI,
 
                         const Dependences *D);
 
 
 
} // namespace polly
 
#endif // POLLY_MATMULOPTIMIZER_H