Details | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
14 | pmbaty | 1 | //===- LoopGeneratorsKMP.h - IR helper to create loops ----------*- C++ -*-===// |
2 | // |
||
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
||
4 | // See https://llvm.org/LICENSE.txt for license information. |
||
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
||
6 | // |
||
7 | //===----------------------------------------------------------------------===// |
||
8 | // |
||
9 | // This file contains functions to create scalar and OpenMP parallel loops |
||
10 | // as LLVM-IR. |
||
11 | // |
||
12 | //===----------------------------------------------------------------------===// |
||
13 | #ifndef POLLY_LOOP_GENERATORS_KMP_H |
||
14 | #define POLLY_LOOP_GENERATORS_KMP_H |
||
15 | |||
16 | #include "polly/CodeGen/IRBuilder.h" |
||
17 | #include "polly/CodeGen/LoopGenerators.h" |
||
18 | #include "polly/Support/ScopHelper.h" |
||
19 | #include "llvm/ADT/SetVector.h" |
||
20 | |||
21 | namespace polly { |
||
22 | using llvm::GlobalValue; |
||
23 | using llvm::GlobalVariable; |
||
24 | |||
25 | /// This ParallelLoopGenerator subclass handles the generation of parallelized |
||
26 | /// code, utilizing the LLVM OpenMP library. |
||
27 | class ParallelLoopGeneratorKMP final : public ParallelLoopGenerator { |
||
28 | public: |
||
29 | /// Create a parallel loop generator for the current function and initialize SourceLocationInfo. |
||
30 | ParallelLoopGeneratorKMP(PollyIRBuilder &Builder, LoopInfo &LI, |
||
31 | DominatorTree &DT, const DataLayout &DL) |
||
32 | : ParallelLoopGenerator(Builder, LI, DT, DL) { |
||
33 | SourceLocationInfo = createSourceLocation(); |
||
34 | } |
||
35 | |||
36 | protected: |
||
37 | /// The source location struct of this loop (assigned in the constructor). |
||
38 | /// ident_t = type { i32, i32, i32, i32, i8* } |
||
39 | GlobalValue *SourceLocationInfo; |
||
40 | |||
41 | /// Convert the combination of given chunk size and scheduling type (which |
||
42 | /// might have been set via the command line) into the corresponding |
||
43 | /// scheduling type. This may result (e.g.) in a 'change' from |
||
44 | /// "static chunked" scheduling to "static non-chunked" (regarding the |
||
45 | /// provided and returned scheduling types). |
||
46 | /// |
||
47 | /// @param ChunkSize The chunk size, set via command line or its default. |
||
48 | /// @param Scheduling The scheduling, set via command line or its default. |
||
49 | /// |
||
50 | /// @return The corresponding OMPGeneralSchedulingType. |
||
51 | OMPGeneralSchedulingType |
||
52 | getSchedType(int ChunkSize, OMPGeneralSchedulingType Scheduling) const; |
||
53 | |||
54 | /// Returns true if 'LongType' is 64 bits wide, false otherwise. |
||
55 | bool is64BitArch(); |
||
56 | |||
57 | public: |
||
58 | // The functions below may be used if one does not want to generate a |
||
59 | // specific OpenMP parallel loop, but generate individual parts of it |
||
60 | // (e.g. the subfunction definition). |
||
61 | |||
62 | /// Create a runtime library call to spawn the worker threads. |
||
63 | /// |
||
64 | /// @param SubFn The subfunction which holds the loop body. |
||
65 | /// @param SubFnParam The parameter for the subfunction (basically the struct |
||
66 | /// filled with the outside values). |
||
67 | /// @param LB The lower bound for the loop we parallelize. |
||
68 | /// @param UB The upper bound for the loop we parallelize. |
||
69 | /// @param Stride The stride of the loop we parallelize. |
||
70 | void createCallSpawnThreads(Value *SubFn, Value *SubFnParam, Value *LB, |
||
71 | Value *UB, Value *Stride); |
||
72 | |||
73 | void deployParallelExecution(Function *SubFn, Value *SubFnParam, Value *LB, |
||
74 | Value *UB, Value *Stride) override; |
||
75 | |||
76 | Function *prepareSubFnDefinition(Function *F) const override; |
||
77 | |||
78 | std::tuple<Value *, Function *> createSubFn(Value *Stride, AllocaInst *Struct, |
||
79 | SetVector<Value *> UsedValues, |
||
80 | ValueMapT &VMap) override; |
||
81 | |||
82 | /// Create a runtime library call to get the current global thread number. |
||
83 | /// |
||
84 | /// @return A Value which holds the current global thread number. |
||
85 | Value *createCallGlobalThreadNum(); |
||
86 | |||
87 | /// Create a runtime library call to request a number of threads, |
||
88 | /// which will be used in the next OpenMP section (by the next fork). |
||
89 | /// |
||
90 | /// @param GlobalThreadID The global thread ID. |
||
91 | /// @param NumThreads The number of threads to use. |
||
92 | void createCallPushNumThreads(Value *GlobalThreadID, Value *NumThreads); |
||
93 | |||
94 | /// Create a runtime library call to prepare the OpenMP runtime |
||
95 | /// for dynamically scheduled loops, saving the loop arguments. |
||
96 | /// |
||
97 | /// @param GlobalThreadID The global thread ID. |
||
98 | /// @param LB The loop's lower bound. |
||
99 | /// @param UB The loop's upper bound. |
||
100 | /// @param Inc The loop increment. |
||
101 | /// @param ChunkSize The chunk size of the parallel loop. |
||
102 | void createCallDispatchInit(Value *GlobalThreadID, Value *LB, Value *UB, |
||
103 | Value *Inc, Value *ChunkSize); |
||
104 | |||
105 | /// Create a runtime library call to retrieve the next (dynamically) |
||
106 | /// allocated chunk of work for this thread. |
||
107 | /// |
||
108 | /// @param GlobalThreadID The global thread ID. |
||
109 | /// @param IsLastPtr Pointer to a flag, which is set to 1 if this is |
||
110 | /// the last chunk of work, or 0 otherwise. |
||
111 | /// @param LBPtr Pointer to the lower bound for the next chunk. |
||
112 | /// @param UBPtr Pointer to the upper bound for the next chunk. |
||
113 | /// @param StridePtr Pointer to the stride for the next chunk. |
||
114 | /// |
||
115 | /// @return A Value which holds 1 if there is work to be done, 0 otherwise. |
||
116 | Value *createCallDispatchNext(Value *GlobalThreadID, Value *IsLastPtr, |
||
117 | Value *LBPtr, Value *UBPtr, Value *StridePtr); |
||
118 | |||
119 | /// Create a runtime library call to prepare the OpenMP runtime |
||
120 | /// for statically scheduled loops, saving the loop arguments. |
||
121 | /// |
||
122 | /// @param GlobalThreadID The global thread ID. |
||
123 | /// @param IsLastPtr Pointer to a flag, which is set to 1 if this is |
||
124 | /// the last chunk of work, or 0 otherwise. |
||
125 | /// @param LBPtr Pointer to the lower bound for the next chunk. |
||
126 | /// @param UBPtr Pointer to the upper bound for the next chunk. |
||
127 | /// @param StridePtr Pointer to the stride for the next chunk. |
||
128 | /// @param ChunkSize The chunk size of the parallel loop. |
||
129 | void createCallStaticInit(Value *GlobalThreadID, Value *IsLastPtr, |
||
130 | Value *LBPtr, Value *UBPtr, Value *StridePtr, |
||
131 | Value *ChunkSize); |
||
132 | |||
133 | /// Create a runtime library call to mark the end of |
||
134 | /// a statically scheduled loop. |
||
135 | /// |
||
136 | /// @param GlobalThreadID The global thread ID. |
||
137 | void createCallStaticFini(Value *GlobalThreadID); |
||
138 | |||
139 | /// Create the current source location. |
||
140 | /// |
||
141 | /// TODO: Currently generates only dummy values. |
||
142 | GlobalVariable *createSourceLocation(); |
||
143 | }; |
||
144 | } // end namespace polly |
||
145 | #endif |