Rev 2 | Show entire file | Ignore whitespace | Details | Blame | Last modification | View Log | RSS feed
Rev 2 | Rev 3 | ||
---|---|---|---|
Line 11... | Line 11... | ||
11 | // * If you modify this file, you may extend this exception to your version * |
11 | // * If you modify this file, you may extend this exception to your version * |
12 | // * of the file, but you are not obligated to do so. If you do not wish to * |
12 | // * of the file, but you are not obligated to do so. If you do not wish to * |
13 | // * do so, delete this exception statement from your version. * |
13 | // * do so, delete this exception statement from your version. * |
14 | // **************************************************************************** |
14 | // **************************************************************************** |
15 | 15 | ||
- | 16 | // ------------------------------------------------------------------------- |
|
- | 17 | // | xBRZ: "Scale by rules" - high quality image upscaling filter by Zenju | |
|
- | 18 | // ------------------------------------------------------------------------- |
|
- | 19 | // using a modified approach of xBR: |
|
- | 20 | // http://board.byuu.org/viewtopic.php?f=10&t=2248 |
|
- | 21 | // - new rule set preserving small image features |
|
- | 22 | // - highly optimized for performance |
|
- | 23 | // - support alpha channel |
|
- | 24 | // - support multithreading |
|
- | 25 | // - support 64-bit architectures |
|
- | 26 | // - support processing image slices |
|
- | 27 | // - support scaling up to 6xBRZ |
|
16 | 28 | ||
- | 29 | // -> map source (srcWidth * srcHeight) to target (scale * width x scale * height) image, optionally processing a half-open slice of rows [yFirst, yLast) only |
|
17 |
|
30 | // -> support for source/target pitch in bytes! |
- | 31 | // -> if your emulator changes only a few image slices during each cycle (e.g. DOSBox) then there's no need to run xBRZ on the complete image: |
|
- | 32 | // Just make sure you enlarge the source image slice by 2 rows on top and 2 on bottom (this is the additional range the xBRZ algorithm is using during analysis) |
|
- | 33 | // CAVEAT: If there are multiple changed slices, make sure they do not overlap after adding these additional rows in order to avoid a memory race condition |
|
18 |
|
34 | // in the target image data if you are using multiple threads for processing each enlarged slice! |
19 |
|
35 | // |
- | 36 | // THREAD-SAFETY: - parts of the same image may be scaled by multiple threads as long as the [yFirst, yLast) ranges do not overlap! |
|
- | 37 | // - there is a minor inefficiency for the first row of a slice, so avoid processing single rows only; suggestion: process at least 8-16 rows |
|
- | 38 | ||
- | 39 | ||
20 | #include < |
40 | #include <stddef.h> // for size_t |
21 | #include < |
41 | #include <stdint.h> // for uint32_t |
22 | #include < |
42 | #include <memory.h> // for memset() |
23 | #include < |
43 | #include <limits.h> |
24 | #include <math.h> |
44 | #include <math.h> |
25 | 45 | ||
26 | 46 | ||
27 | #ifdef __cplusplus |
47 | #ifdef __cplusplus |
28 | #define EXTERN_C extern "C" |
48 | #define EXTERN_C extern "C" |
Line 47... | Line 67... | ||
47 | #define GET_BYTE(val,byteno) ((unsigned char) (((val) >> ((byteno) << 3)) & 0xff)) |
67 | #define GET_BYTE(val,byteno) ((unsigned char) (((val) >> ((byteno) << 3)) & 0xff)) |
48 | #define GET_BLUE(val) GET_BYTE (val, 0) |
68 | #define GET_BLUE(val) GET_BYTE (val, 0) |
49 | #define GET_GREEN(val) GET_BYTE (val, 1) |
69 | #define GET_GREEN(val) GET_BYTE (val, 1) |
50 | #define GET_RED(val) GET_BYTE (val, 2) |
70 | #define GET_RED(val) GET_BYTE (val, 2) |
51 | #define GET_ALPHA(val) GET_BYTE (val, 3) |
71 | #define GET_ALPHA(val) GET_BYTE (val, 3) |
52 |
|
72 | #define CALC_COLOR24(colFront,colBack,M,N) (unsigned char) ((((unsigned char) (colFront)) * ((unsigned int) (M)) + ((unsigned char) (colBack)) * (((unsigned int) (N)) - ((unsigned int) (M)))) / ((unsigned int) (N))) |
53 |
|
73 | #define CALC_COLOR32(colFront,colBack,weightFront,weightBack,weightSum) ((unsigned char) ((((unsigned char) (colFront)) * ((unsigned int) (weightFront)) + ((unsigned char) (colBack)) * ((unsigned int) (weightBack))) / ((unsigned int) (weightSum)))) |
54 |
|
74 | #define BYTE_ADVANCE(buffer,offset) (((char *) buffer) + (offset)) |
- | 75 | #ifndef MIN |
|
- | 76 | #define MIN(a,b) ((a) < (b) ? (a) : (b)) |
|
- | 77 | #endif // MIN |
|
- | 78 | #ifndef MAX |
|
55 |
|
79 | #define MAX(a,b) ((a) > (b) ? (a) : (b)) |
- | 80 | #endif // MAX |
|
56 | 81 | ||
57 | 82 | ||
58 | namespace xbrz |
- | |
59 | { |
- | |
60 | // ------------------------------------------------------------------------- |
- | |
61 | // | xBRZ: "Scale by rules" - high quality image upscaling filter by Zenju | |
- | |
62 | // ------------------------------------------------------------------------- |
- | |
63 | // using a modified approach of xBR: |
- | |
64 | // http://board.byuu.org/viewtopic.php?f=10&t=2248 |
- | |
65 | // - new rule set preserving small image features |
- | |
66 | // - highly optimized for performance |
- | |
67 | // - support alpha channel |
- | |
68 | // - support multithreading |
- | |
69 | // - support 64-bit architectures |
- | |
70 | // - support processing image slices |
- | |
71 | // - support scaling up to 6xBRZ |
- | |
72 | - | ||
73 | // -> map source (srcWidth * srcHeight) to target (scale * width x scale * height) image, optionally processing a half-open slice of rows [yFirst, yLast) only |
- | |
74 | // -> support for source/target pitch in bytes! |
- | |
75 | // -> if your emulator changes only a few image slices during each cycle (e.g. DOSBox) then there's no need to run xBRZ on the complete image: |
- | |
76 | // Just make sure you enlarge the source image slice by 2 rows on top and 2 on bottom (this is the additional range the xBRZ algorithm is using during analysis) |
- | |
77 | // CAVEAT: If there are multiple changed slices, make sure they do not overlap after adding these additional rows in order to avoid a memory race condition |
- | |
78 | // in the target image data if you are using multiple threads for processing each enlarged slice! |
- | |
79 | // |
- | |
80 | // THREAD-SAFETY: - parts of the same image may be scaled by multiple threads as long as the [yFirst, yLast) ranges do not overlap! |
- | |
81 | // - there is a minor inefficiency for the first row of a slice, so avoid processing single rows only; suggestion: process at least 8-16 rows |
- | |
82 | - | ||
83 |
|
83 | typedef void (alphagrad_func) (uint32_t *pixBack, uint32_t pixFront, unsigned int M, unsigned int N); |
84 | - | ||
85 | - | ||
86 | template <class Pix> inline Pix* byteAdvance(Pix* ptr, int bytes) |
- | |
87 | { |
- | |
88 | using PixNonConst = typename std::remove_cv<Pix>::type; |
- | |
89 | using PixByte = typename std::conditional<std::is_same<Pix, PixNonConst>::value, char, const char>::type; |
- | |
90 | - | ||
91 | static_assert(std::is_integral<PixNonConst>::value, "Pix* is expected to be cast-able to char*"); |
- | |
92 | - | ||
93 | return reinterpret_cast<Pix*>(reinterpret_cast<PixByte*>(ptr) + bytes); |
- | |
94 | } |
- | |
95 | - | ||
96 | - | ||
97 | //fill block with the given color |
- | |
98 | template <class Pix> inline void fillBlock(Pix* trg, int pitch, Pix col, int blockWidth, int blockHeight) |
- | |
99 | { |
- | |
100 | //for (int y = 0; y < blockHeight; ++y, trg = byteAdvance(trg, pitch)) |
- | |
101 | // std::fill(trg, trg + blockWidth, col); |
- | |
102 | - | ||
103 | for (int y = 0; y < blockHeight; ++y, trg = byteAdvance(trg, pitch)) |
- | |
104 | for (int x = 0; x < blockWidth; ++x) |
- | |
105 | trg[x] = col; |
- | |
106 | } |
- | |
107 | - | ||
108 | - | ||
109 | template <class PixSrc, class PixTrg, class PixConverter> |
- | |
110 | void nearestNeighborScale(const PixSrc* src, int srcWidth, int srcHeight, int srcPitch, |
- | |
111 | /**/ PixTrg* trg, int trgWidth, int trgHeight, int trgPitch, |
- | |
112 | int slice_type, int yFirst, int yLast, PixConverter pixCvrt /*convert PixSrc to PixTrg*/) |
- | |
113 | { |
- | |
114 | static_assert(std::is_integral<PixSrc>::value, "PixSrc* is expected to be cast-able to char*"); |
- | |
115 | static_assert(std::is_integral<PixTrg>::value, "PixTrg* is expected to be cast-able to char*"); |
- | |
116 | static_assert(std::is_same<decltype(pixCvrt(PixSrc())), PixTrg>::value, "PixConverter returning wrong pixel format"); |
- | |
117 | - | ||
118 | if (srcPitch < srcWidth * static_cast<int>(sizeof(PixSrc)) || |
- | |
119 | trgPitch < trgWidth * static_cast<int>(sizeof(PixTrg))) |
- | |
120 | { |
- | |
121 | assert(false); |
- | |
122 | return; |
- | |
123 | } |
- | |
124 | - | ||
125 | if (slice_type == XBRZ_SLICETYPE_SOURCE) |
- | |
126 | { |
- | |
127 | //nearest-neighbor (going over source image - fast for upscaling, since source is read only once |
- | |
128 | yFirst = std::max(yFirst, 0); |
- | |
129 | yLast = std::min(yLast, srcHeight); |
- | |
130 | if (yFirst >= yLast || trgWidth <= 0 || trgHeight <= 0) return; |
- | |
131 | - | ||
132 | for (int y = yFirst; y < yLast; ++y) |
- | |
133 | { |
- | |
134 | //mathematically: ySrc = floor(srcHeight * yTrg / trgHeight) |
- | |
135 | // => search for integers in: [ySrc, ySrc + 1) * trgHeight / srcHeight |
- | |
136 | - | ||
137 | //keep within for loop to support MT input slices! |
- | |
138 | const int yTrg_first = ( y * trgHeight + srcHeight - 1) / srcHeight; //=ceil(y * trgHeight / srcHeight) |
- | |
139 | const int yTrg_last = ((y + 1) * trgHeight + srcHeight - 1) / srcHeight; //=ceil(((y + 1) * trgHeight) / srcHeight) |
- | |
140 | const int blockHeight = yTrg_last - yTrg_first; |
- | |
141 | - | ||
142 | if (blockHeight > 0) |
- | |
143 | { |
- | |
144 | const PixSrc* srcLine = byteAdvance(src, y * srcPitch); |
- | |
145 | /**/ PixTrg* trgLine = byteAdvance(trg, yTrg_first * trgPitch); |
- | |
146 | int xTrg_first = 0; |
- | |
147 | - | ||
148 |
|
84 | typedef double (dist_func) (uint32_t pix1, uint32_t pix2); |
149 | { |
- | |
150 | const int xTrg_last = ((x + 1) * trgWidth + srcWidth - 1) / srcWidth; |
- | |
151 | const int blockWidth = xTrg_last - xTrg_first; |
- | |
152 | if (blockWidth > 0) |
- | |
153 | { |
- | |
154 | xTrg_first = xTrg_last; |
- | |
155 | - | ||
156 | const auto trgPix = pixCvrt(srcLine[x]); |
- | |
157 | fillBlock(trgLine, trgPitch, trgPix, blockWidth, blockHeight); |
- | |
158 | trgLine += blockWidth; |
- | |
159 | } |
- | |
160 | } |
- | |
161 | } |
- | |
162 | } |
- | |
163 | } |
- | |
164 | else if (slice_type == XBRZ_SLICETYPE_TARGET) |
- | |
165 | { |
- | |
166 | //nearest-neighbor (going over target image - slow for upscaling, since source is read multiple times missing out on cache! Fast for similar image sizes!) |
- | |
167 | yFirst = std::max(yFirst, 0); |
- | |
168 | yLast = std::min(yLast, trgHeight); |
- | |
169 | if (yFirst >= yLast || srcHeight <= 0 || srcWidth <= 0) return; |
- | |
170 | - | ||
171 | for (int y = yFirst; y < yLast; ++y) |
- | |
172 | { |
- | |
173 | PixTrg* trgLine = byteAdvance(trg, y * trgPitch); |
- | |
174 | const int ySrc = srcHeight * y / trgHeight; |
- | |
175 | const PixSrc* srcLine = byteAdvance(src, ySrc * srcPitch); |
- | |
176 | for (int x = 0; x < trgWidth; ++x) |
- | |
177 | { |
- | |
178 | const int xSrc = srcWidth * x / trgWidth; |
- | |
179 | trgLine[x] = pixCvrt(srcLine[xSrc]); |
- | |
180 | } |
- | |
181 | } |
- | |
182 | } |
- | |
183 | } |
- | |
184 | } |
- | |
185 | 85 | ||
186 | 86 | ||
187 | 87 | ||
188 | 88 | ||
189 | namespace |
89 | namespace |
190 | { |
90 | { |
191 | template <unsigned int M, unsigned int N> inline |
- | |
192 | uint32_t gradientRGB(uint32_t pixFront, uint32_t pixBack) //blend front color with opacity M / N over opaque background: http://en.wikipedia.org/wiki/Alpha_compositing#Alpha_blending |
- | |
193 | { |
- | |
194 | static_assert(0 < M && M < N && N <= 1000, ""); |
- | |
195 | - | ||
196 | auto calcColor = [](unsigned char colFront, unsigned char colBack) -> unsigned char { return (colFront * M + colBack * (N - M)) / N; }; |
- | |
197 | - | ||
198 | return ((calcColor (GET_RED (pixFront), GET_RED (pixBack)) << 16) |
- | |
199 | | (calcColor (GET_GREEN (pixFront), GET_GREEN (pixBack)) << 8) |
- | |
200 | | (calcColor (GET_BLUE (pixFront), GET_BLUE (pixBack)) << 0)); |
- | |
201 | } |
- | |
202 | - | ||
203 | - | ||
204 | template <unsigned int M, unsigned int N> inline |
- | |
205 | uint32_t gradientARGB(uint32_t pixFront, uint32_t pixBack) //find intermediate color between two colors with alpha channels (=> NO alpha blending!!!) |
- | |
206 | { |
- | |
207 | static_assert(0 < M && M < N && N <= 1000, ""); |
- | |
208 | - | ||
209 | const unsigned int weightFront = GET_ALPHA (pixFront) * M; |
- | |
210 | const unsigned int weightBack = GET_ALPHA (pixBack) * (N - M); |
- | |
211 | const unsigned int weightSum = weightFront + weightBack; |
- | |
212 | if (weightSum == 0) |
- | |
213 | return 0; |
- | |
214 | - | ||
215 | auto calcColor = [=](unsigned char colFront, unsigned char colBack) |
- | |
216 | { |
- | |
217 | return static_cast<unsigned char>((colFront * weightFront + colBack * weightBack) / weightSum); |
- | |
218 | }; |
- | |
219 | - | ||
220 | return (((unsigned char) (weightSum / N)) << 24) |
- | |
221 | | (calcColor (GET_RED (pixFront), GET_RED (pixBack)) << 16) |
- | |
222 | | (calcColor (GET_GREEN (pixFront), GET_GREEN (pixBack)) << 8) |
- | |
223 | | (calcColor (GET_BLUE (pixFront), GET_BLUE (pixBack)) << 0); |
- | |
224 | } |
- | |
225 | - | ||
226 | - | ||
227 | //inline |
- | |
228 | //double fastSqrt(double n) |
- | |
229 | //{ |
- | |
230 | // __asm //speeds up xBRZ by about 9% compared to /*std::*/sqrt which internally uses the same assembler instructions but adds some "fluff" |
- | |
231 | // { |
- | |
232 | // fld n |
- | |
233 | // fsqrt |
- | |
234 | // } |
- | |
235 | //} |
- | |
236 | // |
- | |
237 | - | ||
238 | - | ||
239 | #ifdef _MSC_VER |
91 | #ifdef _MSC_VER |
240 | #define FORCE_INLINE __forceinline |
92 | #define FORCE_INLINE __forceinline |
241 | #elif defined __GNUC__ |
93 | #elif defined __GNUC__ |
242 | #define FORCE_INLINE __attribute__((always_inline)) inline |
94 | #define FORCE_INLINE __attribute__((always_inline)) inline |
243 | #else |
95 | #else |
Line 245... | Line 97... | ||
245 | #endif |
97 | #endif |
246 | 98 | ||
247 | 99 | ||
248 | enum RotationDegree //clock-wise |
100 | enum RotationDegree //clock-wise |
249 | { |
101 | { |
250 |
|
102 | ROT_0 = 0, |
251 | ROT_90, |
103 | ROT_90, |
252 | ROT_180, |
104 | ROT_180, |
253 | ROT_270 |
105 | ROT_270 |
254 | }; |
106 | }; |
- | 107 | ||
255 | 108 | ||
256 | //calculate input matrix coordinates after rotation at compile time |
109 | //calculate input matrix coordinates after rotation at compile time |
257 | template <RotationDegree rotDeg, size_t I, size_t J, size_t N> |
110 | template <RotationDegree rotDeg, size_t I, size_t J, size_t N> struct MatrixRotation; |
258 | struct MatrixRotation; |
- | |
- | 111 | ||
259 | 112 | ||
260 | template <size_t I, size_t J, size_t N> |
113 | template <size_t I, size_t J, size_t N> struct MatrixRotation<ROT_0, I, J, N> |
261 | struct MatrixRotation<ROT_0, I, J, N> |
- | |
262 | { |
114 | { |
263 | static const size_t I_old = I; |
115 | static const size_t I_old = I; |
264 | static const size_t J_old = J; |
116 | static const size_t J_old = J; |
265 | }; |
117 | }; |
- | 118 | ||
266 | 119 | ||
267 | template <RotationDegree rotDeg, size_t I, size_t J, size_t N> //(i, j) = (row, col) indices, N = size of (square) matrix |
120 | template <RotationDegree rotDeg, size_t I, size_t J, size_t N> //(i, j) = (row, col) indices, N = size of (square) matrix |
268 | struct MatrixRotation |
121 | struct MatrixRotation |
269 | { |
122 | { |
270 | static const size_t I_old = N - 1 - MatrixRotation< |
123 | static const size_t I_old = N - 1 - MatrixRotation<(RotationDegree)(rotDeg - 1), I, J, N>::J_old; //old coordinates before rotation! |
271 | static const size_t J_old = MatrixRotation< |
124 | static const size_t J_old = MatrixRotation<(RotationDegree)(rotDeg - 1), I, J, N>::I_old; // |
272 | }; |
125 | }; |
273 | 126 | ||
274 | 127 | ||
275 | template <size_t N, RotationDegree rotDeg> |
128 | template <size_t N, RotationDegree rotDeg> class OutputMatrix |
276 | class OutputMatrix |
- | |
277 | { |
129 | { |
278 | public: |
130 | public: |
279 | OutputMatrix(uint32_t* |
131 | OutputMatrix (uint32_t *out, int outWidth) //access matrix area, top-left at position "out" for image with given width |
- | 132 | { |
|
280 | out_ |
133 | out_ = out; |
281 | outWidth_ |
134 | outWidth_ = outWidth; |
- | 135 | } |
|
282 | 136 | ||
283 | template <size_t I, size_t J> |
137 | template <size_t I, size_t J> uint32_t &ref() const |
284 | uint32_t& ref() const |
- | |
285 | { |
138 | { |
286 | static const size_t I_old = MatrixRotation<rotDeg, I, J, N>::I_old; |
139 | static const size_t I_old = MatrixRotation<rotDeg, I, J, N>::I_old; |
287 | static const size_t J_old = MatrixRotation<rotDeg, I, J, N>::J_old; |
140 | static const size_t J_old = MatrixRotation<rotDeg, I, J, N>::J_old; |
- | 141 | ||
288 | return *(out_ + J_old + I_old * outWidth_); |
142 | return *(out_ + J_old + I_old * outWidth_); |
289 | } |
143 | } |
290 | 144 | ||
291 | private: |
- | |
292 | uint32_t* out_; |
145 | uint32_t* out_; |
293 |
|
146 | int outWidth_; |
294 | }; |
147 | }; |
295 | 148 | ||
296 | 149 | ||
297 | template <class T> inline |
- | |
298 | T square(T value) { return value * value; } |
- | |
299 | - | ||
300 | - | ||
301 | - | ||
302 | inline |
- | |
303 | double distRGB(uint32_t pix1, uint32_t pix2) |
- | |
304 | { |
- | |
305 | const double r_diff = static_cast<int>(GET_RED (pix1)) - GET_RED (pix2); |
- | |
306 | const double g_diff = static_cast<int>(GET_GREEN (pix1)) - GET_GREEN (pix2); |
- | |
307 | const double b_diff = static_cast<int>(GET_BLUE (pix1)) - GET_BLUE (pix2); |
- | |
308 | - | ||
309 | //euklidean RGB distance |
- | |
310 | return /*std::*/sqrt(square(r_diff) + square(g_diff) + square(b_diff)); |
- | |
311 | } |
- | |
312 | - | ||
313 | - | ||
314 | inline |
- | |
315 | double distYCbCr(uint32_t pix1, uint32_t pix2, double lumaWeight) |
- | |
316 | { |
- | |
317 | //http://en.wikipedia.org/wiki/YCbCr#ITU-R_BT.601_conversion |
- | |
318 | //YCbCr conversion is a matrix multiplication => take advantage of linearity by subtracting first! |
- | |
319 | const int r_diff = static_cast<int>(GET_RED (pix1)) - GET_RED (pix2); //we may delay division by 255 to after matrix multiplication |
- | |
320 | const int g_diff = static_cast<int>(GET_GREEN (pix1)) - GET_GREEN (pix2); // |
- | |
321 | const int b_diff = static_cast<int>(GET_BLUE (pix1)) - GET_BLUE (pix2); //substraction for int is noticeable faster than for double! |
- | |
322 | - | ||
323 | //const double k_b = 0.0722; //ITU-R BT.709 conversion |
- | |
324 | //const double k_r = 0.2126; // |
- | |
325 | const double k_b = 0.0593; //ITU-R BT.2020 conversion |
- | |
326 | const double k_r = 0.2627; // |
- | |
327 | const double k_g = 1 - k_b - k_r; |
- | |
328 | - | ||
329 | const double scale_b = 0.5 / (1 - k_b); |
- | |
330 | const double scale_r = 0.5 / (1 - k_r); |
- | |
331 | - | ||
332 | const double y = k_r * r_diff + k_g * g_diff + k_b * b_diff; //[!], analog YCbCr! |
- | |
333 | const double c_b = scale_b * (b_diff - y); |
- | |
334 | const double c_r = scale_r * (r_diff - y); |
- | |
335 | - | ||
336 | //we skip division by 255 to have similar range like other distance functions |
- | |
337 | return /*std::*/sqrt(square(lumaWeight * y) + square(c_b) + square(c_r)); |
- | |
338 | } |
- | |
339 | - | ||
340 | - | ||
341 | inline double distYCbCrBuffered(uint32_t pix1, uint32_t pix2) |
- | |
342 | { |
- | |
343 | //30% perf boost compared to plain distYCbCr()! |
- | |
344 | //consumes 64 MB memory; using double is only 2% faster, but takes 128 MB |
- | |
345 | static const std::vector<float> diffToDist = [] |
- | |
346 | { |
- | |
347 | std::vector<float> tmp; |
- | |
348 | - | ||
349 | for (uint32_t i = 0; i < 256 * 256 * 256; ++i) //startup time: 114 ms on Intel Core i5 (four cores) |
- | |
350 | { |
- | |
351 | const int r_diff = GET_RED (i) * 2 - 0xFF; |
- | |
352 | const int g_diff = GET_GREEN (i) * 2 - 0xFF; |
- | |
353 | const int b_diff = GET_BLUE (i) * 2 - 0xFF; |
- | |
354 | - | ||
355 | const double k_b = 0.0593; //ITU-R BT.2020 conversion |
- | |
356 | const double k_r = 0.2627; // |
- | |
357 | const double k_g = 1 - k_b - k_r; |
- | |
358 | - | ||
359 | const double scale_b = 0.5 / (1 - k_b); |
- | |
360 | const double scale_r = 0.5 / (1 - k_r); |
- | |
361 | - | ||
362 | const double y = k_r * r_diff + k_g * g_diff + k_b * b_diff; //[!], analog YCbCr! |
- | |
363 | const double c_b = scale_b * (b_diff - y); |
- | |
364 | const double c_r = scale_r * (r_diff - y); |
- | |
365 | - | ||
366 | tmp.push_back(static_cast<float>(/*std::*/sqrt(square(y) + square(c_b) + square(c_r)))); |
- | |
367 | } |
- | |
368 | return tmp; |
- | |
369 | }(); |
- | |
370 | - | ||
371 | //if (pix1 == pix2) -> 8% perf degradation! |
- | |
372 | // return 0; |
- | |
373 | //if (pix1 < pix2) |
- | |
374 | // std::swap(pix1, pix2); -> 30% perf degradation!!! |
- | |
375 | #if 1 |
- | |
376 | const int r_diff = static_cast<int>(GET_RED (pix1)) - GET_RED (pix2); |
- | |
377 | const int g_diff = static_cast<int>(GET_GREEN (pix1)) - GET_GREEN (pix2); |
- | |
378 | const int b_diff = static_cast<int>(GET_BLUE (pix1)) - GET_BLUE (pix2); |
- | |
379 | - | ||
380 | return diffToDist[(((r_diff + 0xFF) / 2) << 16) | //slightly reduce precision (division by 2) to squeeze value into single byte |
- | |
381 | (((g_diff + 0xFF) / 2) << 8) | |
- | |
382 | (( b_diff + 0xFF) / 2)]; |
- | |
383 | #else //not noticeably faster: |
- | |
384 | const int r_diff_tmp = ((pix1 & 0xFF0000) + 0xFF0000 - (pix2 & 0xFF0000)) / 2; |
- | |
385 | const int g_diff_tmp = ((pix1 & 0x00FF00) + 0x00FF00 - (pix2 & 0x00FF00)) / 2; //slightly reduce precision (division by 2) to squeeze value into single byte |
- | |
386 | const int b_diff_tmp = ((pix1 & 0x0000FF) + 0x0000FF - (pix2 & 0x0000FF)) / 2; |
- | |
387 | - | ||
388 | return diffToDist[(r_diff_tmp & 0xFF0000) | (g_diff_tmp & 0x00FF00) | (b_diff_tmp & 0x0000FF)]; |
- | |
389 | #endif |
- | |
390 | } |
- | |
391 | 150 | ||
392 | 151 | ||
393 | enum BlendType |
152 | enum BlendType |
394 | { |
153 | { |
395 | BLEND_NONE = 0, |
154 | BLEND_NONE = 0, |
Line 425... | Line 184... | ||
425 | | I | J | K | L | |
184 | | I | J | K | L | |
426 | ----|---|---|---| |
185 | ----|---|---|---| |
427 | | M | N | O | P | |
186 | | M | N | O | P | |
428 | ----------------- |
187 | ----------------- |
429 | */ |
188 | */ |
430 | template <class ColorDistance> |
- | |
431 | FORCE_INLINE //detect blend direction |
189 | FORCE_INLINE //detect blend direction |
432 | BlendResult preProcessCorners(const Kernel_4x4& |
190 | BlendResult preProcessCorners(const Kernel_4x4& ker, dist_func dist) //result: F, G, J, K corners of "GradientType" |
433 | { |
191 | { |
434 | BlendResult result = {}; |
192 | BlendResult result = {}; |
435 | 193 | ||
436 | if ((ker.f == ker.g && |
194 | if ((ker.f == ker.g && |
437 | ker.j == ker.k) || |
195 | ker.j == ker.k) || |
438 | (ker.f == ker.j && |
196 | (ker.f == ker.j && |
439 | ker.g == ker.k)) |
197 | ker.g == ker.k)) |
440 | return result; |
198 | return result; |
441 | - | ||
442 | auto dist = [&](uint32_t pix1, uint32_t pix2) { return ColorDistance::dist(pix1, pix2, XBRZ_CFG_LUMINANCE_WEIGHT); }; |
- | |
443 | 199 | ||
444 | const int weight = 4; |
200 | const int weight = 4; |
445 | double jg = dist(ker.i, ker.f) + dist(ker.f, ker.c) + dist(ker.n, ker.k) + dist(ker.k, ker.h) + weight * dist(ker.j, ker.g); |
201 | double jg = dist (ker.i, ker.f) + dist (ker.f, ker.c) + dist (ker.n, ker.k) + dist (ker.k, ker.h) + weight * dist (ker.j, ker.g); |
446 | double fk = dist(ker.e, ker.j) + dist(ker.j, ker.o) + dist(ker.b, ker.g) + dist(ker.g, ker.l) + weight * dist(ker.f, ker.k); |
202 | double fk = dist (ker.e, ker.j) + dist (ker.j, ker.o) + dist (ker.b, ker.g) + dist (ker.g, ker.l) + weight * dist (ker.f, ker.k); |
447 | 203 | ||
448 | if (jg < fk) //test sample: 70% of values max(jg, fk) / min(jg, fk) are between 1.1 and 3.7 with median being 1.8 |
204 | if (jg < fk) //test sample: 70% of values max(jg, fk) / min(jg, fk) are between 1.1 and 3.7 with median being 1.8 |
449 | { |
205 | { |
450 | const bool dominantGradient = XBRZ_CFG_DOMINANT_DIRECTION_THRESHOLD * jg < fk; |
206 | const bool dominantGradient = XBRZ_CFG_DOMINANT_DIRECTION_THRESHOLD * jg < fk; |
451 | if (ker.f != ker.g && ker.f != ker.j) |
207 | if (ker.f != ker.g && ker.f != ker.j) |
Line 471... | Line 227... | ||
471 | uint32_t |
227 | uint32_t |
472 | /**/a, b, c, |
228 | /**/a, b, c, |
473 | /**/d, e, f, |
229 | /**/d, e, f, |
474 | /**/g, h, i; |
230 | /**/g, h, i; |
475 | }; |
231 | }; |
476 | 232 | /* |
|
477 | #define DEF_GETTER(x) template <RotationDegree rotDeg> uint32_t inline get_##x(const Kernel_3x3& ker) { return ker.x; } |
233 | #define DEF_GETTER(x) template <RotationDegree rotDeg> uint32_t inline get_##x(const Kernel_3x3& ker) { return ker.x; } |
478 | //we cannot and NEED NOT write "ker.##x" since ## concatenates preprocessor tokens but "." is not a token |
234 | //we cannot and NEED NOT write "ker.##x" since ## concatenates preprocessor tokens but "." is not a token |
479 | DEF_GETTER(a) DEF_GETTER(b) DEF_GETTER(c) |
235 | DEF_GETTER(a) DEF_GETTER(b) DEF_GETTER(c) |
480 | DEF_GETTER(d) DEF_GETTER(e) DEF_GETTER(f) |
236 | DEF_GETTER(d) DEF_GETTER(e) DEF_GETTER(f) |
481 | DEF_GETTER(g) DEF_GETTER(h) DEF_GETTER(i) |
237 | DEF_GETTER(g) DEF_GETTER(h) DEF_GETTER(i) |
Line 496... | Line 252... | ||
496 | #define DEF_GETTER(x, y) template <> inline uint32_t get_##x<ROT_270>(const Kernel_3x3& ker) { return ker.y; } |
252 | #define DEF_GETTER(x, y) template <> inline uint32_t get_##x<ROT_270>(const Kernel_3x3& ker) { return ker.y; } |
497 | DEF_GETTER(a, c) DEF_GETTER(b, f) DEF_GETTER(c, i) |
253 | DEF_GETTER(a, c) DEF_GETTER(b, f) DEF_GETTER(c, i) |
498 | DEF_GETTER(d, b) DEF_GETTER(e, e) DEF_GETTER(f, h) |
254 | DEF_GETTER(d, b) DEF_GETTER(e, e) DEF_GETTER(f, h) |
499 | DEF_GETTER(g, a) DEF_GETTER(h, d) DEF_GETTER(i, g) |
255 | DEF_GETTER(g, a) DEF_GETTER(h, d) DEF_GETTER(i, g) |
500 | #undef DEF_GETTER |
256 | #undef DEF_GETTER |
- | 257 | */ |
|
- | 258 | ||
- | 259 | template <RotationDegree rotDeg> uint32_t inline get_a (const Kernel_3x3& ker) { return ker.a; } |
|
- | 260 | template <RotationDegree rotDeg> uint32_t inline get_b (const Kernel_3x3& ker) { return ker.b; } |
|
- | 261 | template <RotationDegree rotDeg> uint32_t inline get_c (const Kernel_3x3& ker) { return ker.c; } |
|
- | 262 | template <RotationDegree rotDeg> uint32_t inline get_d (const Kernel_3x3& ker) { return ker.d; } |
|
- | 263 | template <RotationDegree rotDeg> uint32_t inline get_e (const Kernel_3x3& ker) { return ker.e; } |
|
- | 264 | template <RotationDegree rotDeg> uint32_t inline get_f (const Kernel_3x3& ker) { return ker.f; } |
|
- | 265 | template <RotationDegree rotDeg> uint32_t inline get_g (const Kernel_3x3& ker) { return ker.g; } |
|
- | 266 | template <RotationDegree rotDeg> uint32_t inline get_h (const Kernel_3x3& ker) { return ker.h; } |
|
- | 267 | template <RotationDegree rotDeg> uint32_t inline get_i (const Kernel_3x3& ker) { return ker.i; } |
|
- | 268 | ||
- | 269 | template <> inline uint32_t get_a<ROT_90>(const Kernel_3x3& ker) { return ker.g; } |
|
- | 270 | template <> inline uint32_t get_b<ROT_90>(const Kernel_3x3& ker) { return ker.d; } |
|
- | 271 | template <> inline uint32_t get_c<ROT_90>(const Kernel_3x3& ker) { return ker.a; } |
|
- | 272 | template <> inline uint32_t get_d<ROT_90>(const Kernel_3x3& ker) { return ker.h; } |
|
- | 273 | template <> inline uint32_t get_e<ROT_90>(const Kernel_3x3& ker) { return ker.e; } |
|
- | 274 | template <> inline uint32_t get_f<ROT_90>(const Kernel_3x3& ker) { return ker.b; } |
|
- | 275 | template <> inline uint32_t get_g<ROT_90>(const Kernel_3x3& ker) { return ker.i; } |
|
- | 276 | template <> inline uint32_t get_h<ROT_90>(const Kernel_3x3& ker) { return ker.f; } |
|
- | 277 | template <> inline uint32_t get_i<ROT_90>(const Kernel_3x3& ker) { return ker.c; } |
|
- | 278 | ||
- | 279 | template <> inline uint32_t get_a<ROT_180>(const Kernel_3x3& ker) { return ker.i; } |
|
- | 280 | template <> inline uint32_t get_b<ROT_180>(const Kernel_3x3& ker) { return ker.h; } |
|
- | 281 | template <> inline uint32_t get_c<ROT_180>(const Kernel_3x3& ker) { return ker.g; } |
|
- | 282 | template <> inline uint32_t get_d<ROT_180>(const Kernel_3x3& ker) { return ker.f; } |
|
- | 283 | template <> inline uint32_t get_e<ROT_180>(const Kernel_3x3& ker) { return ker.e; } |
|
- | 284 | template <> inline uint32_t get_f<ROT_180>(const Kernel_3x3& ker) { return ker.d; } |
|
- | 285 | template <> inline uint32_t get_g<ROT_180>(const Kernel_3x3& ker) { return ker.c; } |
|
- | 286 | template <> inline uint32_t get_h<ROT_180>(const Kernel_3x3& ker) { return ker.b; } |
|
- | 287 | template <> inline uint32_t get_i<ROT_180>(const Kernel_3x3& ker) { return ker.a; } |
|
501 | 288 | ||
- | 289 | template <> inline uint32_t get_a<ROT_270>(const Kernel_3x3& ker) { return ker.c; } |
|
- | 290 | template <> inline uint32_t get_b<ROT_270>(const Kernel_3x3& ker) { return ker.f; } |
|
- | 291 | template <> inline uint32_t get_c<ROT_270>(const Kernel_3x3& ker) { return ker.i; } |
|
- | 292 | template <> inline uint32_t get_d<ROT_270>(const Kernel_3x3& ker) { return ker.b; } |
|
- | 293 | template <> inline uint32_t get_e<ROT_270>(const Kernel_3x3& ker) { return ker.e; } |
|
- | 294 | template <> inline uint32_t get_f<ROT_270>(const Kernel_3x3& ker) { return ker.h; } |
|
- | 295 | template <> inline uint32_t get_g<ROT_270>(const Kernel_3x3& ker) { return ker.a; } |
|
- | 296 | template <> inline uint32_t get_h<ROT_270>(const Kernel_3x3& ker) { return ker.d; } |
|
- | 297 | template <> inline uint32_t get_i<ROT_270>(const Kernel_3x3& ker) { return ker.g; } |
|
502 | 298 | ||
503 | //compress four blend types into a single byte |
299 | //compress four blend types into a single byte |
504 | inline BlendType getTopL (unsigned char b) { return |
300 | inline BlendType getTopL (unsigned char b) { return (BlendType)(0x3 & b); } |
505 | inline BlendType getTopR (unsigned char b) { return |
301 | inline BlendType getTopR (unsigned char b) { return (BlendType)(0x3 & (b >> 2)); } |
506 | inline BlendType getBottomR(unsigned char b) { return |
302 | inline BlendType getBottomR(unsigned char b) { return (BlendType)(0x3 & (b >> 4)); } |
507 | inline BlendType getBottomL(unsigned char b) { return |
303 | inline BlendType getBottomL(unsigned char b) { return (BlendType)(0x3 & (b >> 6)); } |
508 | 304 | ||
509 | inline void setTopL (unsigned char& b, BlendType bt) { b |= bt; } //buffer is assumed to be initialized before preprocessing! |
305 | inline void setTopL (unsigned char& b, BlendType bt) { b |= bt; } //buffer is assumed to be initialized before preprocessing! |
510 | inline void setTopR (unsigned char& b, BlendType bt) { b |= (bt << 2); } |
306 | inline void setTopR (unsigned char& b, BlendType bt) { b |= (bt << 2); } |
511 | inline void setBottomR(unsigned char& b, BlendType bt) { b |= (bt << 4); } |
307 | inline void setBottomR(unsigned char& b, BlendType bt) { b |= (bt << 4); } |
512 | inline void setBottomL(unsigned char& b, BlendType bt) { b |= (bt << 6); } |
308 | inline void setBottomL(unsigned char& b, BlendType bt) { b |= (bt << 6); } |
513 | - | ||
514 | inline bool blendingNeeded(unsigned char b) { return b != 0; } |
- | |
515 | 309 | ||
516 | template <RotationDegree rotDeg> inline |
310 | template <RotationDegree rotDeg> inline |
517 | unsigned char rotateBlendInfo(unsigned char b) { return b; } |
311 | unsigned char rotateBlendInfo (unsigned char b) { return b; } |
518 | template <> inline unsigned char rotateBlendInfo<ROT_90 >(unsigned char b) { return ((b << 2) | (b >> 6)) & 0xff; } |
312 | template <> inline unsigned char rotateBlendInfo<ROT_90 >(unsigned char b) { return ((b << 2) | (b >> 6)) & 0xff; } |
519 | template <> inline unsigned char rotateBlendInfo<ROT_180>(unsigned char b) { return ((b << 4) | (b >> 4)) & 0xff; } |
313 | template <> inline unsigned char rotateBlendInfo<ROT_180>(unsigned char b) { return ((b << 4) | (b >> 4)) & 0xff; } |
520 | template <> inline unsigned char rotateBlendInfo<ROT_270>(unsigned char b) { return ((b << 6) | (b >> 2)) & 0xff; } |
314 | template <> inline unsigned char rotateBlendInfo<ROT_270>(unsigned char b) { return ((b << 6) | (b >> 2)) & 0xff; } |
521 | 315 | ||
522 | 316 | ||
Line 528... | Line 322... | ||
528 | | D | E | F | //input pixel is at position E |
322 | | D | E | F | //input pixel is at position E |
529 | ----|---|---| |
323 | ----|---|---| |
530 | | G | H | I | |
324 | | G | H | I | |
531 | ------------- |
325 | ------------- |
532 | */ |
326 | */ |
533 | template <class Scaler, |
327 | template <class Scaler, RotationDegree rotDeg> |
534 | FORCE_INLINE //perf: quite worth it! |
- | |
535 | void blendPixel(const Kernel_3x3& ker, |
- | |
536 | uint32_t* target, int trgWidth, |
- | |
537 |
|
328 | FORCE_INLINE void blendPixel(const Kernel_3x3& ker, uint32_t *target, int trgWidth, unsigned char blendInfo, alphagrad_func alphagrad, dist_func dist) //result of preprocessing all four corners of pixel "e" |
538 | { |
329 | { |
539 | #define a get_a<rotDeg>(ker) |
330 | #define a get_a<rotDeg>(ker) |
540 | #define b get_b<rotDeg>(ker) |
331 | #define b get_b<rotDeg>(ker) |
541 | #define c get_c<rotDeg>(ker) |
332 | #define c get_c<rotDeg>(ker) |
542 | #define d get_d<rotDeg>(ker) |
333 | #define d get_d<rotDeg>(ker) |
Line 548... | Line 339... | ||
548 | 339 | ||
549 | const unsigned char blend = rotateBlendInfo<rotDeg>(blendInfo); |
340 | const unsigned char blend = rotateBlendInfo<rotDeg>(blendInfo); |
550 | 341 | ||
551 | if (getBottomR(blend) >= BLEND_NORMAL) |
342 | if (getBottomR(blend) >= BLEND_NORMAL) |
552 | { |
343 | { |
553 | auto eq = [&](uint32_t pix1, uint32_t pix2) { return ColorDistance::dist(pix1, pix2, XBRZ_CFG_LUMINANCE_WEIGHT) < XBRZ_CFG_EQUAL_COLOR_TOLERANCE; }; |
- | |
554 | auto dist = [&](uint32_t pix1, uint32_t pix2) { return ColorDistance::dist(pix1, pix2, XBRZ_CFG_LUMINANCE_WEIGHT); }; |
- | |
555 | - | ||
556 |
|
344 | bool doLineBlend; |
557 | { |
- | |
558 | if (getBottomR(blend) >= BLEND_DOMINANT) |
- | |
559 | return true; |
- | |
560 | - | ||
561 | //make sure there is no second blending in an adjacent rotation for this pixel: handles insular pixels, mario eyes |
- | |
562 | if (getTopR(blend) != BLEND_NONE && !eq(e, g)) //but support double-blending for 90° corners |
- | |
563 | return false; |
- | |
564 | if (getBottomL(blend) != BLEND_NONE && !eq(e, c)) |
- | |
565 | return false; |
- | |
566 | - | ||
567 | //no full blending for L-shapes; blend corner only (handles "mario mushroom eyes") |
- | |
568 | if (!eq(e, i) && eq(g, h) && eq(h, i) && eq(i, f) && eq(f, c)) |
- | |
569 | return false; |
- | |
570 | 345 | ||
- | 346 | if (getBottomR(blend) >= BLEND_DOMINANT) |
|
571 |
|
347 | doLineBlend = true; |
- | 348 | else if (getTopR(blend) != BLEND_NONE && (dist (e, g) >= XBRZ_CFG_EQUAL_COLOR_TOLERANCE)) //but support double-blending for 90° corners |
|
- | 349 | doLineBlend = false; // make sure there is no second blending in an adjacent rotation for this pixel: handles insular pixels, mario eyes |
|
- | 350 | else if (getBottomL(blend) != BLEND_NONE && (dist (e, c) >= XBRZ_CFG_EQUAL_COLOR_TOLERANCE)) |
|
- | 351 | doLineBlend = false; // make sure there is no second blending in an adjacent rotation for this pixel: handles insular pixels, mario eyes |
|
- | 352 | else if ((dist (e, i) >= XBRZ_CFG_EQUAL_COLOR_TOLERANCE) |
|
- | 353 | && (dist (g, h) < XBRZ_CFG_EQUAL_COLOR_TOLERANCE) |
|
- | 354 | && (dist (h, i) < XBRZ_CFG_EQUAL_COLOR_TOLERANCE) |
|
- | 355 | && (dist (i, f) < XBRZ_CFG_EQUAL_COLOR_TOLERANCE) |
|
- | 356 | && (dist (f, c) < XBRZ_CFG_EQUAL_COLOR_TOLERANCE)) |
|
- | 357 | doLineBlend = false; // no full blending for L-shapes; blend corner only (handles "mario mushroom eyes") |
|
- | 358 | else |
|
572 |
|
359 | doLineBlend = true; |
573 | 360 | ||
574 | const uint32_t px = dist(e, f) <= dist(e, h) ? f : h; //choose most similar color |
361 | const uint32_t px = (dist (e, f) <= dist (e, h) ? f : h); //choose most similar color |
575 | 362 | ||
576 | OutputMatrix<Scaler::scale, rotDeg> out(target, trgWidth); |
363 | OutputMatrix<Scaler::scale, rotDeg> out(target, trgWidth); |
577 | 364 | ||
578 | if (doLineBlend) |
365 | if (doLineBlend) |
579 | { |
366 | { |
580 | const double fg = dist(f, g); //test sample: 70% of values max(fg, hc) / min(fg, hc) are between 1.1 and 3.7 with median being 1.9 |
367 | const double fg = dist (f, g); //test sample: 70% of values max(fg, hc) / min(fg, hc) are between 1.1 and 3.7 with median being 1.9 |
581 | const double hc = dist(h, c); // |
368 | const double hc = dist (h, c); // |
582 | 369 | ||
583 | const bool haveShallowLine = XBRZ_CFG_STEEP_DIRECTION_THRESHOLD * fg <= hc && e != g && d != g; |
370 | const bool haveShallowLine = XBRZ_CFG_STEEP_DIRECTION_THRESHOLD * fg <= hc && e != g && d != g; |
584 | const bool haveSteepLine = XBRZ_CFG_STEEP_DIRECTION_THRESHOLD * hc <= fg && e != c && b != c; |
371 | const bool haveSteepLine = XBRZ_CFG_STEEP_DIRECTION_THRESHOLD * hc <= fg && e != c && b != c; |
585 | 372 | ||
586 | if (haveShallowLine) |
373 | if (haveShallowLine) |
587 | { |
374 | { |
588 | if (haveSteepLine) |
375 | if (haveSteepLine) |
589 | Scaler::blendLineSteepAndShallow(px, |
376 | Scaler::blendLineSteepAndShallow(px, out, alphagrad); |
590 | else |
377 | else |
591 | Scaler::blendLineShallow(px, |
378 | Scaler::blendLineShallow(px, out, alphagrad); |
592 | } |
379 | } |
593 | else |
380 | else |
594 | { |
381 | { |
595 | if (haveSteepLine) |
382 | if (haveSteepLine) |
596 | Scaler::blendLineSteep(px, |
383 | Scaler::blendLineSteep(px, out, alphagrad); |
597 | else |
384 | else |
598 | Scaler::blendLineDiagonal(px, |
385 | Scaler::blendLineDiagonal(px, out, alphagrad); |
599 | } |
386 | } |
600 | } |
387 | } |
601 | else |
388 | else |
602 | Scaler::blendCorner(px, |
389 | Scaler::blendCorner(px, out, alphagrad); |
603 | } |
390 | } |
604 | 391 | ||
605 | #undef a |
392 | #undef a |
606 | #undef b |
393 | #undef b |
607 | #undef c |
394 | #undef c |
Line 612... | Line 399... | ||
612 | #undef h |
399 | #undef h |
613 | #undef i |
400 | #undef i |
614 | } |
401 | } |
615 | 402 | ||
616 | 403 | ||
617 | template <class |
404 | template <class Scaler> //scaler policy: see "Scaler2x" reference implementation |
618 | void scaleImage(const uint32_t* |
405 | void scaleImage(const uint32_t *src, uint32_t *trg, int srcWidth, int srcHeight, int yFirst, int yLast, alphagrad_func alphagrad, dist_func dist) |
619 | { |
406 | { |
620 | yFirst = |
407 | yFirst = MAX (yFirst, 0); |
621 | yLast = |
408 | yLast = MIN (yLast, srcHeight); |
622 | if (yFirst >= yLast || srcWidth <= 0) |
409 | if (yFirst >= yLast || srcWidth <= 0) |
623 | return; |
410 | return; |
624 | 411 | ||
625 | const int trgWidth = srcWidth * Scaler::scale; |
412 | const int trgWidth = srcWidth * Scaler::scale; |
626 | 413 | ||
627 | //"use" space at the end of the image as temporary buffer for "on the fly preprocessing": we even could use larger area of |
414 | //"use" space at the end of the image as temporary buffer for "on the fly preprocessing": we even could use larger area of |
628 | //"sizeof(uint32_t) * srcWidth * (yLast - yFirst)" bytes without risk of accidental overwriting before accessing |
415 | //"sizeof(uint32_t) * srcWidth * (yLast - yFirst)" bytes without risk of accidental overwriting before accessing |
629 | const int bufferSize = srcWidth; |
416 | const int bufferSize = srcWidth; |
630 | unsigned char* preProcBuffer = reinterpret_cast<unsigned char*>(trg + yLast * Scaler::scale * trgWidth) - bufferSize; |
417 | unsigned char* preProcBuffer = reinterpret_cast<unsigned char*>(trg + yLast * Scaler::scale * trgWidth) - bufferSize; |
631 |
|
418 | memset (preProcBuffer, 0, bufferSize); |
632 | static_assert(BLEND_NONE == 0, ""); |
419 | static_assert(BLEND_NONE == 0, ""); |
633 | 420 | ||
634 | //initialize preprocessing buffer for first row of current stripe: detect upper left and right corner blending |
421 | //initialize preprocessing buffer for first row of current stripe: detect upper left and right corner blending |
635 | //this cannot be optimized for adjacent processing stripes; we must not allow for a memory race condition! |
422 | //this cannot be optimized for adjacent processing stripes; we must not allow for a memory race condition! |
636 | if (yFirst > 0) |
423 | if (yFirst > 0) |
637 | { |
424 | { |
638 | const int y = yFirst - 1; |
425 | const int y = yFirst - 1; |
639 | 426 | ||
640 | const uint32_t* s_m1 = src + srcWidth * |
427 | const uint32_t* s_m1 = src + srcWidth * MAX (y - 1, 0); |
641 | const uint32_t* s_0 = src + srcWidth * y; //center line |
428 | const uint32_t* s_0 = src + srcWidth * y; //center line |
642 | const uint32_t* s_p1 = src + srcWidth * |
429 | const uint32_t* s_p1 = src + srcWidth * MIN (y + 1, srcHeight - 1); |
643 | const uint32_t* s_p2 = src + srcWidth * |
430 | const uint32_t* s_p2 = src + srcWidth * MIN (y + 2, srcHeight - 1); |
644 | 431 | ||
645 | for (int x = 0; x < srcWidth; ++x) |
432 | for (int x = 0; x < srcWidth; ++x) |
646 | { |
433 | { |
647 | const int x_m1 = |
434 | const int x_m1 = MAX (x - 1, 0); |
648 | const int x_p1 = |
435 | const int x_p1 = MIN (x + 1, srcWidth - 1); |
649 | const int x_p2 = |
436 | const int x_p2 = MIN (x + 2, srcWidth - 1); |
650 | 437 | ||
651 | Kernel_4x4 ker = {}; //perf: initialization is negligible |
438 | Kernel_4x4 ker = {}; //perf: initialization is negligible |
652 | ker.a = s_m1[x_m1]; //read sequentially from memory as far as possible |
439 | ker.a = s_m1[x_m1]; //read sequentially from memory as far as possible |
653 | ker.b = s_m1[x]; |
440 | ker.b = s_m1[x]; |
654 | ker.c = s_m1[x_p1]; |
441 | ker.c = s_m1[x_p1]; |
Line 667... | Line 454... | ||
667 | ker.m = s_p2[x_m1]; |
454 | ker.m = s_p2[x_m1]; |
668 | ker.n = s_p2[x]; |
455 | ker.n = s_p2[x]; |
669 | ker.o = s_p2[x_p1]; |
456 | ker.o = s_p2[x_p1]; |
670 | ker.p = s_p2[x_p2]; |
457 | ker.p = s_p2[x_p2]; |
671 | 458 | ||
672 | const BlendResult res = preProcessCorners |
459 | const BlendResult res = preProcessCorners (ker, dist); |
673 | /* |
460 | /* |
674 | preprocessing blend result: |
461 | preprocessing blend result: |
675 | --------- |
462 | --------- |
676 | | F | G | //evalute corner between F, G, J, K |
463 | | F | G | //evalute corner between F, G, J, K |
677 | ----|---| //input pixel is at position F |
464 | ----|---| //input pixel is at position F |
Line 686... | Line 473... | ||
686 | } |
473 | } |
687 | //------------------------------------------------------------------------------------ |
474 | //------------------------------------------------------------------------------------ |
688 | 475 | ||
689 | for (int y = yFirst; y < yLast; ++y) |
476 | for (int y = yFirst; y < yLast; ++y) |
690 | { |
477 | { |
691 | uint32_t* |
478 | uint32_t *out = trg + Scaler::scale * y * trgWidth; //consider MT "striped" access |
692 | 479 | ||
693 | const uint32_t* s_m1 = src + srcWidth * |
480 | const uint32_t* s_m1 = src + srcWidth * MAX (y - 1, 0); |
694 | const uint32_t* s_0 = src + srcWidth * y; //center line |
481 | const uint32_t* s_0 = src + srcWidth * y; //center line |
695 | const uint32_t* s_p1 = src + srcWidth * |
482 | const uint32_t* s_p1 = src + srcWidth * MIN (y + 1, srcHeight - 1); |
696 | const uint32_t* s_p2 = src + srcWidth * |
483 | const uint32_t* s_p2 = src + srcWidth * MIN (y + 2, srcHeight - 1); |
697 | 484 | ||
698 | unsigned char blend_xy1 = 0; //corner blending for current (x, y + 1) position |
485 | unsigned char blend_xy1 = 0; //corner blending for current (x, y + 1) position |
699 | 486 | ||
700 | for (int x = 0; x < srcWidth; ++x, out += Scaler::scale) |
487 | for (int x = 0; x < srcWidth; ++x, out += Scaler::scale) |
701 | { |
488 | { |
702 | //all those bounds checks have only insignificant impact on performance! |
489 | //all those bounds checks have only insignificant impact on performance! |
703 | const int x_m1 = |
490 | const int x_m1 = MAX (x - 1, 0); //perf: prefer array indexing to additional pointers! |
704 | const int x_p1 = |
491 | const int x_p1 = MIN (x + 1, srcWidth - 1); |
705 | const int x_p2 = |
492 | const int x_p2 = MIN (x + 2, srcWidth - 1); |
706 | 493 | ||
707 | Kernel_4x4 ker4 = {}; //perf: initialization is negligible |
494 | Kernel_4x4 ker4 = {}; //perf: initialization is negligible |
708 | 495 | ||
709 | ker4.a = s_m1[x_m1]; //read sequentially from memory as far as possible |
496 | ker4.a = s_m1[x_m1]; //read sequentially from memory as far as possible |
710 | ker4.b = s_m1[x]; |
497 | ker4.b = s_m1[x]; |
Line 727... | Line 514... | ||
727 | ker4.p = s_p2[x_p2]; |
514 | ker4.p = s_p2[x_p2]; |
728 | 515 | ||
729 | //evaluate the four corners on bottom-right of current pixel |
516 | //evaluate the four corners on bottom-right of current pixel |
730 | unsigned char blend_xy = 0; //for current (x, y) position |
517 | unsigned char blend_xy = 0; //for current (x, y) position |
731 | { |
518 | { |
732 | const BlendResult res = preProcessCorners |
519 | const BlendResult res = preProcessCorners (ker4, dist); |
733 | /* |
520 | /* |
734 | preprocessing blend result: |
521 | preprocessing blend result: |
735 | --------- |
522 | --------- |
736 | | F | G | //evalute corner between F, G, J, K |
523 | | F | G | //evalute corner between F, G, J, K |
737 | ----|---| //current input pixel is at position F |
524 | ----|---| //current input pixel is at position F |
Line 750... | Line 537... | ||
750 | if (x + 1 < bufferSize) //set 3rd known corner for (x + 1, y) |
537 | if (x + 1 < bufferSize) //set 3rd known corner for (x + 1, y) |
751 | setBottomL(preProcBuffer[x + 1], res.blend_g); |
538 | setBottomL(preProcBuffer[x + 1], res.blend_g); |
752 | } |
539 | } |
753 | 540 | ||
754 | //fill block of size scale * scale with the given color |
541 | //fill block of size scale * scale with the given color |
- | 542 | { |
|
- | 543 | uint32_t *blk = out; |
|
- | 544 | for (int _blk_y = 0; _blk_y < Scaler::scale; ++_blk_y, blk = (uint32_t *) BYTE_ADVANCE (blk, trgWidth * sizeof (uint32_t))) |
|
755 |
|
545 | for (int _blk_x = 0; _blk_x < Scaler::scale; ++_blk_x) |
- | 546 | blk[_blk_x] = ker4.f; |
|
- | 547 | } |
|
756 | //place *after* preprocessing step, to not overwrite the results while processing the the last pixel! |
548 | //place *after* preprocessing step, to not overwrite the results while processing the the last pixel! |
757 | 549 | ||
758 | //blend four corners of current pixel |
550 | //blend four corners of current pixel |
759 | if ( |
551 | if (blend_xy != 0) //good 5% perf-improvement |
760 | { |
552 | { |
761 | Kernel_3x3 ker3 = {}; //perf: initialization is negligible |
553 | Kernel_3x3 ker3 = {}; //perf: initialization is negligible |
762 | 554 | ||
763 | ker3.a = ker4.a; |
555 | ker3.a = ker4.a; |
764 | ker3.b = ker4.b; |
556 | ker3.b = ker4.b; |
Line 770... | Line 562... | ||
770 | 562 | ||
771 | ker3.g = ker4.i; |
563 | ker3.g = ker4.i; |
772 | ker3.h = ker4.j; |
564 | ker3.h = ker4.j; |
773 | ker3.i = ker4.k; |
565 | ker3.i = ker4.k; |
774 | 566 | ||
775 | blendPixel<Scaler, |
567 | blendPixel<Scaler, ROT_0 >(ker3, out, trgWidth, blend_xy, alphagrad, dist); |
776 | blendPixel<Scaler, |
568 | blendPixel<Scaler, ROT_90 >(ker3, out, trgWidth, blend_xy, alphagrad, dist); |
777 | blendPixel<Scaler, |
569 | blendPixel<Scaler, ROT_180>(ker3, out, trgWidth, blend_xy, alphagrad, dist); |
778 | blendPixel<Scaler, |
570 | blendPixel<Scaler, ROT_270>(ker3, out, trgWidth, blend_xy, alphagrad, dist); |
779 | } |
571 | } |
780 | } |
572 | } |
781 | } |
573 | } |
782 | } |
574 | } |
783 | 575 | ||
784 | 576 | ||
785 | //------------------------------------------------------------------------------------ |
577 | //------------------------------------------------------------------------------------ |
786 |
|
578 | struct Scaler2x |
787 | { |
579 | { |
788 | static const int scale = 2; |
580 | static const int scale = 2; |
789 | - | ||
790 | template <unsigned int M, unsigned int N> //bring template function into scope for GCC |
- | |
791 | static void alphaGrad(uint32_t& pixBack, uint32_t pixFront) { ColorGradient::template alphaGrad<M, N>(pixBack, pixFront); } |
- | |
792 | 581 | ||
793 | 582 | ||
794 | template <class OutputMatrix> |
583 | template <class OutputMatrix> |
795 | static void blendLineShallow(uint32_t col, OutputMatrix& |
584 | static void blendLineShallow(uint32_t col, OutputMatrix& out, alphagrad_func alphagrad) |
796 | { |
585 | { |
797 |
|
586 | alphagrad (&(out.template ref<scale - 1, 0>()), col, 1, 4); |
798 |
|
587 | alphagrad (&(out.template ref<scale - 1, 1>()), col, 3, 4); |
799 | } |
588 | } |
800 | 589 | ||
801 | template <class OutputMatrix> |
590 | template <class OutputMatrix> |
802 | static void blendLineSteep(uint32_t col, OutputMatrix& |
591 | static void blendLineSteep(uint32_t col, OutputMatrix& out, alphagrad_func alphagrad) |
803 | { |
592 | { |
804 |
|
593 | alphagrad (&(out.template ref<0, scale - 1>()), col, 1, 4); |
805 |
|
594 | alphagrad (&(out.template ref<1, scale - 1>()), col, 3, 4); |
806 | } |
595 | } |
807 | 596 | ||
808 | template <class OutputMatrix> |
597 | template <class OutputMatrix> |
809 | static void blendLineSteepAndShallow(uint32_t col, OutputMatrix& |
598 | static void blendLineSteepAndShallow(uint32_t col, OutputMatrix& out, alphagrad_func alphagrad) |
810 | { |
599 | { |
811 |
|
600 | alphagrad (&(out.template ref<1, 0>()), col, 1, 4); |
812 |
|
601 | alphagrad (&(out.template ref<0, 1>()), col, 1, 4); |
813 |
|
602 | alphagrad (&(out.template ref<1, 1>()), col, 5, 6); //[!] fixes 7/8 used in xBR |
814 | } |
603 | } |
815 | 604 | ||
816 | template <class OutputMatrix> |
605 | template <class OutputMatrix> |
817 | static void blendLineDiagonal(uint32_t col, OutputMatrix& |
606 | static void blendLineDiagonal(uint32_t col, OutputMatrix& out, alphagrad_func alphagrad) |
818 | { |
607 | { |
819 |
|
608 | alphagrad (&(out.template ref<1, 1>()), col, 1, 2); |
820 | } |
609 | } |
821 | 610 | ||
822 | template <class OutputMatrix> |
611 | template <class OutputMatrix> |
823 | static void blendCorner(uint32_t col, OutputMatrix& |
612 | static void blendCorner(uint32_t col, OutputMatrix& out, alphagrad_func alphagrad) |
824 | { |
613 | { |
825 | //model a round corner |
614 | //model a round corner |
826 |
|
615 | alphagrad (&(out.template ref<1, 1>()), col, 21, 100); //exact: 1 - pi/4 = 0.2146018366 |
827 | } |
616 | } |
828 | }; |
617 | }; |
829 | 618 | ||
830 | 619 | ||
831 |
|
620 | struct Scaler3x |
832 | { |
621 | { |
833 | static const int scale = 3; |
622 | static const int scale = 3; |
834 | - | ||
835 | template <unsigned int M, unsigned int N> //bring template function into scope for GCC |
- | |
836 | static void alphaGrad(uint32_t& pixBack, uint32_t pixFront) { ColorGradient::template alphaGrad<M, N>(pixBack, pixFront); } |
- | |
837 | 623 | ||
838 | 624 | ||
839 | template <class OutputMatrix> |
625 | template <class OutputMatrix> |
840 | static void blendLineShallow(uint32_t col, OutputMatrix& |
626 | static void blendLineShallow(uint32_t col, OutputMatrix& out, alphagrad_func alphagrad) |
841 | { |
627 | { |
842 |
|
628 | alphagrad (&(out.template ref<scale - 1, 0>()), col, 1, 4); |
843 |
|
629 | alphagrad (&(out.template ref<scale - 2, 2>()), col, 1, 4); |
844 | - | ||
845 |
|
630 | alphagrad (&(out.template ref<scale - 1, 1>()), col, 3, 4); |
846 | out.template ref<scale - 1, 2>() = col; |
631 | out.template ref<scale - 1, 2>() = col; |
847 | } |
632 | } |
848 | 633 | ||
849 | template <class OutputMatrix> |
634 | template <class OutputMatrix> |
850 | static void blendLineSteep(uint32_t col, OutputMatrix& |
635 | static void blendLineSteep(uint32_t col, OutputMatrix& out, alphagrad_func alphagrad) |
851 | { |
636 | { |
852 |
|
637 | alphagrad (&(out.template ref<0, scale - 1>()), col, 1, 4); |
853 |
|
638 | alphagrad (&(out.template ref<2, scale - 2>()), col, 1, 4); |
854 | - | ||
855 |
|
639 | alphagrad (&(out.template ref<1, scale - 1>()), col, 3, 4); |
856 | out.template ref<2, scale - 1>() = col; |
640 | out.template ref<2, scale - 1>() = col; |
857 | } |
641 | } |
858 | 642 | ||
859 | template <class OutputMatrix> |
643 | template <class OutputMatrix> |
860 | static void blendLineSteepAndShallow(uint32_t col, OutputMatrix& |
644 | static void blendLineSteepAndShallow(uint32_t col, OutputMatrix& out, alphagrad_func alphagrad) |
861 | { |
645 | { |
862 |
|
646 | alphagrad (&(out.template ref<2, 0>()), col, 1, 4); |
863 |
|
647 | alphagrad (&(out.template ref<0, 2>()), col, 1, 4); |
864 |
|
648 | alphagrad (&(out.template ref<2, 1>()), col, 3, 4); |
865 |
|
649 | alphagrad (&(out.template ref<1, 2>()), col, 3, 4); |
866 | out.template ref<2, 2>() = col; |
650 | out.template ref<2, 2>() = col; |
867 | } |
651 | } |
868 | 652 | ||
869 | template <class OutputMatrix> |
653 | template <class OutputMatrix> |
870 | static void blendLineDiagonal(uint32_t col, OutputMatrix& |
654 | static void blendLineDiagonal(uint32_t col, OutputMatrix& out, alphagrad_func alphagrad) |
871 | { |
655 | { |
872 |
|
656 | alphagrad (&(out.template ref<1, 2>()), col, 1, 8); //conflict with other rotations for this odd scale |
873 |
|
657 | alphagrad (&(out.template ref<2, 1>()), col, 1, 8); |
874 |
|
658 | alphagrad (&(out.template ref<2, 2>()), col, 7, 8); // |
875 | } |
659 | } |
876 | 660 | ||
877 | template <class OutputMatrix> |
661 | template <class OutputMatrix> |
878 | static void blendCorner(uint32_t col, OutputMatrix& |
662 | static void blendCorner(uint32_t col, OutputMatrix& out, alphagrad_func alphagrad) |
879 | { |
663 | { |
880 | //model a round corner |
664 | //model a round corner |
881 |
|
665 | alphagrad (&(out.template ref<2, 2>()), col, 45, 100); //exact: 0.4545939598 |
882 | // |
666 | //alphagrad (&(out.template ref<2, 1>()), col, 7, 256); //0.02826017254 -> negligible + avoid conflicts with other rotations for this odd scale |
883 | // |
667 | //alphagrad (&(out.template ref<1, 2>()), col, 7, 256); //0.02826017254 |
884 | } |
668 | } |
885 | }; |
669 | }; |
886 | 670 | ||
887 | 671 | ||
888 |
|
672 | struct Scaler4x |
889 | { |
673 | { |
890 | static const int scale = 4; |
674 | static const int scale = 4; |
891 | - | ||
892 | template <unsigned int M, unsigned int N> //bring template function into scope for GCC |
- | |
893 | static void alphaGrad(uint32_t& pixBack, uint32_t pixFront) { ColorGradient::template alphaGrad<M, N>(pixBack, pixFront); } |
- | |
894 | 675 | ||
895 | 676 | ||
896 | template <class OutputMatrix> |
677 | template <class OutputMatrix> |
897 | static void blendLineShallow(uint32_t col, OutputMatrix& |
678 | static void blendLineShallow(uint32_t col, OutputMatrix& out, alphagrad_func alphagrad) |
898 | { |
679 | { |
899 |
|
680 | alphagrad (&(out.template ref<scale - 1, 0>()), col, 1, 4); |
900 |
|
681 | alphagrad (&(out.template ref<scale - 2, 2>()), col, 1, 4); |
901 | - | ||
902 |
|
682 | alphagrad (&(out.template ref<scale - 1, 1>()), col, 3, 4); |
903 |
|
683 | alphagrad (&(out.template ref<scale - 2, 3>()), col, 3, 4); |
904 | 684 | ||
905 | out.template ref<scale - 1, 2>() = col; |
685 | out.template ref<scale - 1, 2>() = col; |
906 | out.template ref<scale - 1, 3>() = col; |
686 | out.template ref<scale - 1, 3>() = col; |
907 | } |
687 | } |
908 | 688 | ||
909 | template <class OutputMatrix> |
689 | template <class OutputMatrix> |
910 | static void blendLineSteep(uint32_t col, OutputMatrix& |
690 | static void blendLineSteep(uint32_t col, OutputMatrix& out, alphagrad_func alphagrad) |
911 | { |
691 | { |
912 |
|
692 | alphagrad (&(out.template ref<0, scale - 1>()), col, 1, 4); |
913 |
|
693 | alphagrad (&(out.template ref<2, scale - 2>()), col, 1, 4); |
914 | - | ||
915 |
|
694 | alphagrad (&(out.template ref<1, scale - 1>()), col, 3, 4); |
916 |
|
695 | alphagrad (&(out.template ref<3, scale - 2>()), col, 3, 4); |
917 | 696 | ||
918 | out.template ref<2, scale - 1>() = col; |
697 | out.template ref<2, scale - 1>() = col; |
919 | out.template ref<3, scale - 1>() = col; |
698 | out.template ref<3, scale - 1>() = col; |
920 | } |
699 | } |
921 | 700 | ||
922 | template <class OutputMatrix> |
701 | template <class OutputMatrix> |
923 | static void blendLineSteepAndShallow(uint32_t col, OutputMatrix& |
702 | static void blendLineSteepAndShallow(uint32_t col, OutputMatrix& out, alphagrad_func alphagrad) |
924 | { |
703 | { |
925 |
|
704 | alphagrad (&(out.template ref<3, 1>()), col, 3, 4); |
926 |
|
705 | alphagrad (&(out.template ref<1, 3>()), col, 3, 4); |
927 |
|
706 | alphagrad (&(out.template ref<3, 0>()), col, 1, 4); |
928 |
|
707 | alphagrad (&(out.template ref<0, 3>()), col, 1, 4); |
929 | - | ||
930 |
|
708 | alphagrad (&(out.template ref<2, 2>()), col, 1, 3); //[!] fixes 1/4 used in xBR |
931 | 709 | ||
932 | out.template ref<3, 3>() = col; |
710 | out.template ref<3, 3>() = col; |
933 | out.template ref<3, 2>() = col; |
711 | out.template ref<3, 2>() = col; |
934 | out.template ref<2, 3>() = col; |
712 | out.template ref<2, 3>() = col; |
935 | } |
713 | } |
936 | 714 | ||
937 | template <class OutputMatrix> |
715 | template <class OutputMatrix> |
938 | static void blendLineDiagonal(uint32_t col, OutputMatrix& |
716 | static void blendLineDiagonal(uint32_t col, OutputMatrix& out, alphagrad_func alphagrad) |
939 | { |
717 | { |
940 |
|
718 | alphagrad (&(out.template ref<scale - 1, scale / 2 >()), col, 1, 2); |
941 |
|
719 | alphagrad (&(out.template ref<scale - 2, scale / 2 + 1>()), col, 1, 2); |
- | 720 | ||
942 | out.template ref<scale - 1, scale - 1>() = col; |
721 | out.template ref<scale - 1, scale - 1>() = col; |
943 | } |
722 | } |
944 | 723 | ||
945 | template <class OutputMatrix> |
724 | template <class OutputMatrix> |
946 | static void blendCorner(uint32_t col, OutputMatrix& |
725 | static void blendCorner(uint32_t col, OutputMatrix& out, alphagrad_func alphagrad) |
947 | { |
726 | { |
948 | //model a round corner |
727 | //model a round corner |
949 |
|
728 | alphagrad (&(out.template ref<3, 3>()), col, 68, 100); //exact: 0.6848532563 |
950 |
|
729 | alphagrad (&(out.template ref<3, 2>()), col, 9, 100); //0.08677704501 |
951 |
|
730 | alphagrad (&(out.template ref<2, 3>()), col, 9, 100); //0.08677704501 |
952 | } |
731 | } |
953 | }; |
732 | }; |
954 | 733 | ||
955 | 734 | ||
956 |
|
735 | struct Scaler5x |
957 | { |
736 | { |
958 | static const int scale = 5; |
737 | static const int scale = 5; |
959 | - | ||
960 | template <unsigned int M, unsigned int N> //bring template function into scope for GCC |
- | |
961 | static void alphaGrad(uint32_t& pixBack, uint32_t pixFront) { ColorGradient::template alphaGrad<M, N>(pixBack, pixFront); } |
- | |
962 | 738 | ||
963 | 739 | ||
964 | template <class OutputMatrix> |
740 | template <class OutputMatrix> |
965 | static void blendLineShallow(uint32_t col, OutputMatrix& |
741 | static void blendLineShallow(uint32_t col, OutputMatrix& out, alphagrad_func alphagrad) |
966 | { |
742 | { |
967 |
|
743 | alphagrad (&(out.template ref<scale - 1, 0>()), col, 1, 4); |
968 |
|
744 | alphagrad (&(out.template ref<scale - 2, 2>()), col, 1, 4); |
969 |
|
745 | alphagrad (&(out.template ref<scale - 3, 4>()), col, 1, 4); |
970 | - | ||
971 |
|
746 | alphagrad (&(out.template ref<scale - 1, 1>()), col, 3, 4); |
972 |
|
747 | alphagrad (&(out.template ref<scale - 2, 3>()), col, 3, 4); |
973 | 748 | ||
974 | out.template ref<scale - 1, 2>() = col; |
749 | out.template ref<scale - 1, 2>() = col; |
975 | out.template ref<scale - 1, 3>() = col; |
750 | out.template ref<scale - 1, 3>() = col; |
976 | out.template ref<scale - 1, 4>() = col; |
751 | out.template ref<scale - 1, 4>() = col; |
977 | out.template ref<scale - 2, 4>() = col; |
752 | out.template ref<scale - 2, 4>() = col; |
978 | } |
753 | } |
979 | 754 | ||
980 | template <class OutputMatrix> |
755 | template <class OutputMatrix> |
981 | static void blendLineSteep(uint32_t col, OutputMatrix& |
756 | static void blendLineSteep(uint32_t col, OutputMatrix& out, alphagrad_func alphagrad) |
982 | { |
757 | { |
983 |
|
758 | alphagrad (&(out.template ref<0, scale - 1>()), col, 1, 4); |
984 |
|
759 | alphagrad (&(out.template ref<2, scale - 2>()), col, 1, 4); |
985 |
|
760 | alphagrad (&(out.template ref<4, scale - 3>()), col, 1, 4); |
986 | - | ||
987 |
|
761 | alphagrad (&(out.template ref<1, scale - 1>()), col, 3, 4); |
988 |
|
762 | alphagrad (&(out.template ref<3, scale - 2>()), col, 3, 4); |
989 | 763 | ||
990 | out.template ref<2, scale - 1>() = col; |
764 | out.template ref<2, scale - 1>() = col; |
991 | out.template ref<3, scale - 1>() = col; |
765 | out.template ref<3, scale - 1>() = col; |
992 | out.template ref<4, scale - 1>() = col; |
766 | out.template ref<4, scale - 1>() = col; |
993 | out.template ref<4, scale - 2>() = col; |
767 | out.template ref<4, scale - 2>() = col; |
994 | } |
768 | } |
995 | 769 | ||
996 | template <class OutputMatrix> |
770 | template <class OutputMatrix> |
997 | static void blendLineSteepAndShallow(uint32_t col, OutputMatrix& |
771 | static void blendLineSteepAndShallow(uint32_t col, OutputMatrix& out, alphagrad_func alphagrad) |
998 | { |
772 | { |
999 |
|
773 | alphagrad (&(out.template ref<0, scale - 1>()), col, 1, 4); |
1000 |
|
774 | alphagrad (&(out.template ref<2, scale - 2>()), col, 1, 4); |
1001 |
|
775 | alphagrad (&(out.template ref<1, scale - 1>()), col, 3, 4); |
1002 | - | ||
1003 |
|
776 | alphagrad (&(out.template ref<scale - 1, 0>()), col, 1, 4); |
1004 |
|
777 | alphagrad (&(out.template ref<scale - 2, 2>()), col, 1, 4); |
1005 |
|
778 | alphagrad (&(out.template ref<scale - 1, 1>()), col, 3, 4); |
1006 | - | ||
1007 |
|
779 | alphagrad (&(out.template ref<3, 3>()), col, 2, 3); |
1008 | 780 | ||
1009 | out.template ref<2, scale - 1>() = col; |
781 | out.template ref<2, scale - 1>() = col; |
1010 | out.template ref<3, scale - 1>() = col; |
782 | out.template ref<3, scale - 1>() = col; |
1011 | out.template ref<4, scale - 1>() = col; |
783 | out.template ref<4, scale - 1>() = col; |
1012 | - | ||
1013 | out.template ref<scale - 1, 2>() = col; |
784 | out.template ref<scale - 1, 2>() = col; |
1014 | out.template ref<scale - 1, 3>() = col; |
785 | out.template ref<scale - 1, 3>() = col; |
1015 | } |
786 | } |
1016 | 787 | ||
1017 | template <class OutputMatrix> |
788 | template <class OutputMatrix> |
1018 | static void blendLineDiagonal(uint32_t col, OutputMatrix& |
789 | static void blendLineDiagonal(uint32_t col, OutputMatrix& out, alphagrad_func alphagrad) |
1019 | { |
790 | { |
1020 |
|
791 | alphagrad (&(out.template ref<scale - 1, scale / 2 >()), col, 1, 8); //conflict with other rotations for this odd scale |
1021 |
|
792 | alphagrad (&(out.template ref<scale - 2, scale / 2 + 1>()), col, 1, 8); |
1022 |
|
793 | alphagrad (&(out.template ref<scale - 3, scale / 2 + 2>()), col, 1, 8); // |
1023 | - | ||
1024 |
|
794 | alphagrad (&(out.template ref<4, 3>()), col, 7, 8); |
1025 |
|
795 | alphagrad (&(out.template ref<3, 4>()), col, 7, 8); |
1026 | 796 | ||
1027 | out.template ref<4, 4>() = col; |
797 | out.template ref<4, 4>() = col; |
1028 | } |
798 | } |
1029 | 799 | ||
1030 | template <class OutputMatrix> |
800 | template <class OutputMatrix> |
1031 | static void blendCorner(uint32_t col, OutputMatrix& |
801 | static void blendCorner(uint32_t col, OutputMatrix& out, alphagrad_func alphagrad) |
1032 | { |
802 | { |
1033 | // model a round corner |
803 | // model a round corner |
1034 |
|
804 | alphagrad (&(out.template ref<4, 4>()), col, 86, 100); //exact: 0.8631434088 |
1035 |
|
805 | alphagrad (&(out.template ref<4, 3>()), col, 23, 100); //0.2306749731 |
1036 |
|
806 | alphagrad (&(out.template ref<3, 4>()), col, 23, 100); //0.2306749731 |
1037 | //alphaGrad<1, 64>(out.template ref<4, 2>(), col); //0.01676812367 -> negligible + avoid conflicts with other rotations for this odd scale |
807 | //alphaGrad<1, 64>(out.template ref<4, 2>(), col); //0.01676812367 -> negligible + avoid conflicts with other rotations for this odd scale |
1038 | //alphaGrad<1, 64>(out.template ref<2, 4>(), col); //0.01676812367 |
808 | //alphaGrad<1, 64>(out.template ref<2, 4>(), col); //0.01676812367 |
1039 | } |
809 | } |
1040 | }; |
810 | }; |
1041 | 811 | ||
1042 | 812 | ||
1043 |
|
813 | struct Scaler6x |
1044 | { |
814 | { |
1045 | static const int scale = 6; |
815 | static const int scale = 6; |
1046 | - | ||
1047 | template <unsigned int M, unsigned int N> //bring template function into scope for GCC |
- | |
1048 | static void alphaGrad(uint32_t& pixBack, uint32_t pixFront) { ColorGradient::template alphaGrad<M, N>(pixBack, pixFront); } |
- | |
1049 | 816 | ||
1050 | 817 | ||
1051 | template <class OutputMatrix> |
818 | template <class OutputMatrix> |
1052 | static void blendLineShallow(uint32_t col, OutputMatrix& |
819 | static void blendLineShallow(uint32_t col, OutputMatrix& out, alphagrad_func alphagrad) |
1053 | { |
820 | { |
1054 |
|
821 | alphagrad (&(out.template ref<scale - 1, 0>()), col, 1, 4); |
1055 |
|
822 | alphagrad (&(out.template ref<scale - 2, 2>()), col, 1, 4); |
1056 |
|
823 | alphagrad (&(out.template ref<scale - 3, 4>()), col, 1, 4); |
1057 | - | ||
1058 |
|
824 | alphagrad (&(out.template ref<scale - 1, 1>()), col, 3, 4); |
1059 |
|
825 | alphagrad (&(out.template ref<scale - 2, 3>()), col, 3, 4); |
1060 |
|
826 | alphagrad (&(out.template ref<scale - 3, 5>()), col, 3, 4); |
1061 | 827 | ||
1062 | out.template ref<scale - 1, 2>() = col; |
828 | out.template ref<scale - 1, 2>() = col; |
1063 | out.template ref<scale - 1, 3>() = col; |
829 | out.template ref<scale - 1, 3>() = col; |
1064 | out.template ref<scale - 1, 4>() = col; |
830 | out.template ref<scale - 1, 4>() = col; |
1065 | out.template ref<scale - 1, 5>() = col; |
831 | out.template ref<scale - 1, 5>() = col; |
1066 | - | ||
1067 | out.template ref<scale - 2, 4>() = col; |
832 | out.template ref<scale - 2, 4>() = col; |
1068 | out.template ref<scale - 2, 5>() = col; |
833 | out.template ref<scale - 2, 5>() = col; |
1069 | } |
834 | } |
1070 | 835 | ||
1071 | template <class OutputMatrix> |
836 | template <class OutputMatrix> |
1072 | static void blendLineSteep(uint32_t col, OutputMatrix& |
837 | static void blendLineSteep(uint32_t col, OutputMatrix& out, alphagrad_func alphagrad) |
1073 | { |
838 | { |
1074 |
|
839 | alphagrad (&(out.template ref<0, scale - 1>()), col, 1, 4); |
1075 |
|
840 | alphagrad (&(out.template ref<2, scale - 2>()), col, 1, 4); |
1076 |
|
841 | alphagrad (&(out.template ref<4, scale - 3>()), col, 1, 4); |
1077 | - | ||
1078 |
|
842 | alphagrad (&(out.template ref<1, scale - 1>()), col, 3, 4); |
1079 |
|
843 | alphagrad (&(out.template ref<3, scale - 2>()), col, 3, 4); |
1080 |
|
844 | alphagrad (&(out.template ref<5, scale - 3>()), col, 3, 4); |
1081 | 845 | ||
1082 | out.template ref<2, scale - 1>() = col; |
846 | out.template ref<2, scale - 1>() = col; |
1083 | out.template ref<3, scale - 1>() = col; |
847 | out.template ref<3, scale - 1>() = col; |
1084 | out.template ref<4, scale - 1>() = col; |
848 | out.template ref<4, scale - 1>() = col; |
1085 | out.template ref<5, scale - 1>() = col; |
849 | out.template ref<5, scale - 1>() = col; |
1086 | - | ||
1087 | out.template ref<4, scale - 2>() = col; |
850 | out.template ref<4, scale - 2>() = col; |
1088 | out.template ref<5, scale - 2>() = col; |
851 | out.template ref<5, scale - 2>() = col; |
1089 | } |
852 | } |
1090 | 853 | ||
1091 | template <class OutputMatrix> |
854 | template <class OutputMatrix> |
1092 | static void blendLineSteepAndShallow(uint32_t col, OutputMatrix& |
855 | static void blendLineSteepAndShallow(uint32_t col, OutputMatrix& out, alphagrad_func alphagrad) |
1093 | { |
856 | { |
1094 |
|
857 | alphagrad (&(out.template ref<0, scale - 1>()), col, 1, 4); |
1095 |
|
858 | alphagrad (&(out.template ref<2, scale - 2>()), col, 1, 4); |
1096 |
|
859 | alphagrad (&(out.template ref<1, scale - 1>()), col, 3, 4); |
1097 |
|
860 | alphagrad (&(out.template ref<3, scale - 2>()), col, 3, 4); |
1098 | - | ||
1099 |
|
861 | alphagrad (&(out.template ref<scale - 1, 0>()), col, 1, 4); |
1100 |
|
862 | alphagrad (&(out.template ref<scale - 2, 2>()), col, 1, 4); |
1101 |
|
863 | alphagrad (&(out.template ref<scale - 1, 1>()), col, 3, 4); |
1102 |
|
864 | alphagrad (&(out.template ref<scale - 2, 3>()), col, 3, 4); |
1103 | 865 | ||
1104 | out.template ref<2, scale - 1>() = col; |
866 | out.template ref<2, scale - 1>() = col; |
1105 | out.template ref<3, scale - 1>() = col; |
867 | out.template ref<3, scale - 1>() = col; |
1106 | out.template ref<4, scale - 1>() = col; |
868 | out.template ref<4, scale - 1>() = col; |
1107 | out.template ref<5, scale - 1>() = col; |
869 | out.template ref<5, scale - 1>() = col; |
1108 | - | ||
1109 | out.template ref<4, scale - 2>() = col; |
870 | out.template ref<4, scale - 2>() = col; |
1110 | out.template ref<5, scale - 2>() = col; |
871 | out.template ref<5, scale - 2>() = col; |
1111 | - | ||
1112 | out.template ref<scale - 1, 2>() = col; |
872 | out.template ref<scale - 1, 2>() = col; |
1113 | out.template ref<scale - 1, 3>() = col; |
873 | out.template ref<scale - 1, 3>() = col; |
1114 | } |
874 | } |
1115 | 875 | ||
1116 | template <class OutputMatrix> |
876 | template <class OutputMatrix> |
1117 | static void blendLineDiagonal(uint32_t col, OutputMatrix& |
877 | static void blendLineDiagonal(uint32_t col, OutputMatrix& out, alphagrad_func alphagrad) |
1118 | { |
878 | { |
1119 |
|
879 | alphagrad (&(out.template ref<scale - 1, scale / 2 >()), col, 1, 2); |
1120 |
|
880 | alphagrad (&(out.template ref<scale - 2, scale / 2 + 1>()), col, 1, 2); |
1121 |
|
881 | alphagrad (&(out.template ref<scale - 3, scale / 2 + 2>()), col, 1, 2); |
1122 | 882 | ||
1123 | out.template ref<scale - 2, scale - 1>() = col; |
883 | out.template ref<scale - 2, scale - 1>() = col; |
1124 | out.template ref<scale - 1, scale - 1>() = col; |
884 | out.template ref<scale - 1, scale - 1>() = col; |
1125 | out.template ref<scale - 1, scale - 2>() = col; |
885 | out.template ref<scale - 1, scale - 2>() = col; |
1126 | } |
886 | } |
1127 | 887 | ||
1128 | template <class OutputMatrix> |
888 | template <class OutputMatrix> |
1129 | static void blendCorner(uint32_t col, OutputMatrix& |
889 | static void blendCorner(uint32_t col, OutputMatrix& out, alphagrad_func alphagrad) |
1130 | { |
890 | { |
1131 | //model a round corner |
891 | //model a round corner |
1132 |
|
892 | alphagrad (&(out.template ref<5, 5>()), col, 97, 100); //exact: 0.9711013910 |
1133 |
|
893 | alphagrad (&(out.template ref<4, 5>()), col, 42, 100); //0.4236372243 |
1134 |
|
894 | alphagrad (&(out.template ref<5, 4>()), col, 42, 100); //0.4236372243 |
1135 |
|
895 | alphagrad (&(out.template ref<5, 3>()), col, 6, 100); //0.05652034508 |
1136 |
|
896 | alphagrad (&(out.template ref<3, 5>()), col, 6, 100); //0.05652034508 |
1137 | } |
897 | } |
1138 | }; |
898 | }; |
1139 | 899 | ||
1140 | //------------------------------------------------------------------------------------ |
900 | //------------------------------------------------------------------------------------ |
1141 | struct ColorDistanceRGB |
- | |
1142 |
|
901 | } |
1143 | static double dist(uint32_t pix1, uint32_t pix2, double luminanceWeight) |
- | |
1144 | { |
- | |
1145 | return distYCbCrBuffered(pix1, pix2); |
- | |
1146 | 902 | ||
1147 | //if (pix1 == pix2) //about 4% perf boost |
- | |
1148 | // return 0; |
- | |
1149 | //return distYCbCr(pix1, pix2, luminanceWeight); |
- | |
1150 | } |
- | |
1151 | }; |
- | |
1152 | 903 | ||
1153 | struct ColorDistanceARGB |
- | |
1154 | { |
- | |
1155 | static double dist(uint32_t pix1, uint32_t pix2, double luminanceWeight) |
- | |
1156 | { |
- | |
1157 | const double a1 = GET_ALPHA (pix1) / 255.0 ; |
- | |
1158 | const double a2 = GET_ALPHA (pix2) / 255.0 ; |
- | |
1159 | /* |
- | |
1160 | Requirements for a color distance handling alpha channel: with a1, a2 in [0, 1] |
- | |
1161 | |
- | |
1162 | 1. if a1 = a2, distance should be: a1 * distYCbCr() |
- | |
1163 | 2. if a1 = 0, distance should be: a2 * distYCbCr(black, white) = a2 * 255 |
- | |
1164 | 3. if a1 = 1, ??? maybe: 255 * (1 - a2) + a2 * distYCbCr() |
- | |
1165 | */ |
- | |
1166 | 904 | ||
- | 905 | static double dist24 (uint32_t pix1, uint32_t pix2) |
|
- | 906 | { |
|
- | 907 | //30% perf boost compared to plain distYCbCr()! |
|
1167 |
|
908 | //consumes 64 MB memory; using double is only 2% faster, but takes 128 MB |
1168 |
|
909 | static float diffToDist[256 * 256 * 256]; |
1169 |
|
910 | static bool is_initialized = false; |
1170 |
|
911 | if (!is_initialized) |
- | 912 | { |
|
1171 |
|
913 | for (uint32_t i = 0; i < 256 * 256 * 256; ++i) //startup time: 114 ms on Intel Core i5 (four cores) |
1172 |
|
914 | { |
1173 |
|
915 | const int r_diff = GET_RED (i) * 2 - 0xFF; |
- | 916 | const int g_diff = GET_GREEN (i) * 2 - 0xFF; |
|
- | 917 | const int b_diff = GET_BLUE (i) * 2 - 0xFF; |
|
1174 | 918 | ||
1175 |
|
919 | const double k_b = 0.0593; //ITU-R BT.2020 conversion |
1176 |
|
920 | const double k_r = 0.2627; // |
1177 | }; |
- | |
- | 921 | const double k_g = 1 - k_b - k_r; |
|
1178 | 922 | ||
1179 | struct ColorGradientRGB |
- | |
1180 | { |
- | |
1181 |
|
923 | const double scale_b = 0.5 / (1 - k_b); |
1182 | { |
- | |
1183 |
|
924 | const double scale_r = 0.5 / (1 - k_r); |
1184 | } |
- | |
1185 | }; |
- | |
1186 | 925 | ||
- | 926 | const double y = k_r * r_diff + k_g * g_diff + k_b * b_diff; //[!], analog YCbCr! |
|
- | 927 | const double c_b = scale_b * (b_diff - y); |
|
1187 |
|
928 | const double c_r = scale_r * (r_diff - y); |
1188 | { |
929 | |
1189 |
|
930 | diffToDist[i] = (float) (sqrt ((y * y) + (c_b * c_b) + (c_r * c_r))); |
1190 |
|
931 | } |
1191 |
|
932 | is_initialized = true; |
1192 |
|
933 | } |
- | 934 | ||
- | 935 | const int r_diff = (int) GET_RED (pix1) - (int) GET_RED (pix2); |
|
- | 936 | const int g_diff = (int) GET_GREEN (pix1) - (int) GET_GREEN (pix2); |
|
- | 937 | const int b_diff = (int) GET_BLUE (pix1) - (int) GET_BLUE (pix2); |
|
1193 | }; |
938 | |
- | 939 | return diffToDist[(((r_diff + 0xFF) / 2) << 16) | //slightly reduce precision (division by 2) to squeeze value into single byte |
|
- | 940 | (((g_diff + 0xFF) / 2) << 8) | |
|
- | 941 | (((b_diff + 0xFF) / 2) << 0)]; |
|
1194 | } |
942 | } |
1195 | 943 | ||
1196 | 944 | ||
- | 945 | static double dist32 (uint32_t pix1, uint32_t pix2) |
|
- | 946 | { |
|
- | 947 | const double a1 = GET_ALPHA (pix1) / 255.0 ; |
|
- | 948 | const double a2 = GET_ALPHA (pix2) / 255.0 ; |
|
- | 949 | /* |
|
- | 950 | Requirements for a color distance handling alpha channel: with a1, a2 in [0, 1] |
|
1197 | 951 | ||
- | 952 | 1. if a1 = a2, distance should be: a1 * distYCbCr() |
|
- | 953 | 2. if a1 = 0, distance should be: a2 * distYCbCr(black, white) = a2 * 255 |
|
- | 954 | 3. if a1 = 1, ??? maybe: 255 * (1 - a2) + a2 * distYCbCr() |
|
- | 955 | */ |
|
- | 956 | ||
- | 957 | //return MIN (a1, a2) * distYCbCrBuffered(pix1, pix2) + 255 * abs(a1 - a2); |
|
- | 958 | //=> following code is 15% faster: |
|
- | 959 | const double d = dist24 (pix1, pix2); |
|
- | 960 | return (a1 < a2 ? a1 * d + 255 * (a2 - a1) : a2 * d + 255 * (a1 - a2)); |
|
- | 961 | } |
|
- | 962 | ||
- | 963 | ||
1198 |
|
964 | static void alphagrad24 (uint32_t *pixBack, uint32_t pixFront, unsigned int M, unsigned int N) |
1199 | { |
965 | { |
- | 966 | // blend front color with opacity M / N over opaque background: http://en.wikipedia.org/wiki/Alpha_compositing#Alpha_blending |
|
- | 967 | *pixBack = ( (CALC_COLOR24 (GET_RED (pixFront), GET_RED (*pixBack), M, N) << 16) |
|
- | 968 | | (CALC_COLOR24 (GET_GREEN (pixFront), GET_GREEN (*pixBack), M, N) << 8) |
|
- | 969 | | (CALC_COLOR24 (GET_BLUE (pixFront), GET_BLUE (*pixBack), M, N) << 0)); |
|
- | 970 | } |
|
- | 971 | ||
- | 972 | ||
- | 973 | static void alphagrad32 (uint32_t *pixBack, uint32_t pixFront, unsigned int M, unsigned int N) |
|
- | 974 | { |
|
- | 975 | // find intermediate color between two colors with alpha channels (=> NO alpha blending!!!) |
|
- | 976 | const unsigned int weightFront = GET_ALPHA (pixFront) * M; |
|
- | 977 | const unsigned int weightBack = GET_ALPHA (*pixBack) * (N - M); |
|
- | 978 | const unsigned int weightSum = weightFront + weightBack; |
|
- | 979 | *pixBack = (weightSum == 0 ? 0 : |
|
- | 980 | (((unsigned char) (weightSum / N)) << 24) |
|
- | 981 | | (CALC_COLOR32 (GET_RED (pixFront), GET_RED (*pixBack), weightFront, weightBack, weightSum) << 16) |
|
- | 982 | | (CALC_COLOR32 (GET_GREEN (pixFront), GET_GREEN (*pixBack), weightFront, weightBack, weightSum) << 8) |
|
- | 983 | | (CALC_COLOR32 (GET_BLUE (pixFront), GET_BLUE (*pixBack), weightFront, weightBack, weightSum) << 0)); |
|
- | 984 | } |
|
- | 985 | ||
- | 986 | ||
- | 987 | EXTERN_C void nearestNeighborScale(const uint32_t *src, int srcWidth, int srcHeight, uint32_t *trg, int trgWidth, int trgHeight) |
|
- | 988 | { |
|
1200 |
|
989 | // nearestNeighborScale (src, srcWidth, srcHeight, srcWidth * sizeof (uint32_t), trg, trgWidth, trgHeight, trgWidth * sizeof (uint32_t), XBRZ_SLICETYPE_TARGET, 0, trgHeight, [](uint32_t pix) { return pix; }); |
- | 990 | //static_assert(std::is_integral<PixSrc>::value, "PixSrc* is expected to be cast-able to char*"); |
|
- | 991 | //static_assert(std::is_integral<PixTrg>::value, "PixTrg* is expected to be cast-able to char*"); |
|
- | 992 | //static_assert(std::is_same<decltype(pixCvrt(PixSrc())), PixTrg>::value, "PixConverter returning wrong pixel format"); |
|
- | 993 | ||
- | 994 | int srcPitch = srcWidth * sizeof (uint32_t); |
|
- | 995 | int trgPitch = trgWidth * sizeof (uint32_t); |
|
- | 996 | int yFirst; |
|
- | 997 | int yLast; |
|
- | 998 | ||
- | 999 | #if 0 // going over source image - fast for upscaling, since source is read only once |
|
- | 1000 | yFirst = 0; |
|
- | 1001 | yLast = MIN (trgHeight, srcHeight); |
|
- | 1002 | ||
- | 1003 | if (yFirst >= yLast || trgWidth <= 0 || trgHeight <= 0) |
|
- | 1004 | return; // consistency check |
|
- | 1005 | ||
- | 1006 | for (int y = yFirst; y < yLast; ++y) |
|
- | 1007 | { |
|
- | 1008 | //mathematically: ySrc = floor(srcHeight * yTrg / trgHeight) |
|
- | 1009 | // => search for integers in: [ySrc, ySrc + 1) * trgHeight / srcHeight |
|
- | 1010 | ||
- | 1011 | //keep within for loop to support MT input slices! |
|
- | 1012 | const int yTrg_first = ( y * trgHeight + srcHeight - 1) / srcHeight; //=ceil(y * trgHeight / srcHeight) |
|
- | 1013 | const int yTrg_last = ((y + 1) * trgHeight + srcHeight - 1) / srcHeight; //=ceil(((y + 1) * trgHeight) / srcHeight) |
|
- | 1014 | const int blockHeight = yTrg_last - yTrg_first; |
|
- | 1015 | ||
- | 1016 | if (blockHeight > 0) |
|
- | 1017 | { |
|
- | 1018 | const uint32_t *srcLine = (const uint32_t *) BYTE_ADVANCE (src, y * srcPitch); |
|
- | 1019 | /**/ uint32_t *trgLine = ( uint32_t *) BYTE_ADVANCE (trg, yTrg_first * trgPitch); |
|
- | 1020 | int xTrg_first = 0; |
|
- | 1021 | ||
- | 1022 | for (int x = 0; x < srcWidth; ++x) |
|
- | 1023 | { |
|
- | 1024 | const int xTrg_last = ((x + 1) * trgWidth + srcWidth - 1) / srcWidth; |
|
- | 1025 | const int blockWidth = xTrg_last - xTrg_first; |
|
- | 1026 | if (blockWidth > 0) |
|
- | 1027 | { |
|
- | 1028 | const uint32_t trgColor = srcLine[x]; |
|
- | 1029 | uint32_t *blkLine = trgLine; |
|
- | 1030 | ||
- | 1031 | xTrg_first = xTrg_last; |
|
- | 1032 | ||
- | 1033 | for (int blk_y = 0; blk_y < blockHeight; ++blk_y, blkLine = (uint32_t *) BYTE_ADVANCE (blkLine, trgPitch)) |
|
- | 1034 | for (int blk_x = 0; blk_x < blockWidth; ++blk_x) |
|
- | 1035 | blkLine[blk_x] = trgColor; |
|
- | 1036 | ||
- | 1037 | trgLine += blockWidth; |
|
- | 1038 | } |
|
- | 1039 | } |
|
- | 1040 | } |
|
- | 1041 | } |
|
- | 1042 | #else // going over target image - slow for upscaling, since source is read multiple times missing out on cache! Fast for similar image sizes! |
|
- | 1043 | yFirst = 0; |
|
- | 1044 | yLast = trgHeight; |
|
- | 1045 | ||
- | 1046 | if (yFirst >= yLast || srcHeight <= 0 || srcWidth <= 0) |
|
- | 1047 | return; // consistency check |
|
- | 1048 | ||
- | 1049 | for (int y = yFirst; y < yLast; ++y) |
|
- | 1050 | { |
|
- | 1051 | /**/ uint32_t *trgLine = ( uint32_t *) BYTE_ADVANCE (trg, y * trgPitch); |
|
- | 1052 | const int ySrc = srcHeight * y / trgHeight; |
|
- | 1053 | const uint32_t *srcLine = (const uint32_t *) BYTE_ADVANCE (src, ySrc * srcPitch); |
|
- | 1054 | for (int x = 0; x < trgWidth; ++x) |
|
- | 1055 | { |
|
- | 1056 | const int xSrc = srcWidth * x / trgWidth; |
|
- | 1057 | trgLine[x] = srcLine[xSrc]; |
|
- | 1058 | } |
|
- | 1059 | } |
|
- | 1060 | #endif // going over source or target |
|
- | 1061 | ||
- | 1062 | return; |
|
1201 | } |
1063 | } |
1202 | 1064 | ||
1203 | 1065 | ||
1204 | EXTERN_C bool xbrz_equalcolortest24 (uint32_t col1, uint32_t col2, double luminanceWeight, double equalColorTolerance) |
1066 | EXTERN_C bool xbrz_equalcolortest24 (uint32_t col1, uint32_t col2, double luminanceWeight, double equalColorTolerance) |
1205 | { |
1067 | { |
1206 | return ( |
1068 | return (dist24 (col1, col2) < equalColorTolerance); |
1207 | } |
1069 | } |
1208 | 1070 | ||
1209 | 1071 | ||
1210 | EXTERN_C bool xbrz_equalcolortest32 (uint32_t col1, uint32_t col2, double luminanceWeight, double equalColorTolerance) |
1072 | EXTERN_C bool xbrz_equalcolortest32 (uint32_t col1, uint32_t col2, double luminanceWeight, double equalColorTolerance) |
1211 | { |
1073 | { |
1212 | return ( |
1074 | return (dist32 (col1, col2) < equalColorTolerance); |
1213 | } |
1075 | } |
1214 | 1076 | ||
1215 | 1077 | ||
1216 | EXTERN_C void xbrz_scale24 (size_t factor, const uint32_t *src, uint32_t *trg, int srcWidth, int srcHeight) |
1078 | EXTERN_C void xbrz_scale24 (size_t factor, const uint32_t *src, uint32_t *trg, int srcWidth, int srcHeight) |
1217 | { |
1079 | { |
1218 | if (factor == 2) return scaleImage<Scaler2x |
1080 | if (factor == 2) return scaleImage<Scaler2x> (src, trg, srcWidth, srcHeight, 0, srcHeight, alphagrad24, dist24); |
1219 | else if (factor == 3) return scaleImage<Scaler3x |
1081 | else if (factor == 3) return scaleImage<Scaler3x> (src, trg, srcWidth, srcHeight, 0, srcHeight, alphagrad24, dist24); |
1220 | else if (factor == 4) return scaleImage<Scaler4x |
1082 | else if (factor == 4) return scaleImage<Scaler4x> (src, trg, srcWidth, srcHeight, 0, srcHeight, alphagrad24, dist24); |
1221 | else if (factor == 5) return scaleImage<Scaler5x |
1083 | else if (factor == 5) return scaleImage<Scaler5x> (src, trg, srcWidth, srcHeight, 0, srcHeight, alphagrad24, dist24); |
1222 | else if (factor == 6) return scaleImage<Scaler6x |
1084 | else if (factor == 6) return scaleImage<Scaler6x> (src, trg, srcWidth, srcHeight, 0, srcHeight, alphagrad24, dist24); |
1223 | } |
1085 | } |
1224 | 1086 | ||
1225 | 1087 | ||
1226 | EXTERN_C void xbrz_scale32 (size_t factor, const uint32_t *src, uint32_t *trg, int srcWidth, int srcHeight) |
1088 | EXTERN_C void xbrz_scale32 (size_t factor, const uint32_t *src, uint32_t *trg, int srcWidth, int srcHeight) |
1227 | { |
1089 | { |
1228 | if (factor == 2) return scaleImage<Scaler2x |
1090 | if (factor == 2) return scaleImage<Scaler2x> (src, trg, srcWidth, srcHeight, 0, srcHeight, alphagrad32, dist32); |
1229 | else if (factor == 3) return scaleImage<Scaler3x |
1091 | else if (factor == 3) return scaleImage<Scaler3x> (src, trg, srcWidth, srcHeight, 0, srcHeight, alphagrad32, dist32); |
1230 | else if (factor == 4) return scaleImage<Scaler4x |
1092 | else if (factor == 4) return scaleImage<Scaler4x> (src, trg, srcWidth, srcHeight, 0, srcHeight, alphagrad32, dist32); |
1231 | else if (factor == 5) return scaleImage<Scaler5x |
1093 | else if (factor == 5) return scaleImage<Scaler5x> (src, trg, srcWidth, srcHeight, 0, srcHeight, alphagrad32, dist32); |
1232 | else if (factor == 6) return scaleImage<Scaler6x |
1094 | else if (factor == 6) return scaleImage<Scaler6x> (src, trg, srcWidth, srcHeight, 0, srcHeight, alphagrad32, dist32); |
1233 | } |
1095 | } |