Subversion Repositories Games.Prince of Persia

Rev

Rev 3 | Show entire file | Ignore whitespace | Details | Blame | Last modification | View Log | RSS feed

Rev 3 Rev 4
Line 47... Line 47...
47
#ifdef __cplusplus
47
#ifdef __cplusplus
48
#define EXTERN_C extern "C"
48
#define EXTERN_C extern "C"
49
#else // !__cplusplus
49
#else // !__cplusplus
50
#define EXTERN_C
50
#define EXTERN_C
51
#endif // __cplusplus
51
#endif // __cplusplus
-
 
52
 
-
 
53
 
-
 
54
#ifdef _MSC_VER
-
 
55
#define FORCE_INLINE __forceinline
-
 
56
#elif defined __GNUC__
-
 
57
#define FORCE_INLINE __attribute__((always_inline)) inline
-
 
58
#else
-
 
59
#define FORCE_INLINE inline
-
 
60
#endif
52
 
61
 
53
 
62
 
54
// scaler configuration
63
// scaler configuration
55
#define XBRZ_CFG_LUMINANCE_WEIGHT 1
64
#define XBRZ_CFG_LUMINANCE_WEIGHT 1
56
#define XBRZ_CFG_EQUAL_COLOR_TOLERANCE 30
65
#define XBRZ_CFG_EQUAL_COLOR_TOLERANCE 30
Line 81... Line 90...
81
 
90
 
82
 
91
 
83
typedef void (alphagrad_func) (uint32_t *pixBack, uint32_t pixFront, unsigned int M, unsigned int N);
92
typedef void (alphagrad_func) (uint32_t *pixBack, uint32_t pixFront, unsigned int M, unsigned int N);
84
typedef double (dist_func) (uint32_t pix1, uint32_t pix2);
93
typedef double (dist_func) (uint32_t pix1, uint32_t pix2);
85
 
94
 
86
 
-
 
87
 
-
 
88
 
-
 
89
namespace
-
 
90
{
-
 
91
#ifdef _MSC_VER
-
 
92
    #define FORCE_INLINE __forceinline
-
 
93
#elif defined __GNUC__
-
 
94
    #define FORCE_INLINE __attribute__((always_inline)) inline
-
 
95
#else
-
 
96
    #define FORCE_INLINE inline
-
 
97
#endif
-
 
98
 
95
 
99
 
96
 
100
enum RotationDegree //clock-wise
97
enum RotationDegree //clock-wise
101
{
98
{
102
    ROT_0 = 0,
99
   ROT_0 = 0,
103
    ROT_90,
100
   ROT_90,
104
    ROT_180,
101
   ROT_180,
105
    ROT_270
102
   ROT_270
106
};
103
};
107
 
104
 
108
 
105
 
109
//calculate input matrix coordinates after rotation at compile time
106
enum BlendType
110
template <RotationDegree rotDeg, size_t I, size_t J, size_t N> struct MatrixRotation;
-
 
111
 
-
 
112
 
-
 
113
template <size_t I, size_t J, size_t N> struct MatrixRotation<ROT_0, I, J, N>
-
 
114
{
107
{
-
 
108
   BLEND_NONE = 0,
115
    static const size_t I_old = I;
109
   BLEND_NORMAL,   //a normal indication to blend
116
    static const size_t J_old = J;
110
   BLEND_DOMINANT, //a strong indication to blend
-
 
111
   //attention: BlendType must fit into the value range of 2 bit!!!
117
};
112
};
118
 
113
 
119
 
114
 
120
template <RotationDegree rotDeg, size_t I, size_t J, size_t N> //(i, j) = (row, col) indices, N = size of (square) matrix
-
 
121
struct MatrixRotation
115
typedef struct blendresult_s
122
{
116
{
-
 
117
   BlendType
123
    static const size_t I_old = N - 1 - MatrixRotation<(RotationDegree)(rotDeg - 1), I, J, N>::J_old; //old coordinates before rotation!
118
      /**/blend_f, blend_g,
124
    static const size_t J_old =         MatrixRotation<(RotationDegree)(rotDeg - 1), I, J, N>::I_old; //
119
      /**/blend_j, blend_k;
125
};
120
} blendresult_t;
126
 
121
 
127
 
122
 
128
template <size_t N, RotationDegree rotDeg> class OutputMatrix
123
typedef struct kernel_3x3_s
129
{
124
{
130
public:
125
   uint32_t
131
    OutputMatrix (uint32_t *out, int outWidth) //access matrix area, top-left at position "out" for image with given width
-
 
132
    {
126
      /**/a, b, c,
133
        out_ = out;
127
      /**/d, e, f,
134
        outWidth_ = outWidth;
128
      /**/g, h, i;
135
    }
129
} kernel_3x3_t;
136
 
130
 
137
    template <size_t I, size_t J> uint32_t &ref() const
-
 
138
    {
-
 
139
        static const size_t I_old = MatrixRotation<rotDeg, I, J, N>::I_old;
-
 
140
        static const size_t J_old = MatrixRotation<rotDeg, I, J, N>::J_old;
-
 
141
 
131
 
142
        return *(out_ + J_old + I_old * outWidth_);
132
typedef struct kernel_4x4_s //kernel for preprocessing step
-
 
133
{
143
    }
134
   uint32_t
-
 
135
      /**/a, b, c, d,
-
 
136
      /**/e, f, g, h,
-
 
137
      /**/i, j, k, l,
-
 
138
      /**/m, n, o, p;
-
 
139
} kernel_4x4_t;
144
 
140
 
145
    uint32_t* out_;
-
 
146
    int outWidth_;
-
 
147
};
-
 
148
 
141
 
-
 
142
typedef struct outmatrix_s
-
 
143
{
-
 
144
   size_t size;
-
 
145
   uint32_t* ptr;
-
 
146
   int stride;
-
 
147
   RotationDegree rotDeg;
-
 
148
} outmatrix_t;
149
 
149
 
150
 
150
 
151
 
-
 
152
enum BlendType
-
 
-
 
151
static void outmatrix_create (outmatrix_t *mat, size_t size, uint32_t *ptr, int stride, RotationDegree rotDeg) //access matrix area, top-left at position "out" for image with given width
153
{
152
{
154
    BLEND_NONE = 0,
153
   mat->size = size;
155
    BLEND_NORMAL,   //a normal indication to blend
154
   mat->ptr = ptr;
156
    BLEND_DOMINANT, //a strong indication to blend
155
   mat->stride = stride;
157
    //attention: BlendType must fit into the value range of 2 bit!!!
156
   mat->rotDeg = rotDeg;
158
};
157
}
159
 
158
 
-
 
159
 
160
struct BlendResult
160
static uint32_t *outmatrix_ref (outmatrix_t *mat, size_t I, size_t J)
161
{
161
{
162
    BlendType
162
   size_t I_old;
163
    /**/blend_f, blend_g,
163
   size_t J_old;
-
 
164
   // calculate input matrix coordinates after rotation: (i, j) = (row, col) indices, N = size of (square) matrix
-
 
165
   if      (mat->rotDeg == ROT_270) { I_old = J;                 J_old = mat->size - 1 - I; }
164
    /**/blend_j, blend_k;
166
   else if (mat->rotDeg == ROT_180) { I_old = mat->size - 1 - I; J_old = mat->size - 1 - J; }
165
};
-
 
-
 
167
   else if (mat->rotDeg == ROT_90)  { I_old = mat->size - 1 - J; J_old = I;                 }
-
 
168
   else                             { I_old = I;                 J_old = J;                 }
166
 
169
 
-
 
170
   return (mat->ptr + I_old * mat->stride + J_old);
-
 
171
}
167
 
172
 
168
struct Kernel_4x4 //kernel for preprocessing step
-
 
169
{
-
 
170
    uint32_t
-
 
171
    /**/a, b, c, d,
-
 
172
    /**/e, f, g, h,
-
 
173
    /**/i, j, k, l,
-
 
174
    /**/m, n, o, p;
-
 
175
};
-
 
176
 
173
 
177
/*
-
 
178
input kernel area naming convention:
-
 
179
-----------------
-
 
180
| A | B | C | D |
-
 
181
----|---|---|---|
-
 
182
| E | F | G | H |   //evaluate the four corners between F, G, J, K
-
 
183
----|---|---|---|   //input pixel is at position F
-
 
184
| I | J | K | L |
-
 
185
----|---|---|---|
-
 
186
| M | N | O | P |
-
 
187
-----------------
-
 
188
*/
-
 
189
FORCE_INLINE //detect blend direction
-
 
190
BlendResult preProcessCorners(const Kernel_4x4& ker, dist_func dist) //result: F, G, J, K corners of "GradientType"
174
static FORCE_INLINE void preProcessCorners (blendresult_t *result, const kernel_4x4_t *ker, dist_func dist)
191
{
175
{
192
    BlendResult result = {};
176
   // detect blend direction
-
 
177
   // result: F, G, J, K corners of "GradientType"
193
 
178
 
-
 
179
   // input kernel area naming convention:
194
    if ((ker.f == ker.g &&
180
   // -----------------
195
         ker.j == ker.k) ||
181
   // | A | B | C | D |
-
 
182
   // ----|---|---|---|
-
 
183
   // | E | F | G | H |   //evaluate the four corners between F, G, J, K
-
 
184
   // ----|---|---|---|   //input pixel is at position F
196
        (ker.f == ker.j &&
185
   // | I | J | K | L |
-
 
186
   // ----|---|---|---|
197
         ker.g == ker.k))
187
   // | M | N | O | P |
198
        return result;
188
   // -----------------
199
 
189
 
200
    const int weight = 4;
190
   memset (result, 0, sizeof (blendresult_t));
201
    double jg = dist (ker.i, ker.f) + dist (ker.f, ker.c) + dist (ker.n, ker.k) + dist (ker.k, ker.h) + weight * dist (ker.j, ker.g);
-
 
202
    double fk = dist (ker.e, ker.j) + dist (ker.j, ker.o) + dist (ker.b, ker.g) + dist (ker.g, ker.l) + weight * dist (ker.f, ker.k);
-
 
203
 
191
 
204
    if (jg < fk) //test sample: 70% of values max(jg, fk) / min(jg, fk) are between 1.1 and 3.7 with median being 1.8
192
   if (((ker->f == ker->g) && (ker->j == ker->k)) || ((ker->f == ker->j) && (ker->g == ker->k)))
205
    {
193
      return;
206
        const bool dominantGradient = XBRZ_CFG_DOMINANT_DIRECTION_THRESHOLD * jg < fk;
-
 
207
        if (ker.f != ker.g && ker.f != ker.j)
-
 
208
            result.blend_f = dominantGradient ? BLEND_DOMINANT : BLEND_NORMAL;
-
 
209
 
194
 
210
        if (ker.k != ker.j && ker.k != ker.g)
-
 
211
            result.blend_k = dominantGradient ? BLEND_DOMINANT : BLEND_NORMAL;
-
 
212
    }
-
 
213
    else if (fk < jg)
195
   const int weight = 4;
214
    {
-
 
215
        const bool dominantGradient = XBRZ_CFG_DOMINANT_DIRECTION_THRESHOLD * fk < jg;
196
   double jg = dist (ker->i, ker->f) + dist (ker->f, ker->c) + dist (ker->n, ker->k) + dist (ker->k, ker->h) + weight * dist (ker->j, ker->g);
216
        if (ker.j != ker.f && ker.j != ker.k)
-
 
217
            result.blend_j = dominantGradient ? BLEND_DOMINANT : BLEND_NORMAL;
197
   double fk = dist (ker->e, ker->j) + dist (ker->j, ker->o) + dist (ker->b, ker->g) + dist (ker->g, ker->l) + weight * dist (ker->f, ker->k);
218
 
198
 
-
 
199
   if (jg < fk) //test sample: 70% of values max(jg, fk) / min(jg, fk) are between 1.1 and 3.7 with median being 1.8
-
 
200
   {
-
 
201
      const bool dominantGradient = XBRZ_CFG_DOMINANT_DIRECTION_THRESHOLD * jg < fk;
219
        if (ker.g != ker.f && ker.g != ker.k)
202
      if (ker->f != ker->g && ker->f != ker->j)
220
            result.blend_g = dominantGradient ? BLEND_DOMINANT : BLEND_NORMAL;
203
         result->blend_f = dominantGradient ? BLEND_DOMINANT : BLEND_NORMAL;
221
    }
-
 
222
    return result;
-
 
223
}
-
 
224
 
204
 
225
struct Kernel_3x3
205
      if (ker->k != ker->j && ker->k != ker->g)
-
 
206
         result->blend_k = dominantGradient ? BLEND_DOMINANT : BLEND_NORMAL;
226
{
207
   }
227
    uint32_t
-
 
228
    /**/a,  b,  c,
-
 
229
    /**/d,  e,  f,
-
 
230
    /**/g,  h,  i;
208
   else if (fk < jg)
231
};
-
 
232
/*
209
   {
233
#define DEF_GETTER(x) template <RotationDegree rotDeg> uint32_t inline get_##x(const Kernel_3x3& ker) { return ker.x; }
-
 
234
//we cannot and NEED NOT write "ker.##x" since ## concatenates preprocessor tokens but "." is not a token
210
      const bool dominantGradient = XBRZ_CFG_DOMINANT_DIRECTION_THRESHOLD * fk < jg;
235
DEF_GETTER(a) DEF_GETTER(b) DEF_GETTER(c)
-
 
236
DEF_GETTER(d) DEF_GETTER(e) DEF_GETTER(f)
211
      if (ker->j != ker->f && ker->j != ker->k)
237
DEF_GETTER(g) DEF_GETTER(h) DEF_GETTER(i)
212
         result->blend_j = dominantGradient ? BLEND_DOMINANT : BLEND_NORMAL;
238
#undef DEF_GETTER
-
 
239
 
213
 
240
#define DEF_GETTER(x, y) template <> inline uint32_t get_##x<ROT_90>(const Kernel_3x3& ker) { return ker.y; }
-
 
241
DEF_GETTER(a, g) DEF_GETTER(b, d) DEF_GETTER(c, a)
214
      if (ker->g != ker->f && ker->g != ker->k)
242
DEF_GETTER(d, h) DEF_GETTER(e, e) DEF_GETTER(f, b)
215
         result->blend_g = dominantGradient ? BLEND_DOMINANT : BLEND_NORMAL;
243
DEF_GETTER(g, i) DEF_GETTER(h, f) DEF_GETTER(i, c)
-
 
-
 
216
   }
244
#undef DEF_GETTER
217
   return;
-
 
218
}
245
 
219
 
-
 
220
// compress four blend types into a single byte
246
#define DEF_GETTER(x, y) template <> inline uint32_t get_##x<ROT_180>(const Kernel_3x3& ker) { return ker.y; }
221
static inline BlendType getTopL (unsigned char b) { return (BlendType) (0x3 & (b >> 0)); }
247
DEF_GETTER(a, i) DEF_GETTER(b, h) DEF_GETTER(c, g)
222
static inline BlendType getTopR (unsigned char b) { return (BlendType) (0x3 & (b >> 2)); }
248
DEF_GETTER(d, f) DEF_GETTER(e, e) DEF_GETTER(f, d)
223
static inline BlendType getBottomR (unsigned char b) { return (BlendType) (0x3 & (b >> 4)); }
249
DEF_GETTER(g, c) DEF_GETTER(h, b) DEF_GETTER(i, a)
224
static inline BlendType getBottomL (unsigned char b) { return (BlendType) (0x3 & (b >> 6)); }
250
#undef DEF_GETTER
-
 
251
 
225
 
252
#define DEF_GETTER(x, y) template <> inline uint32_t get_##x<ROT_270>(const Kernel_3x3& ker) { return ker.y; }
226
static inline void setTopL (unsigned char& b, BlendType bt) { b |= bt; } //buffer is assumed to be initialized before preprocessing!
253
DEF_GETTER(a, c) DEF_GETTER(b, f) DEF_GETTER(c, i)
227
static inline void setTopR (unsigned char& b, BlendType bt) { b |= (bt << 2); }
254
DEF_GETTER(d, b) DEF_GETTER(e, e) DEF_GETTER(f, h)
228
static inline void setBottomR (unsigned char& b, BlendType bt) { b |= (bt << 4); }
255
DEF_GETTER(g, a) DEF_GETTER(h, d) DEF_GETTER(i, g)
229
static inline void setBottomL (unsigned char& b, BlendType bt) { b |= (bt << 6); }
256
#undef DEF_GETTER
-
 
257
*/
-
 
258
 
230
 
259
template <RotationDegree rotDeg> uint32_t inline get_a (const Kernel_3x3& ker) { return ker.a; }
-
 
260
template <RotationDegree rotDeg> uint32_t inline get_b (const Kernel_3x3& ker) { return ker.b; }
-
 
261
template <RotationDegree rotDeg> uint32_t inline get_c (const Kernel_3x3& ker) { return ker.c; }
-
 
262
template <RotationDegree rotDeg> uint32_t inline get_d (const Kernel_3x3& ker) { return ker.d; }
-
 
263
template <RotationDegree rotDeg> uint32_t inline get_e (const Kernel_3x3& ker) { return ker.e; }
-
 
264
template <RotationDegree rotDeg> uint32_t inline get_f (const Kernel_3x3& ker) { return ker.f; }
-
 
265
template <RotationDegree rotDeg> uint32_t inline get_g (const Kernel_3x3& ker) { return ker.g; }
-
 
266
template <RotationDegree rotDeg> uint32_t inline get_h (const Kernel_3x3& ker) { return ker.h; }
-
 
267
template <RotationDegree rotDeg> uint32_t inline get_i (const Kernel_3x3& ker) { return ker.i; }
-
 
268
 
231
 
269
template <> inline uint32_t get_a<ROT_90>(const Kernel_3x3& ker) { return ker.g; }
-
 
270
template <> inline uint32_t get_b<ROT_90>(const Kernel_3x3& ker) { return ker.d; }
-
 
271
template <> inline uint32_t get_c<ROT_90>(const Kernel_3x3& ker) { return ker.a; }
-
 
272
template <> inline uint32_t get_d<ROT_90>(const Kernel_3x3& ker) { return ker.h; }
-
 
273
template <> inline uint32_t get_e<ROT_90>(const Kernel_3x3& ker) { return ker.e; }
-
 
274
template <> inline uint32_t get_f<ROT_90>(const Kernel_3x3& ker) { return ker.b; }
-
 
275
template <> inline uint32_t get_g<ROT_90>(const Kernel_3x3& ker) { return ker.i; }
-
 
276
template <> inline uint32_t get_h<ROT_90>(const Kernel_3x3& ker) { return ker.f; }
-
 
277
template <> inline uint32_t get_i<ROT_90>(const Kernel_3x3& ker) { return ker.c; }
-
 
-
 
232
namespace
-
 
233
{
278
 
234
 
279
template <> inline uint32_t get_a<ROT_180>(const Kernel_3x3& ker) { return ker.i; }
-
 
280
template <> inline uint32_t get_b<ROT_180>(const Kernel_3x3& ker) { return ker.h; }
-
 
281
template <> inline uint32_t get_c<ROT_180>(const Kernel_3x3& ker) { return ker.g; }
-
 
282
template <> inline uint32_t get_d<ROT_180>(const Kernel_3x3& ker) { return ker.f; }
-
 
283
template <> inline uint32_t get_e<ROT_180>(const Kernel_3x3& ker) { return ker.e; }
-
 
284
template <> inline uint32_t get_f<ROT_180>(const Kernel_3x3& ker) { return ker.d; }
-
 
285
template <> inline uint32_t get_g<ROT_180>(const Kernel_3x3& ker) { return ker.c; }
-
 
286
template <> inline uint32_t get_h<ROT_180>(const Kernel_3x3& ker) { return ker.b; }
-
 
287
template <> inline uint32_t get_i<ROT_180>(const Kernel_3x3& ker) { return ker.a; }
-
 
288
 
235
 
289
template <> inline uint32_t get_a<ROT_270>(const Kernel_3x3& ker) { return ker.c; }
-
 
290
template <> inline uint32_t get_b<ROT_270>(const Kernel_3x3& ker) { return ker.f; }
-
 
291
template <> inline uint32_t get_c<ROT_270>(const Kernel_3x3& ker) { return ker.i; }
-
 
292
template <> inline uint32_t get_d<ROT_270>(const Kernel_3x3& ker) { return ker.b; }
-
 
293
template <> inline uint32_t get_e<ROT_270>(const Kernel_3x3& ker) { return ker.e; }
-
 
294
template <> inline uint32_t get_f<ROT_270>(const Kernel_3x3& ker) { return ker.h; }
-
 
295
template <> inline uint32_t get_g<ROT_270>(const Kernel_3x3& ker) { return ker.a; }
-
 
296
template <> inline uint32_t get_h<ROT_270>(const Kernel_3x3& ker) { return ker.d; }
-
 
297
template <> inline uint32_t get_i<ROT_270>(const Kernel_3x3& ker) { return ker.g; }
-
 
298
 
236
 
299
//compress four blend types into a single byte
-
 
300
inline BlendType getTopL   (unsigned char b) { return (BlendType)(0x3 & b); }
-
 
301
inline BlendType getTopR   (unsigned char b) { return (BlendType)(0x3 & (b >> 2)); }
-
 
302
inline BlendType getBottomR(unsigned char b) { return (BlendType)(0x3 & (b >> 4)); }
-
 
303
inline BlendType getBottomL(unsigned char b) { return (BlendType)(0x3 & (b >> 6)); }
-
 
304
 
237
 
-
 
238
   template <class Scaler>
305
inline void setTopL   (unsigned char& b, BlendType bt) { b |= bt; } //buffer is assumed to be initialized before preprocessing!
239
   FORCE_INLINE void blendPixel (const int scale_factor, const kernel_3x3_t *ker, uint32_t *target, int trgWidth, unsigned char blendInfo, alphagrad_func alphagrad, dist_func dist, RotationDegree rotDeg) //result of preprocessing all four corners of pixel "e"
-
 
240
   {
306
inline void setTopR   (unsigned char& b, BlendType bt) { b |= (bt << 2); }
241
      // input kernel area naming convention:
-
 
242
      // -------------
-
 
243
      // | A | B | C |
-
 
244
      // ----|---|---|
307
inline void setBottomR(unsigned char& b, BlendType bt) { b |= (bt << 4); }
245
      // | D | E | F | //input pixel is at position E
-
 
246
      // ----|---|---|
308
inline void setBottomL(unsigned char& b, BlendType bt) { b |= (bt << 6); }
247
      // | G | H | I |
-
 
248
      // -------------
309
 
249
 
310
template <RotationDegree rotDeg> inline
250
      uint32_t
311
unsigned char rotateBlendInfo (unsigned char b) { return b; }
251
         a, b, c,
312
template <> inline unsigned char rotateBlendInfo<ROT_90 >(unsigned char b) { return ((b << 2) | (b >> 6)) & 0xff; }
252
         d, e, f,
313
template <> inline unsigned char rotateBlendInfo<ROT_180>(unsigned char b) { return ((b << 4) | (b >> 4)) & 0xff; }
253
         g, h, i;
314
template <> inline unsigned char rotateBlendInfo<ROT_270>(unsigned char b) { return ((b << 6) | (b >> 2)) & 0xff; }
254
      unsigned char blend;
315
 
255
 
-
 
256
      if      (rotDeg == ROT_270) { a = ker->c; b = ker->f; c = ker->i; d = ker->b; e = ker->e; f = ker->h; g = ker->a; h = ker->d; i = ker->g; blend = ((blendInfo << 6) | (blendInfo >> 2)) & 0xff; }
-
 
257
      else if (rotDeg == ROT_180) { a = ker->i; b = ker->h; c = ker->g; d = ker->f; e = ker->e; f = ker->d; g = ker->c; h = ker->b; i = ker->a; blend = ((blendInfo << 4) | (blendInfo >> 4)) & 0xff; }
-
 
258
      else if (rotDeg == ROT_90)  { a = ker->g; b = ker->d; c = ker->a; d = ker->h; e = ker->e; f = ker->b; g = ker->i; h = ker->f; i = ker->c; blend = ((blendInfo << 2) | (blendInfo >> 6)) & 0xff; }
-
 
259
      else                        { a = ker->a; b = ker->b; c = ker->c; d = ker->d; e = ker->e; f = ker->f; g = ker->g; h = ker->h; i = ker->i; blend = ((blendInfo << 0) | (blendInfo >> 8)) & 0xff; }
316
 
260
 
317
/*
-
 
318
input kernel area naming convention:
-
 
319
-------------
-
 
320
| A | B | C |
-
 
321
----|---|---|
-
 
322
| D | E | F | //input pixel is at position E
261
      if (getBottomR (blend) >= BLEND_NORMAL)
323
----|---|---|
-
 
324
| G | H | I |
262
      {
325
-------------
-
 
326
*/
-
 
327
template <class Scaler, RotationDegree rotDeg>
-
 
328
FORCE_INLINE void blendPixel(const Kernel_3x3& ker, uint32_t *target, int trgWidth, unsigned char blendInfo, alphagrad_func alphagrad, dist_func dist) //result of preprocessing all four corners of pixel "e"
-
 
329
{
-
 
330
#define a get_a<rotDeg>(ker)
263
         outmatrix_t out;
331
#define b get_b<rotDeg>(ker)
-
 
332
#define c get_c<rotDeg>(ker)
-
 
333
#define d get_d<rotDeg>(ker)
-
 
334
#define e get_e<rotDeg>(ker)
-
 
335
#define f get_f<rotDeg>(ker)
-
 
336
#define g get_g<rotDeg>(ker)
-
 
337
#define h get_h<rotDeg>(ker)
-
 
338
#define i get_i<rotDeg>(ker)
264
         uint32_t px;
339
 
-
 
340
    const unsigned char blend = rotateBlendInfo<rotDeg>(blendInfo);
-
 
341
 
-
 
342
    if (getBottomR(blend) >= BLEND_NORMAL)
-
 
343
    {
-
 
344
        bool doLineBlend;
265
         bool doLineBlend;
345
 
266
 
346
        if (getBottomR(blend) >= BLEND_DOMINANT)
267
         if (getBottomR (blend) >= BLEND_DOMINANT)
347
            doLineBlend = true;
268
            doLineBlend = true;
348
        else if (getTopR(blend) != BLEND_NONE && (dist (e, g) >= XBRZ_CFG_EQUAL_COLOR_TOLERANCE)) //but support double-blending for 90° corners
269
         else if (getTopR (blend) != BLEND_NONE && (dist (e, g) >= XBRZ_CFG_EQUAL_COLOR_TOLERANCE)) //but support double-blending for 90° corners
349
            doLineBlend = false; // make sure there is no second blending in an adjacent rotation for this pixel: handles insular pixels, mario eyes
270
            doLineBlend = false; // make sure there is no second blending in an adjacent rotation for this pixel: handles insular pixels, mario eyes
350
        else if (getBottomL(blend) != BLEND_NONE && (dist (e, c) >= XBRZ_CFG_EQUAL_COLOR_TOLERANCE))
271
         else if (getBottomL (blend) != BLEND_NONE && (dist (e, c) >= XBRZ_CFG_EQUAL_COLOR_TOLERANCE))
351
            doLineBlend = false; // make sure there is no second blending in an adjacent rotation for this pixel: handles insular pixels, mario eyes
272
            doLineBlend = false; // make sure there is no second blending in an adjacent rotation for this pixel: handles insular pixels, mario eyes
352
        else if ((dist (e, i) >= XBRZ_CFG_EQUAL_COLOR_TOLERANCE)
273
         else if ((dist (e, i) >= XBRZ_CFG_EQUAL_COLOR_TOLERANCE)
353
            && (dist (g, h) < XBRZ_CFG_EQUAL_COLOR_TOLERANCE)
274
            && (dist (g, h) < XBRZ_CFG_EQUAL_COLOR_TOLERANCE)
354
            && (dist (h, i) < XBRZ_CFG_EQUAL_COLOR_TOLERANCE)
275
            && (dist (h, i) < XBRZ_CFG_EQUAL_COLOR_TOLERANCE)
355
            && (dist (i, f) < XBRZ_CFG_EQUAL_COLOR_TOLERANCE)
276
            && (dist (i, f) < XBRZ_CFG_EQUAL_COLOR_TOLERANCE)
356
            && (dist (f, c) < XBRZ_CFG_EQUAL_COLOR_TOLERANCE))
277
            && (dist (f, c) < XBRZ_CFG_EQUAL_COLOR_TOLERANCE))
357
            doLineBlend = false; // no full blending for L-shapes; blend corner only (handles "mario mushroom eyes")
278
            doLineBlend = false; // no full blending for L-shapes; blend corner only (handles "mario mushroom eyes")
358
                else
279
         else
359
            doLineBlend = true;
280
            doLineBlend = true;
360
 
281
 
-
 
282
         outmatrix_create (&out, scale_factor, target, trgWidth, rotDeg);
361
        const uint32_t px = (dist (e, f) <= dist (e, h) ? f : h); //choose most similar color
283
         px = (dist (e, f) <= dist (e, h) ? f : h); //choose most similar color
362
 
284
 
363
        OutputMatrix<Scaler::scale, rotDeg> out(target, trgWidth);
-
 
364
 
-
 
365
        if (doLineBlend)
285
         if (doLineBlend)
366
        {
286
         {
367
            const double fg = dist (f, g); //test sample: 70% of values max(fg, hc) / min(fg, hc) are between 1.1 and 3.7 with median being 1.9
287
            const double fg = dist (f, g); //test sample: 70% of values max(fg, hc) / min(fg, hc) are between 1.1 and 3.7 with median being 1.9
368
            const double hc = dist (h, c); //
288
            const double hc = dist (h, c); //
369
 
-
 
370
            const bool haveShallowLine = XBRZ_CFG_STEEP_DIRECTION_THRESHOLD * fg <= hc && e != g && d != g;
289
            const bool haveShallowLine = (XBRZ_CFG_STEEP_DIRECTION_THRESHOLD * fg <= hc) && (e != g) && (d != g);
371
            const bool haveSteepLine   = XBRZ_CFG_STEEP_DIRECTION_THRESHOLD * hc <= fg && e != c && b != c;
290
            const bool haveSteepLine   = (XBRZ_CFG_STEEP_DIRECTION_THRESHOLD * hc <= fg) && (e != c) && (b != c);
372
 
291
 
373
            if (haveShallowLine)
292
            if (haveShallowLine)
374
            {
293
            {
375
                if (haveSteepLine)
294
               if (haveSteepLine)
376
                    Scaler::blendLineSteepAndShallow(px, out, alphagrad);
295
                  Scaler::blendLineSteepAndShallow (px, &out, alphagrad);
377
                else
296
               else
378
                    Scaler::blendLineShallow(px, out, alphagrad);
297
                  Scaler::blendLineShallow (px, &out, alphagrad);
379
            }
298
            }
380
            else
299
            else
381
            {
300
            {
382
                if (haveSteepLine)
301
               if (haveSteepLine)
383
                    Scaler::blendLineSteep(px, out, alphagrad);
302
                  Scaler::blendLineSteep (px, &out, alphagrad);
384
                else
303
               else
385
                    Scaler::blendLineDiagonal(px, out, alphagrad);
304
                  Scaler::blendLineDiagonal (px, &out, alphagrad);
386
            }
305
            }
387
        }
306
         }
388
        else
307
         else
389
            Scaler::blendCorner(px, out, alphagrad);
308
            Scaler::blendCorner (px, &out, alphagrad);
-
 
309
      }
390
    }
310
   }
391
 
311
 
392
#undef a
-
 
393
#undef b
-
 
394
#undef c
-
 
395
#undef d
-
 
396
#undef e
-
 
397
#undef f
-
 
398
#undef g
-
 
399
#undef h
-
 
400
#undef i
-
 
401
}
-
 
402
 
312
 
-
 
313
   template <class Scaler> //scaler policy: see "Scaler2x" reference implementation
-
 
314
   void scaleImage (const uint32_t *src, uint32_t *trg, int srcWidth, int srcHeight, int yFirst, int yLast, alphagrad_func alphagrad, dist_func dist)
-
 
315
   {
-
 
316
      yFirst = MAX (yFirst, 0);
-
 
317
      yLast = MIN (yLast, srcHeight);
-
 
318
      if (yFirst >= yLast || srcWidth <= 0)
-
 
319
         return;
403
 
320
 
404
template <class Scaler> //scaler policy: see "Scaler2x" reference implementation
-
 
405
void scaleImage(const uint32_t *src, uint32_t *trg, int srcWidth, int srcHeight, int yFirst, int yLast, alphagrad_func alphagrad, dist_func dist)
-
 
406
{
-
 
407
    yFirst = MAX (yFirst, 0);
-
 
408
    yLast  = MIN (yLast, srcHeight);
-
 
409
    if (yFirst >= yLast || srcWidth <= 0)
-
 
410
        return;
-
 
411
 
-
 
412
    const int trgWidth = srcWidth * Scaler::scale;
321
      const int trgWidth = srcWidth * Scaler::scale;
413
 
322
 
414
    //"use" space at the end of the image as temporary buffer for "on the fly preprocessing": we even could use larger area of
323
      //"use" space at the end of the image as temporary buffer for "on the fly preprocessing": we even could use larger area of
415
    //"sizeof(uint32_t) * srcWidth * (yLast - yFirst)" bytes without risk of accidental overwriting before accessing
324
      //"sizeof(uint32_t) * srcWidth * (yLast - yFirst)" bytes without risk of accidental overwriting before accessing
416
    const int bufferSize = srcWidth;
325
      const int bufferSize = srcWidth;
417
    unsigned char* preProcBuffer = reinterpret_cast<unsigned char*>(trg + yLast * Scaler::scale * trgWidth) - bufferSize;
326
      unsigned char* preProcBuffer = reinterpret_cast<unsigned char*>(trg + yLast * Scaler::scale * trgWidth) - bufferSize;
418
    memset (preProcBuffer, 0, bufferSize);
327
      memset (preProcBuffer, 0, bufferSize);
419
    static_assert(BLEND_NONE == 0, "");
328
      static_assert(BLEND_NONE == 0, "");
420
 
329
 
421
    //initialize preprocessing buffer for first row of current stripe: detect upper left and right corner blending
330
      //initialize preprocessing buffer for first row of current stripe: detect upper left and right corner blending
422
    //this cannot be optimized for adjacent processing stripes; we must not allow for a memory race condition!
331
      //this cannot be optimized for adjacent processing stripes; we must not allow for a memory race condition!
423
    if (yFirst > 0)
332
      if (yFirst > 0)
424
    {
333
      {
425
        const int y = yFirst - 1;
334
         const int y = yFirst - 1;
426
 
335
 
427
        const uint32_t* s_m1 = src + srcWidth * MAX (y - 1, 0);
336
         const uint32_t* s_m1 = src + srcWidth * MAX (y - 1, 0);
428
        const uint32_t* s_0  = src + srcWidth * y; //center line
337
         const uint32_t* s_0 = src + srcWidth * y; //center line
429
        const uint32_t* s_p1 = src + srcWidth * MIN (y + 1, srcHeight - 1);
338
         const uint32_t* s_p1 = src + srcWidth * MIN (y + 1, srcHeight - 1);
430
        const uint32_t* s_p2 = src + srcWidth * MIN (y + 2, srcHeight - 1);
339
         const uint32_t* s_p2 = src + srcWidth * MIN (y + 2, srcHeight - 1);
431
 
340
 
432
        for (int x = 0; x < srcWidth; ++x)
341
         for (int x = 0; x < srcWidth; ++x)
433
        {
342
         {
-
 
343
            blendresult_t res;
434
            const int x_m1 = MAX (x - 1, 0);
344
            const int x_m1 = MAX (x - 1, 0);
435
            const int x_p1 = MIN (x + 1, srcWidth - 1);
345
            const int x_p1 = MIN (x + 1, srcWidth - 1);
436
            const int x_p2 = MIN (x + 2, srcWidth - 1);
346
            const int x_p2 = MIN (x + 2, srcWidth - 1);
437
 
347
 
438
            Kernel_4x4 ker = {}; //perf: initialization is negligible
348
            kernel_4x4_t ker; //perf: initialization is negligible
439
            ker.a = s_m1[x_m1]; //read sequentially from memory as far as possible
349
            ker.a = s_m1[x_m1]; //read sequentially from memory as far as possible
440
            ker.b = s_m1[x];
350
            ker.b = s_m1[x];
441
            ker.c = s_m1[x_p1];
351
            ker.c = s_m1[x_p1];
442
            ker.d = s_m1[x_p2];
352
            ker.d = s_m1[x_p2];
443
 
353
 
Line 454... Line 364...
454
            ker.m = s_p2[x_m1];
364
            ker.m = s_p2[x_m1];
455
            ker.n = s_p2[x];
365
            ker.n = s_p2[x];
456
            ker.o = s_p2[x_p1];
366
            ker.o = s_p2[x_p1];
457
            ker.p = s_p2[x_p2];
367
            ker.p = s_p2[x_p2];
458
 
368
 
459
            const BlendResult res = preProcessCorners (ker, dist);
369
            preProcessCorners (&res, &ker, dist);
460
            /*
370
            /*
461
            preprocessing blend result:
371
            preprocessing blend result:
462
            ---------
372
            ---------
463
            | F | G |   //evaluate corner between F, G, J, K
373
            | F | G |   //evaluate corner between F, G, J, K
464
            ----|---|   //input pixel is at position F
374
            ----|---|   //input pixel is at position F
465
            | J | K |
375
            | J | K |
466
            ---------
376
            ---------
467
            */
377
            */
468
            setTopR(preProcBuffer[x], res.blend_j);
378
            setTopR (preProcBuffer[x], res.blend_j);
469
 
379
 
470
            if (x + 1 < bufferSize)
380
            if (x + 1 < bufferSize)
471
                setTopL(preProcBuffer[x + 1], res.blend_k);
381
               setTopL (preProcBuffer[x + 1], res.blend_k);
472
        }
382
         }
473
    }
383
      }
474
    //------------------------------------------------------------------------------------
384
      //------------------------------------------------------------------------------------
475
 
385
 
476
    for (int y = yFirst; y < yLast; ++y)
386
      for (int y = yFirst; y < yLast; ++y)
477
    {
387
      {
478
        uint32_t *out = trg + Scaler::scale * y * trgWidth; //consider MT "striped" access
388
         uint32_t *out = trg + Scaler::scale * y * trgWidth; //consider MT "striped" access
479
 
389
 
480
        const uint32_t* s_m1 = src + srcWidth * MAX (y - 1, 0);
390
         const uint32_t* s_m1 = src + srcWidth * MAX (y - 1, 0);
481
        const uint32_t* s_0  = src + srcWidth * y; //center line
391
         const uint32_t* s_0 = src + srcWidth * y; //center line
482
        const uint32_t* s_p1 = src + srcWidth * MIN (y + 1, srcHeight - 1);
392
         const uint32_t* s_p1 = src + srcWidth * MIN (y + 1, srcHeight - 1);
483
        const uint32_t* s_p2 = src + srcWidth * MIN (y + 2, srcHeight - 1);
393
         const uint32_t* s_p2 = src + srcWidth * MIN (y + 2, srcHeight - 1);
484
 
394
 
485
        unsigned char blend_xy1 = 0; //corner blending for current (x, y + 1) position
395
         unsigned char blend_xy1 = 0; //corner blending for current (x, y + 1) position
486
 
396
 
487
        for (int x = 0; x < srcWidth; ++x, out += Scaler::scale)
397
         for (int x = 0; x < srcWidth; ++x, out += Scaler::scale)
488
        {
398
         {
489
            //all those bounds checks have only insignificant impact on performance!
399
            //all those bounds checks have only insignificant impact on performance!
490
            const int x_m1 = MAX (x - 1, 0); //perf: prefer array indexing to additional pointers!
400
            const int x_m1 = MAX (x - 1, 0); //perf: prefer array indexing to additional pointers!
491
            const int x_p1 = MIN (x + 1, srcWidth - 1);
401
            const int x_p1 = MIN (x + 1, srcWidth - 1);
492
            const int x_p2 = MIN (x + 2, srcWidth - 1);
402
            const int x_p2 = MIN (x + 2, srcWidth - 1);
493
 
-
 
494
            Kernel_4x4 ker4 = {}; //perf: initialization is negligible
403
            kernel_4x4_t ker4; //perf: initialization is negligible
495
 
404
 
496
            ker4.a = s_m1[x_m1]; //read sequentially from memory as far as possible
405
            ker4.a = s_m1[x_m1]; //read sequentially from memory as far as possible
497
            ker4.b = s_m1[x];
406
            ker4.b = s_m1[x];
498
            ker4.c = s_m1[x_p1];
407
            ker4.c = s_m1[x_p1];
499
            ker4.d = s_m1[x_p2];
408
            ker4.d = s_m1[x_p2];
Line 514... Line 423...
514
            ker4.p = s_p2[x_p2];
423
            ker4.p = s_p2[x_p2];
515
 
424
 
516
            //evaluate the four corners on bottom-right of current pixel
425
            //evaluate the four corners on bottom-right of current pixel
517
            unsigned char blend_xy = 0; //for current (x, y) position
426
            unsigned char blend_xy = 0; //for current (x, y) position
518
            {
427
            {
-
 
428
               blendresult_t res;
519
                const BlendResult res = preProcessCorners (ker4, dist);
429
               preProcessCorners (&res, &ker4, dist);
520
                /*
430
               /*
521
                preprocessing blend result:
431
               preprocessing blend result:
522
                ---------
432
               ---------
523
                | F | G |   //evaluate corner between F, G, J, K
433
               | F | G |   //evaluate corner between F, G, J, K
524
                ----|---|   //current input pixel is at position F
434
               ----|---|   //current input pixel is at position F
525
                | J | K |
435
               | J | K |
526
                ---------
436
               ---------
527
                */
437
               */
528
                blend_xy = preProcBuffer[x];
438
               blend_xy = preProcBuffer[x];
529
                setBottomR(blend_xy, res.blend_f); //all four corners of (x, y) have been determined at this point due to processing sequence!
439
               setBottomR (blend_xy, res.blend_f); //all four corners of (x, y) have been determined at this point due to processing sequence!
530
 
440
 
531
                setTopR(blend_xy1, res.blend_j); //set 2nd known corner for (x, y + 1)
441
               setTopR (blend_xy1, res.blend_j); //set 2nd known corner for (x, y + 1)
532
                preProcBuffer[x] = blend_xy1; //store on current buffer position for use on next row
442
               preProcBuffer[x] = blend_xy1; //store on current buffer position for use on next row
533
 
443
 
534
                blend_xy1 = 0;
444
               blend_xy1 = 0;
535
                setTopL(blend_xy1, res.blend_k); //set 1st known corner for (x + 1, y + 1) and buffer for use on next column
445
               setTopL (blend_xy1, res.blend_k); //set 1st known corner for (x + 1, y + 1) and buffer for use on next column
536
 
446
 
537
                if (x + 1 < bufferSize) //set 3rd known corner for (x + 1, y)
447
               if (x + 1 < bufferSize) //set 3rd known corner for (x + 1, y)
538
                    setBottomL(preProcBuffer[x + 1], res.blend_g);
448
                  setBottomL (preProcBuffer[x + 1], res.blend_g);
539
            }
449
            }
540
 
450
 
541
            //fill block of size scale * scale with the given color
451
            //fill block of size scale * scale with the given color
542
                        {
452
            {
543
                                uint32_t *blk = out;
453
               uint32_t *blk = out;
544
                            for (int _blk_y = 0; _blk_y < Scaler::scale; ++_blk_y, blk = (uint32_t *) BYTE_ADVANCE (blk, trgWidth * sizeof (uint32_t)))
454
               for (int _blk_y = 0; _blk_y < Scaler::scale; ++_blk_y, blk = (uint32_t *) BYTE_ADVANCE (blk, trgWidth * sizeof (uint32_t)))
545
                                for (int _blk_x = 0; _blk_x < Scaler::scale; ++_blk_x)
455
                  for (int _blk_x = 0; _blk_x < Scaler::scale; ++_blk_x)
546
                                    blk[_blk_x] = ker4.f;
456
                     blk[_blk_x] = ker4.f;
547
                        }
457
            }
548
            //place *after* preprocessing step, to not overwrite the results while processing the last pixel!
458
            //place *after* preprocessing step, to not overwrite the results while processing the last pixel!
549
 
459
 
550
            //blend four corners of current pixel
460
            //blend four corners of current pixel
551
            if (blend_xy != 0) //good 5% perf-improvement
461
            if (blend_xy != 0) //good 5% perf-improvement
552
            {
462
            {
553
                Kernel_3x3 ker3 = {}; //perf: initialization is negligible
463
               kernel_3x3_t ker3; //perf: initialization is negligible
554
 
464
 
555
                ker3.a = ker4.a;
465
               ker3.a = ker4.a;
556
                ker3.b = ker4.b;
466
               ker3.b = ker4.b;
557
                ker3.c = ker4.c;
467
               ker3.c = ker4.c;
558
 
468
 
559
                ker3.d = ker4.e;
469
               ker3.d = ker4.e;
560
                ker3.e = ker4.f;
470
               ker3.e = ker4.f;
561
                ker3.f = ker4.g;
471
               ker3.f = ker4.g;
562
 
472
 
563
                ker3.g = ker4.i;
473
               ker3.g = ker4.i;
564
                ker3.h = ker4.j;
474
               ker3.h = ker4.j;
565
                ker3.i = ker4.k;
475
               ker3.i = ker4.k;
566
 
476
 
567
                blendPixel<Scaler, ROT_0  >(ker3, out, trgWidth, blend_xy, alphagrad, dist);
477
               blendPixel<Scaler> (Scaler::scale, &ker3, out, trgWidth, blend_xy, alphagrad, dist, ROT_0);
568
                blendPixel<Scaler, ROT_90 >(ker3, out, trgWidth, blend_xy, alphagrad, dist);
478
               blendPixel<Scaler> (Scaler::scale, &ker3, out, trgWidth, blend_xy, alphagrad, dist, ROT_90);
569
                blendPixel<Scaler, ROT_180>(ker3, out, trgWidth, blend_xy, alphagrad, dist);
479
               blendPixel<Scaler> (Scaler::scale, &ker3, out, trgWidth, blend_xy, alphagrad, dist, ROT_180);
570
                blendPixel<Scaler, ROT_270>(ker3, out, trgWidth, blend_xy, alphagrad, dist);
480
               blendPixel<Scaler> (Scaler::scale, &ker3, out, trgWidth, blend_xy, alphagrad, dist, ROT_270);
571
            }
481
            }
572
        }
482
         }
573
    }
483
      }
574
}
484
   }
575
 
485
 
576
 
486
 
577
//------------------------------------------------------------------------------------
487
   //------------------------------------------------------------------------------------
578
struct Scaler2x
488
   struct Scaler2x
579
{
489
   {
580
    static const int scale = 2;
490
      static const int scale = 2;
581
 
491
 
582
 
492
 
583
    template <class OutputMatrix>
-
 
584
    static void blendLineShallow(uint32_t col, OutputMatrix& out, alphagrad_func alphagrad)
493
      static void blendLineShallow (uint32_t col, outmatrix_t *out, alphagrad_func alphagrad)
585
    {
494
      {
586
        alphagrad (&(out.template ref<scale - 1, 0>()), col, 1, 4);
495
         alphagrad (outmatrix_ref (out, scale - 1, 0), col, 1, 4);
587
        alphagrad (&(out.template ref<scale - 1, 1>()), col, 3, 4);
496
         alphagrad (outmatrix_ref (out, scale - 1, 1), col, 3, 4);
588
    }
497
      }
589
 
498
 
590
    template <class OutputMatrix>
-
 
591
    static void blendLineSteep(uint32_t col, OutputMatrix& out, alphagrad_func alphagrad)
499
      static void blendLineSteep (uint32_t col, outmatrix_t *out, alphagrad_func alphagrad)
592
    {
500
      {
593
        alphagrad (&(out.template ref<0, scale - 1>()), col, 1, 4);
501
         alphagrad (outmatrix_ref (out, 0, scale - 1), col, 1, 4);
594
        alphagrad (&(out.template ref<1, scale - 1>()), col, 3, 4);
502
         alphagrad (outmatrix_ref (out, 1, scale - 1), col, 3, 4);
595
    }
503
      }
596
 
504
 
597
    template <class OutputMatrix>
-
 
598
    static void blendLineSteepAndShallow(uint32_t col, OutputMatrix& out, alphagrad_func alphagrad)
505
      static void blendLineSteepAndShallow (uint32_t col, outmatrix_t *out, alphagrad_func alphagrad)
599
    {
506
      {
600
        alphagrad (&(out.template ref<1, 0>()), col, 1, 4);
507
         alphagrad (outmatrix_ref (out, 1, 0), col, 1, 4);
601
        alphagrad (&(out.template ref<0, 1>()), col, 1, 4);
508
         alphagrad (outmatrix_ref (out, 0, 1), col, 1, 4);
602
        alphagrad (&(out.template ref<1, 1>()), col, 5, 6); //[!] fixes 7/8 used in xBR
509
         alphagrad (outmatrix_ref (out, 1, 1), col, 5, 6); //[!] fixes 7/8 used in xBR
603
    }
510
      }
604
 
511
 
605
    template <class OutputMatrix>
-
 
606
    static void blendLineDiagonal(uint32_t col, OutputMatrix& out, alphagrad_func alphagrad)
512
      static void blendLineDiagonal (uint32_t col, outmatrix_t *out, alphagrad_func alphagrad)
607
    {
513
      {
608
        alphagrad (&(out.template ref<1, 1>()), col, 1, 2);
514
         alphagrad (outmatrix_ref (out, 1, 1), col, 1, 2);
609
    }
515
      }
610
 
516
 
611
    template <class OutputMatrix>
-
 
612
    static void blendCorner(uint32_t col, OutputMatrix& out, alphagrad_func alphagrad)
517
      static void blendCorner (uint32_t col, outmatrix_t *out, alphagrad_func alphagrad)
613
    {
518
      {
614
        //model a round corner
519
         //model a round corner
615
        alphagrad (&(out.template ref<1, 1>()), col, 21, 100); //exact: 1 - pi/4 = 0.2146018366
520
         alphagrad (outmatrix_ref (out, 1, 1), col, 21, 100); //exact: 1 - pi/4 = 0.2146018366
616
    }
521
      }
617
};
522
   };
618
 
523
 
619
 
524
 
620
struct Scaler3x
525
   struct Scaler3x
621
{
526
   {
622
    static const int scale = 3;
527
      static const int scale = 3;
623
 
528
 
624
 
529
 
625
    template <class OutputMatrix>
-
 
626
    static void blendLineShallow(uint32_t col, OutputMatrix& out, alphagrad_func alphagrad)
530
      static void blendLineShallow (uint32_t col, outmatrix_t *out, alphagrad_func alphagrad)
627
    {
531
      {
628
        alphagrad (&(out.template ref<scale - 1, 0>()), col, 1, 4);
532
         alphagrad (outmatrix_ref (out, scale - 1, 0), col, 1, 4);
629
        alphagrad (&(out.template ref<scale - 2, 2>()), col, 1, 4);
533
         alphagrad (outmatrix_ref (out, scale - 2, 2), col, 1, 4);
630
        alphagrad (&(out.template ref<scale - 1, 1>()), col, 3, 4);
534
         alphagrad (outmatrix_ref (out, scale - 1, 1), col, 3, 4);
631
        out.template ref<scale - 1, 2>() = col;
535
         *outmatrix_ref (out, scale - 1, 2) = col;
632
    }
536
      }
633
 
537
 
634
    template <class OutputMatrix>
-
 
635
    static void blendLineSteep(uint32_t col, OutputMatrix& out, alphagrad_func alphagrad)
538
      static void blendLineSteep (uint32_t col, outmatrix_t *out, alphagrad_func alphagrad)
636
    {
539
      {
637
        alphagrad (&(out.template ref<0, scale - 1>()), col, 1, 4);
540
         alphagrad (outmatrix_ref (out, 0, scale - 1), col, 1, 4);
638
        alphagrad (&(out.template ref<2, scale - 2>()), col, 1, 4);
541
         alphagrad (outmatrix_ref (out, 2, scale - 2), col, 1, 4);
639
        alphagrad (&(out.template ref<1, scale - 1>()), col, 3, 4);
542
         alphagrad (outmatrix_ref (out, 1, scale - 1), col, 3, 4);
640
        out.template ref<2, scale - 1>() = col;
543
         *outmatrix_ref (out, 2, scale - 1) = col;
641
    }
544
      }
642
 
545
 
643
    template <class OutputMatrix>
-
 
644
    static void blendLineSteepAndShallow(uint32_t col, OutputMatrix& out, alphagrad_func alphagrad)
546
      static void blendLineSteepAndShallow (uint32_t col, outmatrix_t *out, alphagrad_func alphagrad)
645
    {
547
      {
646
        alphagrad (&(out.template ref<2, 0>()), col, 1, 4);
548
         alphagrad (outmatrix_ref (out, 2, 0), col, 1, 4);
647
        alphagrad (&(out.template ref<0, 2>()), col, 1, 4);
549
         alphagrad (outmatrix_ref (out, 0, 2), col, 1, 4);
648
        alphagrad (&(out.template ref<2, 1>()), col, 3, 4);
550
         alphagrad (outmatrix_ref (out, 2, 1), col, 3, 4);
649
        alphagrad (&(out.template ref<1, 2>()), col, 3, 4);
551
         alphagrad (outmatrix_ref (out, 1, 2), col, 3, 4);
650
        out.template ref<2, 2>() = col;
552
         *outmatrix_ref (out, 2, 2) = col;
651
    }
553
      }
652
 
554
 
653
    template <class OutputMatrix>
-
 
654
    static void blendLineDiagonal(uint32_t col, OutputMatrix& out, alphagrad_func alphagrad)
555
      static void blendLineDiagonal (uint32_t col, outmatrix_t *out, alphagrad_func alphagrad)
655
    {
556
      {
656
        alphagrad (&(out.template ref<1, 2>()), col, 1, 8); //conflict with other rotations for this odd scale
557
         alphagrad (outmatrix_ref (out, 1, 2), col, 1, 8); //conflict with other rotations for this odd scale
657
        alphagrad (&(out.template ref<2, 1>()), col, 1, 8);
558
         alphagrad (outmatrix_ref (out, 2, 1), col, 1, 8);
658
        alphagrad (&(out.template ref<2, 2>()), col, 7, 8); //
559
         alphagrad (outmatrix_ref (out, 2, 2), col, 7, 8); //
659
    }
560
      }
660
 
561
 
661
    template <class OutputMatrix>
-
 
662
    static void blendCorner(uint32_t col, OutputMatrix& out, alphagrad_func alphagrad)
562
      static void blendCorner (uint32_t col, outmatrix_t *out, alphagrad_func alphagrad)
663
    {
563
      {
664
        //model a round corner
564
         //model a round corner
665
        alphagrad (&(out.template ref<2, 2>()), col, 45, 100); //exact: 0.4545939598
565
         alphagrad (outmatrix_ref (out, 2, 2), col, 45, 100); //exact: 0.4545939598
666
        //alphagrad (&(out.template ref<2, 1>()), col, 7, 256); //0.02826017254 -> negligible + avoid conflicts with other rotations for this odd scale
566
         //alphagrad (outmatrix_ref (out, 2, 1), col, 7, 256); //0.02826017254 -> negligible + avoid conflicts with other rotations for this odd scale
667
        //alphagrad (&(out.template ref<1, 2>()), col, 7, 256); //0.02826017254
567
         //alphagrad (outmatrix_ref (out, 1, 2), col, 7, 256); //0.02826017254
668
    }
568
      }
669
};
569
   };
670
 
570
 
671
 
571
 
672
struct Scaler4x
572
   struct Scaler4x
673
{
573
   {
674
    static const int scale = 4;
574
      static const int scale = 4;
675
 
575
 
676
 
576
 
677
    template <class OutputMatrix>
-
 
678
    static void blendLineShallow(uint32_t col, OutputMatrix& out, alphagrad_func alphagrad)
577
      static void blendLineShallow (uint32_t col, outmatrix_t *out, alphagrad_func alphagrad)
679
    {
578
      {
680
        alphagrad (&(out.template ref<scale - 1, 0>()), col, 1, 4);
579
         alphagrad (outmatrix_ref (out, scale - 1, 0), col, 1, 4);
681
        alphagrad (&(out.template ref<scale - 2, 2>()), col, 1, 4);
580
         alphagrad (outmatrix_ref (out, scale - 2, 2), col, 1, 4);
682
        alphagrad (&(out.template ref<scale - 1, 1>()), col, 3, 4);
581
         alphagrad (outmatrix_ref (out, scale - 1, 1), col, 3, 4);
683
        alphagrad (&(out.template ref<scale - 2, 3>()), col, 3, 4);
582
         alphagrad (outmatrix_ref (out, scale - 2, 3), col, 3, 4);
-
 
583
         *outmatrix_ref (out, scale - 1, 2) = col;
-
 
584
         *outmatrix_ref (out, scale - 1, 3) = col;
-
 
585
      }
684
 
586
 
-
 
587
      static void blendLineSteep (uint32_t col, outmatrix_t *out, alphagrad_func alphagrad)
-
 
588
      {
-
 
589
         alphagrad (outmatrix_ref (out, 0, scale - 1), col, 1, 4);
-
 
590
         alphagrad (outmatrix_ref (out, 2, scale - 2), col, 1, 4);
-
 
591
         alphagrad (outmatrix_ref (out, 1, scale - 1), col, 3, 4);
-
 
592
         alphagrad (outmatrix_ref (out, 3, scale - 2), col, 3, 4);
685
        out.template ref<scale - 1, 2>() = col;
593
         *outmatrix_ref (out, 2, scale - 1) = col;
686
        out.template ref<scale - 1, 3>() = col;
594
         *outmatrix_ref (out, 3, scale - 1) = col;
687
    }
595
      }
688
 
596
 
689
    template <class OutputMatrix>
-
 
690
    static void blendLineSteep(uint32_t col, OutputMatrix& out, alphagrad_func alphagrad)
597
      static void blendLineSteepAndShallow (uint32_t col, outmatrix_t *out, alphagrad_func alphagrad)
691
    {
598
      {
692
        alphagrad (&(out.template ref<0, scale - 1>()), col, 1, 4);
599
         alphagrad (outmatrix_ref (out, 3, 1), col, 3, 4);
693
        alphagrad (&(out.template ref<2, scale - 2>()), col, 1, 4);
600
         alphagrad (outmatrix_ref (out, 1, 3), col, 3, 4);
694
        alphagrad (&(out.template ref<1, scale - 1>()), col, 3, 4);
601
         alphagrad (outmatrix_ref (out, 3, 0), col, 1, 4);
695
        alphagrad (&(out.template ref<3, scale - 2>()), col, 3, 4);
602
         alphagrad (outmatrix_ref (out, 0, 3), col, 1, 4);
-
 
603
         alphagrad (outmatrix_ref (out, 2, 2), col, 1, 3); //[!] fixes 1/4 used in xBR
-
 
604
         *outmatrix_ref (out, 3, 3) = col;
-
 
605
         *outmatrix_ref (out, 3, 2) = col;
-
 
606
         *outmatrix_ref (out, 2, 3) = col;
-
 
607
      }
696
 
608
 
-
 
609
      static void blendLineDiagonal (uint32_t col, outmatrix_t *out, alphagrad_func alphagrad)
-
 
610
      {
697
        out.template ref<2, scale - 1>() = col;
611
         alphagrad (outmatrix_ref (out, scale - 1, scale / 2), col, 1, 2);
-
 
612
         alphagrad (outmatrix_ref (out, scale - 2, scale / 2 + 1), col, 1, 2);
698
        out.template ref<3, scale - 1>() = col;
613
         *outmatrix_ref (out, scale - 1, scale - 1) = col;
699
    }
614
      }
700
 
615
 
701
    template <class OutputMatrix>
-
 
702
    static void blendLineSteepAndShallow(uint32_t col, OutputMatrix& out, alphagrad_func alphagrad)
616
      static void blendCorner (uint32_t col, outmatrix_t *out, alphagrad_func alphagrad)
703
    {
617
      {
704
        alphagrad (&(out.template ref<3, 1>()), col, 3, 4);
618
         //model a round corner
705
        alphagrad (&(out.template ref<1, 3>()), col, 3, 4);
619
         alphagrad (outmatrix_ref (out, 3, 3), col, 68, 100); //exact: 0.6848532563
706
        alphagrad (&(out.template ref<3, 0>()), col, 1, 4);
-
 
707
        alphagrad (&(out.template ref<0, 3>()), col, 1, 4);
620
         alphagrad (outmatrix_ref (out, 3, 2), col, 9, 100); //0.08677704501
708
        alphagrad (&(out.template ref<2, 2>()), col, 1, 3); //[!] fixes 1/4 used in xBR
621
         alphagrad (outmatrix_ref (out, 2, 3), col, 9, 100); //0.08677704501
-
 
622
      }
-
 
623
   };
709
 
624
 
710
        out.template ref<3, 3>() = col;
-
 
711
        out.template ref<3, 2>() = col;
-
 
712
        out.template ref<2, 3>() = col;
-
 
713
    }
-
 
714
 
625
 
715
    template <class OutputMatrix>
626
   struct Scaler5x
716
    static void blendLineDiagonal(uint32_t col, OutputMatrix& out, alphagrad_func alphagrad)
-
 
717
    {
627
   {
718
        alphagrad (&(out.template ref<scale - 1, scale / 2    >()), col, 1, 2);
-
 
719
        alphagrad (&(out.template ref<scale - 2, scale / 2 + 1>()), col, 1, 2);
628
      static const int scale = 5;
720
 
629
 
721
        out.template ref<scale - 1, scale - 1>() = col;
-
 
722
    }
-
 
723
 
630
 
724
    template <class OutputMatrix>
-
 
725
    static void blendCorner(uint32_t col, OutputMatrix& out, alphagrad_func alphagrad)
631
      static void blendLineShallow (uint32_t col, outmatrix_t *out, alphagrad_func alphagrad)
726
    {
632
      {
727
        //model a round corner
633
         alphagrad (outmatrix_ref (out, scale - 1, 0), col, 1, 4);
-
 
634
         alphagrad (outmatrix_ref (out, scale - 2, 2), col, 1, 4);
728
        alphagrad (&(out.template ref<3, 3>()), col, 68, 100); //exact: 0.6848532563
635
         alphagrad (outmatrix_ref (out, scale - 3, 4), col, 1, 4);
729
        alphagrad (&(out.template ref<3, 2>()), col,  9, 100); //0.08677704501
636
         alphagrad (outmatrix_ref (out, scale - 1, 1), col, 3, 4);
730
        alphagrad (&(out.template ref<2, 3>()), col,  9, 100); //0.08677704501
637
         alphagrad (outmatrix_ref (out, scale - 2, 3), col, 3, 4);
-
 
638
         *outmatrix_ref (out, scale - 1, 2) = col;
-
 
639
         *outmatrix_ref (out, scale - 1, 3) = col;
-
 
640
         *outmatrix_ref (out, scale - 1, 4) = col;
-
 
641
         *outmatrix_ref (out, scale - 2, 4) = col;
731
    }
642
      }
732
};
-
 
733
 
643
 
-
 
644
      static void blendLineSteep (uint32_t col, outmatrix_t *out, alphagrad_func alphagrad)
-
 
645
      {
-
 
646
         alphagrad (outmatrix_ref (out, 0, scale - 1), col, 1, 4);
-
 
647
         alphagrad (outmatrix_ref (out, 2, scale - 2), col, 1, 4);
-
 
648
         alphagrad (outmatrix_ref (out, 4, scale - 3), col, 1, 4);
-
 
649
         alphagrad (outmatrix_ref (out, 1, scale - 1), col, 3, 4);
-
 
650
         alphagrad (outmatrix_ref (out, 3, scale - 2), col, 3, 4);
-
 
651
         *outmatrix_ref (out, 2, scale - 1) = col;
-
 
652
         *outmatrix_ref (out, 3, scale - 1) = col;
-
 
653
         *outmatrix_ref (out, 4, scale - 1) = col;
-
 
654
         *outmatrix_ref (out, 4, scale - 2) = col;
-
 
655
      }
734
 
656
 
735
struct Scaler5x
657
      static void blendLineSteepAndShallow (uint32_t col, outmatrix_t *out, alphagrad_func alphagrad)
736
{
658
      {
-
 
659
         alphagrad (outmatrix_ref (out, 0, scale - 1), col, 1, 4);
-
 
660
         alphagrad (outmatrix_ref (out, 2, scale - 2), col, 1, 4);
-
 
661
         alphagrad (outmatrix_ref (out, 1, scale - 1), col, 3, 4);
-
 
662
         alphagrad (outmatrix_ref (out, scale - 1, 0), col, 1, 4);
-
 
663
         alphagrad (outmatrix_ref (out, scale - 2, 2), col, 1, 4);
-
 
664
         alphagrad (outmatrix_ref (out, scale - 1, 1), col, 3, 4);
-
 
665
         alphagrad (outmatrix_ref (out, 3, 3), col, 2, 3);
-
 
666
         *outmatrix_ref (out, 2, scale - 1) = col;
737
    static const int scale = 5;
667
         *outmatrix_ref (out, 3, scale - 1) = col;
-
 
668
         *outmatrix_ref (out, 4, scale - 1) = col;
-
 
669
         *outmatrix_ref (out, scale - 1, 2) = col;
-
 
670
         *outmatrix_ref (out, scale - 1, 3) = col;
-
 
671
      }
738
 
672
 
-
 
673
      static void blendLineDiagonal (uint32_t col, outmatrix_t *out, alphagrad_func alphagrad)
-
 
674
      {
-
 
675
         alphagrad (outmatrix_ref (out, scale - 1, scale / 2 + 0), col, 1, 8); //conflict with other rotations for this odd scale
-
 
676
         alphagrad (outmatrix_ref (out, scale - 2, scale / 2 + 1), col, 1, 8);
-
 
677
         alphagrad (outmatrix_ref (out, scale - 3, scale / 2 + 2), col, 1, 8); //
-
 
678
         alphagrad (outmatrix_ref (out, 4, 3), col, 7, 8);
-
 
679
         alphagrad (outmatrix_ref (out, 3, 4), col, 7, 8);
-
 
680
         *outmatrix_ref (out, 4, 4) = col;
-
 
681
      }
739
 
682
 
740
    template <class OutputMatrix>
-
 
741
    static void blendLineShallow(uint32_t col, OutputMatrix& out, alphagrad_func alphagrad)
683
      static void blendCorner (uint32_t col, outmatrix_t *out, alphagrad_func alphagrad)
742
    {
684
      {
743
        alphagrad (&(out.template ref<scale - 1, 0>()), col, 1, 4);
685
         // model a round corner
744
        alphagrad (&(out.template ref<scale - 2, 2>()), col, 1, 4);
686
         alphagrad (outmatrix_ref (out, 4, 4), col, 86, 100); //exact: 0.8631434088
745
        alphagrad (&(out.template ref<scale - 3, 4>()), col, 1, 4);
687
         alphagrad (outmatrix_ref (out, 4, 3), col, 23, 100); //0.2306749731
746
        alphagrad (&(out.template ref<scale - 1, 1>()), col, 3, 4);
688
         alphagrad (outmatrix_ref (out, 3, 4), col, 23, 100); //0.2306749731
-
 
689
         //alphagrad (outmatrix_ref (out, 4, 2), col, 1, 64); //0.01676812367 -> negligible + avoid conflicts with other rotations for this odd scale
747
        alphagrad (&(out.template ref<scale - 2, 3>()), col, 3, 4);
690
         //alphagrad (outmatrix_ref (out, 2, 4), col, 1, 64); //0.01676812367
-
 
691
      }
-
 
692
   };
748
 
693
 
749
        out.template ref<scale - 1, 2>() = col;
-
 
750
        out.template ref<scale - 1, 3>() = col;
-
 
751
        out.template ref<scale - 1, 4>() = col;
-
 
752
        out.template ref<scale - 2, 4>() = col;
-
 
753
    }
-
 
754
 
694
 
755
    template <class OutputMatrix>
695
   struct Scaler6x
756
    static void blendLineSteep(uint32_t col, OutputMatrix& out, alphagrad_func alphagrad)
-
 
757
    {
696
   {
758
        alphagrad (&(out.template ref<0, scale - 1>()), col, 1, 4);
-
 
759
        alphagrad (&(out.template ref<2, scale - 2>()), col, 1, 4);
-
 
760
        alphagrad (&(out.template ref<4, scale - 3>()), col, 1, 4);
-
 
761
        alphagrad (&(out.template ref<1, scale - 1>()), col, 3, 4);
-
 
762
        alphagrad (&(out.template ref<3, scale - 2>()), col, 3, 4);
697
      static const int scale = 6;
763
 
698
 
764
        out.template ref<2, scale - 1>() = col;
-
 
765
        out.template ref<3, scale - 1>() = col;
-
 
766
        out.template ref<4, scale - 1>() = col;
-
 
767
        out.template ref<4, scale - 2>() = col;
-
 
768
    }
-
 
769
 
699
 
770
    template <class OutputMatrix>
-
 
771
    static void blendLineSteepAndShallow(uint32_t col, OutputMatrix& out, alphagrad_func alphagrad)
700
      static void blendLineShallow (uint32_t col, outmatrix_t *out, alphagrad_func alphagrad)
772
    {
701
      {
773
        alphagrad (&(out.template ref<0, scale - 1>()), col, 1, 4);
702
         alphagrad (outmatrix_ref (out, scale - 1, 0), col, 1, 4);
774
        alphagrad (&(out.template ref<2, scale - 2>()), col, 1, 4);
703
         alphagrad (outmatrix_ref (out, scale - 2, 2), col, 1, 4);
775
        alphagrad (&(out.template ref<1, scale - 1>()), col, 3, 4);
704
         alphagrad (outmatrix_ref (out, scale - 3, 4), col, 1, 4);
776
        alphagrad (&(out.template ref<scale - 1, 0>()), col, 1, 4);
705
         alphagrad (outmatrix_ref (out, scale - 1, 1), col, 3, 4);
777
        alphagrad (&(out.template ref<scale - 2, 2>()), col, 1, 4);
706
         alphagrad (outmatrix_ref (out, scale - 2, 3), col, 3, 4);
778
        alphagrad (&(out.template ref<scale - 1, 1>()), col, 3, 4);
707
         alphagrad (outmatrix_ref (out, scale - 3, 5), col, 3, 4);
779
        alphagrad (&(out.template ref<3, 3>()), col, 2, 3);
-
 
780
 
708
 
781
        out.template ref<2, scale - 1>() = col;
709
         *outmatrix_ref (out, scale - 1, 2) = col;
782
        out.template ref<3, scale - 1>() = col;
710
         *outmatrix_ref (out, scale - 1, 3) = col;
783
        out.template ref<4, scale - 1>() = col;
711
         *outmatrix_ref (out, scale - 1, 4) = col;
784
        out.template ref<scale - 1, 2>() = col;
712
         *outmatrix_ref (out, scale - 1, 5) = col;
-
 
713
         *outmatrix_ref (out, scale - 2, 4) = col;
785
        out.template ref<scale - 1, 3>() = col;
714
         *outmatrix_ref (out, scale - 2, 5) = col;
786
    }
715
      }
787
 
716
 
788
    template <class OutputMatrix>
-
 
789
    static void blendLineDiagonal(uint32_t col, OutputMatrix& out, alphagrad_func alphagrad)
717
      static void blendLineSteep (uint32_t col, outmatrix_t *out, alphagrad_func alphagrad)
790
    {
718
      {
791
        alphagrad (&(out.template ref<scale - 1, scale / 2    >()), col, 1, 8); //conflict with other rotations for this odd scale
719
         alphagrad (outmatrix_ref (out, 0, scale - 1), col, 1, 4);
792
        alphagrad (&(out.template ref<scale - 2, scale / 2 + 1>()), col, 1, 8);
720
         alphagrad (outmatrix_ref (out, 2, scale - 2), col, 1, 4);
793
        alphagrad (&(out.template ref<scale - 3, scale / 2 + 2>()), col, 1, 8); //
721
         alphagrad (outmatrix_ref (out, 4, scale - 3), col, 1, 4);
794
        alphagrad (&(out.template ref<4, 3>()), col, 7, 8);
722
         alphagrad (outmatrix_ref (out, 1, scale - 1), col, 3, 4);
795
        alphagrad (&(out.template ref<3, 4>()), col, 7, 8);
723
         alphagrad (outmatrix_ref (out, 3, scale - 2), col, 3, 4);
-
 
724
         alphagrad (outmatrix_ref (out, 5, scale - 3), col, 3, 4);
-
 
725
         *outmatrix_ref (out, 2, scale - 1) = col;
-
 
726
         *outmatrix_ref (out, 3, scale - 1) = col;
-
 
727
         *outmatrix_ref (out, 4, scale - 1) = col;
-
 
728
         *outmatrix_ref (out, 5, scale - 1) = col;
-
 
729
         *outmatrix_ref (out, 4, scale - 2) = col;
-
 
730
         *outmatrix_ref (out, 5, scale - 2) = col;
-
 
731
      }
796
 
732
 
-
 
733
      static void blendLineSteepAndShallow (uint32_t col, outmatrix_t *out, alphagrad_func alphagrad)
-
 
734
      {
-
 
735
         alphagrad (outmatrix_ref (out, 0, scale - 1), col, 1, 4);
-
 
736
         alphagrad (outmatrix_ref (out, 2, scale - 2), col, 1, 4);
-
 
737
         alphagrad (outmatrix_ref (out, 1, scale - 1), col, 3, 4);
-
 
738
         alphagrad (outmatrix_ref (out, 3, scale - 2), col, 3, 4);
-
 
739
         alphagrad (outmatrix_ref (out, scale - 1, 0), col, 1, 4);
-
 
740
         alphagrad (outmatrix_ref (out, scale - 2, 2), col, 1, 4);
-
 
741
         alphagrad (outmatrix_ref (out, scale - 1, 1), col, 3, 4);
-
 
742
         alphagrad (outmatrix_ref (out, scale - 2, 3), col, 3, 4);
-
 
743
         *outmatrix_ref (out, 2, scale - 1) = col;
-
 
744
         *outmatrix_ref (out, 3, scale - 1) = col;
-
 
745
         *outmatrix_ref (out, 4, scale - 1) = col;
-
 
746
         *outmatrix_ref (out, 5, scale - 1) = col;
-
 
747
         *outmatrix_ref (out, 4, scale - 2) = col;
-
 
748
         *outmatrix_ref (out, 5, scale - 2) = col;
-
 
749
         *outmatrix_ref (out, scale - 1, 2) = col;
797
        out.template ref<4, 4>() = col;
750
         *outmatrix_ref (out, scale - 1, 3) = col;
798
    }
751
      }
799
 
752
 
800
    template <class OutputMatrix>
-
 
801
    static void blendCorner(uint32_t col, OutputMatrix& out, alphagrad_func alphagrad)
753
      static void blendLineDiagonal (uint32_t col, outmatrix_t *out, alphagrad_func alphagrad)
802
    {
754
      {
803
        // model a round corner
755
         alphagrad (outmatrix_ref (out, scale - 1, scale / 2 + 0), col, 1, 2);
804
        alphagrad (&(out.template ref<4, 4>()), col, 86, 100); //exact: 0.8631434088
756
         alphagrad (outmatrix_ref (out, scale - 2, scale / 2 + 1), col, 1, 2);
805
        alphagrad (&(out.template ref<4, 3>()), col, 23, 100); //0.2306749731
757
         alphagrad (outmatrix_ref (out, scale - 3, scale / 2 + 2), col, 1, 2);
806
        alphagrad (&(out.template ref<3, 4>()), col, 23, 100); //0.2306749731
758
         *outmatrix_ref (out, scale - 2, scale - 1) = col;
807
        //alphaGrad<1, 64>(out.template ref<4, 2>(), col); //0.01676812367 -> negligible + avoid conflicts with other rotations for this odd scale
759
         *outmatrix_ref (out, scale - 1, scale - 1) = col;
808
        //alphaGrad<1, 64>(out.template ref<2, 4>(), col); //0.01676812367
760
         *outmatrix_ref (out, scale - 1, scale - 2) = col;
809
    }
761
      }
810
};
-
 
811
 
762
 
-
 
763
      static void blendCorner (uint32_t col, outmatrix_t *out, alphagrad_func alphagrad)
-
 
764
      {
-
 
765
         //model a round corner
-
 
766
         alphagrad (outmatrix_ref (out, 5, 5), col, 97, 100); //exact: 0.9711013910
-
 
767
         alphagrad (outmatrix_ref (out, 4, 5), col, 42, 100); //0.4236372243
-
 
768
         alphagrad (outmatrix_ref (out, 5, 4), col, 42, 100); //0.4236372243
-
 
769
         alphagrad (outmatrix_ref (out, 5, 3), col, 6, 100); //0.05652034508
-
 
770
         alphagrad (outmatrix_ref (out, 3, 5), col, 6, 100); //0.05652034508
-
 
771
      }
-
 
772
   };
812
 
773
 
813
struct Scaler6x
-
 
814
{
-
 
815
    static const int scale = 6;
-
 
816
 
-
 
817
 
-
 
818
    template <class OutputMatrix>
-
 
819
    static void blendLineShallow(uint32_t col, OutputMatrix& out, alphagrad_func alphagrad)
-
 
820
    {
-
 
821
        alphagrad (&(out.template ref<scale - 1, 0>()), col, 1, 4);
-
 
822
        alphagrad (&(out.template ref<scale - 2, 2>()), col, 1, 4);
-
 
823
        alphagrad (&(out.template ref<scale - 3, 4>()), col, 1, 4);
-
 
824
        alphagrad (&(out.template ref<scale - 1, 1>()), col, 3, 4);
-
 
825
        alphagrad (&(out.template ref<scale - 2, 3>()), col, 3, 4);
-
 
826
        alphagrad (&(out.template ref<scale - 3, 5>()), col, 3, 4);
-
 
827
 
-
 
828
        out.template ref<scale - 1, 2>() = col;
-
 
829
        out.template ref<scale - 1, 3>() = col;
-
 
830
        out.template ref<scale - 1, 4>() = col;
-
 
831
        out.template ref<scale - 1, 5>() = col;
-
 
832
        out.template ref<scale - 2, 4>() = col;
-
 
833
        out.template ref<scale - 2, 5>() = col;
-
 
834
    }
-
 
835
 
-
 
836
    template <class OutputMatrix>
-
 
837
    static void blendLineSteep(uint32_t col, OutputMatrix& out, alphagrad_func alphagrad)
-
 
838
    {
-
 
839
        alphagrad (&(out.template ref<0, scale - 1>()), col, 1, 4);
-
 
840
        alphagrad (&(out.template ref<2, scale - 2>()), col, 1, 4);
-
 
841
        alphagrad (&(out.template ref<4, scale - 3>()), col, 1, 4);
-
 
842
        alphagrad (&(out.template ref<1, scale - 1>()), col, 3, 4);
-
 
843
        alphagrad (&(out.template ref<3, scale - 2>()), col, 3, 4);
-
 
844
        alphagrad (&(out.template ref<5, scale - 3>()), col, 3, 4);
-
 
845
 
-
 
846
        out.template ref<2, scale - 1>() = col;
-
 
847
        out.template ref<3, scale - 1>() = col;
-
 
848
        out.template ref<4, scale - 1>() = col;
-
 
849
        out.template ref<5, scale - 1>() = col;
-
 
850
        out.template ref<4, scale - 2>() = col;
-
 
851
        out.template ref<5, scale - 2>() = col;
-
 
852
    }
-
 
853
 
-
 
854
    template <class OutputMatrix>
-
 
855
    static void blendLineSteepAndShallow(uint32_t col, OutputMatrix& out, alphagrad_func alphagrad)
-
 
856
    {
-
 
857
        alphagrad (&(out.template ref<0, scale - 1>()), col, 1, 4);
-
 
858
        alphagrad (&(out.template ref<2, scale - 2>()), col, 1, 4);
-
 
859
        alphagrad (&(out.template ref<1, scale - 1>()), col, 3, 4);
-
 
860
        alphagrad (&(out.template ref<3, scale - 2>()), col, 3, 4);
-
 
861
        alphagrad (&(out.template ref<scale - 1, 0>()), col, 1, 4);
-
 
862
        alphagrad (&(out.template ref<scale - 2, 2>()), col, 1, 4);
-
 
863
        alphagrad (&(out.template ref<scale - 1, 1>()), col, 3, 4);
-
 
864
        alphagrad (&(out.template ref<scale - 2, 3>()), col, 3, 4);
-
 
865
 
-
 
866
        out.template ref<2, scale - 1>() = col;
-
 
867
        out.template ref<3, scale - 1>() = col;
-
 
868
        out.template ref<4, scale - 1>() = col;
-
 
869
        out.template ref<5, scale - 1>() = col;
-
 
870
        out.template ref<4, scale - 2>() = col;
-
 
871
        out.template ref<5, scale - 2>() = col;
-
 
872
        out.template ref<scale - 1, 2>() = col;
-
 
873
        out.template ref<scale - 1, 3>() = col;
-
 
874
    }
-
 
875
 
-
 
876
    template <class OutputMatrix>
-
 
877
    static void blendLineDiagonal(uint32_t col, OutputMatrix& out, alphagrad_func alphagrad)
-
 
878
    {
-
 
879
        alphagrad (&(out.template ref<scale - 1, scale / 2    >()), col, 1, 2);
-
 
880
        alphagrad (&(out.template ref<scale - 2, scale / 2 + 1>()), col, 1, 2);
-
 
881
        alphagrad (&(out.template ref<scale - 3, scale / 2 + 2>()), col, 1, 2);
-
 
882
 
-
 
883
        out.template ref<scale - 2, scale - 1>() = col;
-
 
884
        out.template ref<scale - 1, scale - 1>() = col;
-
 
885
        out.template ref<scale - 1, scale - 2>() = col;
-
 
886
    }
-
 
887
 
-
 
888
    template <class OutputMatrix>
-
 
889
    static void blendCorner(uint32_t col, OutputMatrix& out, alphagrad_func alphagrad)
-
 
890
    {
-
 
891
        //model a round corner
-
 
892
        alphagrad (&(out.template ref<5, 5>()), col, 97, 100); //exact: 0.9711013910
-
 
893
        alphagrad (&(out.template ref<4, 5>()), col, 42, 100); //0.4236372243
-
 
894
        alphagrad (&(out.template ref<5, 4>()), col, 42, 100); //0.4236372243
-
 
895
        alphagrad (&(out.template ref<5, 3>()), col,  6, 100); //0.05652034508
-
 
896
        alphagrad (&(out.template ref<3, 5>()), col,  6, 100); //0.05652034508
-
 
897
    }
-
 
898
};
-
 
899
 
-
 
900
        //------------------------------------------------------------------------------------
774
   //------------------------------------------------------------------------------------
901
}
775
}
902
 
776
 
903
 
777
 
904
 
778
 
905
static double dist24 (uint32_t pix1, uint32_t pix2)
779
static double dist24 (uint32_t pix1, uint32_t pix2)
906
{
780
{
907
    //30% perf boost compared to plain distYCbCr()!
781
   //30% perf boost compared to plain distYCbCr()!
908
    //consumes 64 MB memory; using double is only 2% faster, but takes 128 MB
782
   //consumes 64 MB memory; using double is only 2% faster, but takes 128 MB
909
    static float diffToDist[256 * 256 * 256];
783
   static float diffToDist[256 * 256 * 256];
910
    static bool is_initialized = false;
784
   static bool is_initialized = false;
911
    if (!is_initialized)
785
   if (!is_initialized)
912
    {
786
   {
913
        for (uint32_t i = 0; i < 256 * 256 * 256; ++i) //startup time: 114 ms on Intel Core i5 (four cores)
787
      for (uint32_t i = 0; i < 256 * 256 * 256; ++i) //startup time: 114 ms on Intel Core i5 (four cores)
914
        {
788
      {
915
            const int r_diff = GET_RED (i) * 2 - 0xFF;
789
         const int r_diff = GET_RED (i) * 2 - 0xFF;
916
            const int g_diff = GET_GREEN (i) * 2 - 0xFF;
790
         const int g_diff = GET_GREEN (i) * 2 - 0xFF;
917
            const int b_diff = GET_BLUE (i) * 2 - 0xFF;
791
         const int b_diff = GET_BLUE (i) * 2 - 0xFF;
918
 
792
 
919
            const double k_b = 0.0593; //ITU-R BT.2020 conversion
793
         const double k_b = 0.0593; //ITU-R BT.2020 conversion
920
            const double k_r = 0.2627; //
794
         const double k_r = 0.2627; //
921
            const double k_g = 1 - k_b - k_r;
795
         const double k_g = 1 - k_b - k_r;
922
 
796
 
923
            const double scale_b = 0.5 / (1 - k_b);
797
         const double scale_b = 0.5 / (1 - k_b);
924
            const double scale_r = 0.5 / (1 - k_r);
798
         const double scale_r = 0.5 / (1 - k_r);
925
 
799
 
926
            const double y   = k_r * r_diff + k_g * g_diff + k_b * b_diff; //[!], analog YCbCr!
800
         const double y = k_r * r_diff + k_g * g_diff + k_b * b_diff; //[!], analog YCbCr!
927
            const double c_b = scale_b * (b_diff - y);
801
         const double c_b = scale_b * (b_diff - y);
928
            const double c_r = scale_r * (r_diff - y);
802
         const double c_r = scale_r * (r_diff - y);
929
 
803
 
930
            diffToDist[i] = (float) (sqrt ((y * y) + (c_b * c_b) + (c_r * c_r)));
804
         diffToDist[i] = (float) (sqrt ((y * y) + (c_b * c_b) + (c_r * c_r)));
931
        }
805
      }
932
        is_initialized = true;
806
      is_initialized = true;
933
    }
807
   }
934
 
808
 
935
    const int r_diff = (int) GET_RED   (pix1) - (int) GET_RED   (pix2);
809
   const int r_diff = (int) GET_RED (pix1) - (int) GET_RED (pix2);
936
    const int g_diff = (int) GET_GREEN (pix1) - (int) GET_GREEN (pix2);
810
   const int g_diff = (int) GET_GREEN (pix1) - (int) GET_GREEN (pix2);
937
    const int b_diff = (int) GET_BLUE  (pix1) - (int) GET_BLUE  (pix2);
811
   const int b_diff = (int) GET_BLUE (pix1) - (int) GET_BLUE (pix2);
938
 
812
 
939
    return diffToDist[(((r_diff + 0xFF) / 2) << 16) | //slightly reduce precision (division by 2) to squeeze value into single byte
813
   return diffToDist[(((r_diff + 0xFF) / 2) << 16) | //slightly reduce precision (division by 2) to squeeze value into single byte
940
                      (((g_diff + 0xFF) / 2) <<  8) |
814
      (((g_diff + 0xFF) / 2) << 8) |
941
                      (((b_diff + 0xFF) / 2) <<  0)];
815
      (((b_diff + 0xFF) / 2) << 0)];
942
}
816
}
943
 
817
 
944
 
818
 
945
static double dist32 (uint32_t pix1, uint32_t pix2)
819
static double dist32 (uint32_t pix1, uint32_t pix2)
946
{
820
{
947
    const double a1 = GET_ALPHA (pix1) / 255.0 ;
821
   const double a1 = GET_ALPHA (pix1) / 255.0;
948
    const double a2 = GET_ALPHA (pix2) / 255.0 ;
822
   const double a2 = GET_ALPHA (pix2) / 255.0;
949
    /*
823
   /*
950
    Requirements for a color distance handling alpha channel: with a1, a2 in [0, 1]
824
   Requirements for a color distance handling alpha channel: with a1, a2 in [0, 1]
951
 
825
 
952
        1. if a1 = a2, distance should be: a1 * distYCbCr()
826
       1. if a1 = a2, distance should be: a1 * distYCbCr()
953
        2. if a1 = 0,  distance should be: a2 * distYCbCr(black, white) = a2 * 255
827
       2. if a1 = 0,  distance should be: a2 * distYCbCr(black, white) = a2 * 255
954
        3. if a1 = 1,  ??? maybe: 255 * (1 - a2) + a2 * distYCbCr()
828
       3. if a1 = 1,  ??? maybe: 255 * (1 - a2) + a2 * distYCbCr()
955
    */
829
   */
956
 
830
 
957
    //return MIN (a1, a2) * distYCbCrBuffered(pix1, pix2) + 255 * abs(a1 - a2);
831
   //return MIN (a1, a2) * distYCbCrBuffered(pix1, pix2) + 255 * abs(a1 - a2);
958
    //=> following code is 15% faster:
832
   //=> following code is 15% faster:
959
    const double d = dist24 (pix1, pix2);
833
   const double d = dist24 (pix1, pix2);
960
    return (a1 < a2 ? a1 * d + 255 * (a2 - a1) : a2 * d + 255 * (a1 - a2));
834
   return (a1 < a2 ? a1 * d + 255 * (a2 - a1) : a2 * d + 255 * (a1 - a2));
961
}
835
}
962
 
836
 
963
 
837
 
964
static void alphagrad24 (uint32_t *pixBack, uint32_t pixFront, unsigned int M, unsigned int N)
838
static void alphagrad24 (uint32_t *pixBack, uint32_t pixFront, unsigned int M, unsigned int N)
965
{
839
{
966
        // blend front color with opacity M / N over opaque background: http://en.wikipedia.org/wiki/Alpha_compositing#Alpha_blending
840
   // blend front color with opacity M / N over opaque background: http://en.wikipedia.org/wiki/Alpha_compositing#Alpha_blending
967
        *pixBack = (  (CALC_COLOR24 (GET_RED   (pixFront), GET_RED   (*pixBack), M, N) << 16)
841
   *pixBack = ((CALC_COLOR24 (GET_RED (pixFront), GET_RED (*pixBack), M, N) << 16)
968
                                | (CALC_COLOR24 (GET_GREEN (pixFront), GET_GREEN (*pixBack), M, N) <<  8)
842
      | (CALC_COLOR24 (GET_GREEN (pixFront), GET_GREEN (*pixBack), M, N) << 8)
969
                                | (CALC_COLOR24 (GET_BLUE  (pixFront), GET_BLUE  (*pixBack), M, N) <<  0));
843
      | (CALC_COLOR24 (GET_BLUE (pixFront), GET_BLUE (*pixBack), M, N) << 0));
970
}
844
}
971
 
845
 
972
 
846
 
973
static void alphagrad32 (uint32_t *pixBack, uint32_t pixFront, unsigned int M, unsigned int N)
847
static void alphagrad32 (uint32_t *pixBack, uint32_t pixFront, unsigned int M, unsigned int N)
974
{
848
{
975
        // find intermediate color between two colors with alpha channels (=> NO alpha blending!!!)
849
   // find intermediate color between two colors with alpha channels (=> NO alpha blending!!!)
976
    const unsigned int weightFront = GET_ALPHA (pixFront) * M;
850
   const unsigned int weightFront = GET_ALPHA (pixFront) * M;
977
    const unsigned int weightBack  = GET_ALPHA (*pixBack) * (N - M);
851
   const unsigned int weightBack = GET_ALPHA (*pixBack) * (N - M);
978
    const unsigned int weightSum   = weightFront + weightBack;
852
   const unsigned int weightSum = weightFront + weightBack;
979
    *pixBack = (weightSum == 0 ? 0 :
853
   *pixBack = (weightSum == 0 ? 0 :
980
                                (((unsigned char) (weightSum / N))                                                               << 24)
854
      (((unsigned char) (weightSum / N)) << 24)
981
                                | (CALC_COLOR32 (GET_RED   (pixFront), GET_RED   (*pixBack), weightFront, weightBack, weightSum) << 16)
855
      | (CALC_COLOR32 (GET_RED (pixFront), GET_RED (*pixBack), weightFront, weightBack, weightSum) << 16)
982
                                | (CALC_COLOR32 (GET_GREEN (pixFront), GET_GREEN (*pixBack), weightFront, weightBack, weightSum) <<  8)
856
      | (CALC_COLOR32 (GET_GREEN (pixFront), GET_GREEN (*pixBack), weightFront, weightBack, weightSum) << 8)
983
                                | (CALC_COLOR32 (GET_BLUE  (pixFront), GET_BLUE  (*pixBack), weightFront, weightBack, weightSum) <<  0));
857
      | (CALC_COLOR32 (GET_BLUE (pixFront), GET_BLUE (*pixBack), weightFront, weightBack, weightSum) << 0));
984
}
858
}
985
 
859
 
986
 
860
 
987
EXTERN_C void nearestNeighborScale(const uint32_t *src, int srcWidth, int srcHeight, uint32_t *trg, int trgWidth, int trgHeight)
861
EXTERN_C void nearestNeighborScale (const uint32_t *src, int srcWidth, int srcHeight, uint32_t *trg, int trgWidth, int trgHeight)
988
{
862
{
989
//    nearestNeighborScale (src, srcWidth, srcHeight, srcWidth * sizeof (uint32_t), trg, trgWidth, trgHeight, trgWidth * sizeof (uint32_t), XBRZ_SLICETYPE_TARGET, 0, trgHeight, [](uint32_t pix) { return pix; });
863
   //    nearestNeighborScale (src, srcWidth, srcHeight, srcWidth * sizeof (uint32_t), trg, trgWidth, trgHeight, trgWidth * sizeof (uint32_t), XBRZ_SLICETYPE_TARGET, 0, trgHeight, [](uint32_t pix) { return pix; });
990
    //static_assert(std::is_integral<PixSrc>::value, "PixSrc* is expected to be cast-able to char*");
864
       //static_assert(std::is_integral<PixSrc>::value, "PixSrc* is expected to be cast-able to char*");
991
    //static_assert(std::is_integral<PixTrg>::value, "PixTrg* is expected to be cast-able to char*");
865
       //static_assert(std::is_integral<PixTrg>::value, "PixTrg* is expected to be cast-able to char*");
992
    //static_assert(std::is_same<decltype(pixCvrt(PixSrc())), PixTrg>::value, "PixConverter returning wrong pixel format");
866
       //static_assert(std::is_same<decltype(pixCvrt(PixSrc())), PixTrg>::value, "PixConverter returning wrong pixel format");
993
 
867
 
994
    int srcPitch = srcWidth * sizeof (uint32_t);
868
   int srcPitch = srcWidth * sizeof (uint32_t);
995
    int trgPitch = trgWidth * sizeof (uint32_t);
869
   int trgPitch = trgWidth * sizeof (uint32_t);
996
    int yFirst;
870
   int yFirst;
997
    int yLast;
871
   int yLast;
998
 
872
 
999
#if 0 // going over source image - fast for upscaling, since source is read only once
873
#if 0 // going over source image - fast for upscaling, since source is read only once
1000
    yFirst = 0;
874
   yFirst = 0;
1001
    yLast  = MIN (trgHeight, srcHeight);
875
   yLast = MIN (trgHeight, srcHeight);
1002
 
876
 
1003
    if (yFirst >= yLast || trgWidth <= 0 || trgHeight <= 0)
877
   if (yFirst >= yLast || trgWidth <= 0 || trgHeight <= 0)
1004
        return; // consistency check
878
      return; // consistency check
1005
 
879
 
1006
    for (int y = yFirst; y < yLast; ++y)
880
   for (int y = yFirst; y < yLast; ++y)
1007
    {
881
   {
1008
        //mathematically: ySrc = floor(srcHeight * yTrg / trgHeight)
882
      //mathematically: ySrc = floor(srcHeight * yTrg / trgHeight)
1009
        // => search for integers in: [ySrc, ySrc + 1) * trgHeight / srcHeight
883
      // => search for integers in: [ySrc, ySrc + 1) * trgHeight / srcHeight
1010
 
884
 
1011
        //keep within for loop to support MT input slices!
885
      //keep within for loop to support MT input slices!
1012
        const int yTrg_first = ( y      * trgHeight + srcHeight - 1) / srcHeight; //=ceil(y * trgHeight / srcHeight)
886
      const int yTrg_first = (y      * trgHeight + srcHeight - 1) / srcHeight; //=ceil(y * trgHeight / srcHeight)
1013
        const int yTrg_last  = ((y + 1) * trgHeight + srcHeight - 1) / srcHeight; //=ceil(((y + 1) * trgHeight) / srcHeight)
887
      const int yTrg_last = ((y + 1) * trgHeight + srcHeight - 1) / srcHeight; //=ceil(((y + 1) * trgHeight) / srcHeight)
1014
        const int blockHeight = yTrg_last - yTrg_first;
888
      const int blockHeight = yTrg_last - yTrg_first;
1015
 
889
 
1016
        if (blockHeight > 0)
890
      if (blockHeight > 0)
1017
        {
891
      {
1018
            const uint32_t *srcLine = (const uint32_t *) BYTE_ADVANCE (src, y * srcPitch);
892
         const uint32_t *srcLine = (const uint32_t *) BYTE_ADVANCE (src, y * srcPitch);
1019
            /**/  uint32_t *trgLine = (      uint32_t *) BYTE_ADVANCE (trg, yTrg_first * trgPitch);
893
         /**/  uint32_t *trgLine = (uint32_t *) BYTE_ADVANCE (trg, yTrg_first * trgPitch);
1020
            int xTrg_first = 0;
894
         int xTrg_first = 0;
1021
 
895
 
1022
            for (int x = 0; x < srcWidth; ++x)
896
         for (int x = 0; x < srcWidth; ++x)
-
 
897
         {
-
 
898
            const int xTrg_last = ((x + 1) * trgWidth + srcWidth - 1) / srcWidth;
-
 
899
            const int blockWidth = xTrg_last - xTrg_first;
-
 
900
            if (blockWidth > 0)
1023
            {
901
            {
1024
                const int xTrg_last = ((x + 1) * trgWidth + srcWidth - 1) / srcWidth;
-
 
1025
                const int blockWidth = xTrg_last - xTrg_first;
-
 
1026
                if (blockWidth > 0)
-
 
1027
                {
-
 
1028
                    const uint32_t trgColor = srcLine[x];
902
               const uint32_t trgColor = srcLine[x];
1029
                                        uint32_t *blkLine = trgLine;
903
               uint32_t *blkLine = trgLine;
1030
 
904
 
1031
                    xTrg_first = xTrg_last;
905
               xTrg_first = xTrg_last;
1032
 
906
 
1033
                                    for (int blk_y = 0; blk_y < blockHeight; ++blk_y, blkLine = (uint32_t *) BYTE_ADVANCE (blkLine, trgPitch))
907
               for (int blk_y = 0; blk_y < blockHeight; ++blk_y, blkLine = (uint32_t *) BYTE_ADVANCE (blkLine, trgPitch))
1034
                                        for (int blk_x = 0; blk_x < blockWidth; ++blk_x)
908
                  for (int blk_x = 0; blk_x < blockWidth; ++blk_x)
1035
                                            blkLine[blk_x] = trgColor;
909
                     blkLine[blk_x] = trgColor;
1036
 
910
 
1037
                    trgLine += blockWidth;
911
               trgLine += blockWidth;
1038
                }
-
 
1039
            }
912
            }
1040
        }
913
         }
-
 
914
      }
1041
    }
915
   }
1042
#else // going over target image - slow for upscaling, since source is read multiple times missing out on cache! Fast for similar image sizes!
916
#else // going over target image - slow for upscaling, since source is read multiple times missing out on cache! Fast for similar image sizes!
1043
    yFirst = 0;
917
   yFirst = 0;
1044
    yLast  = trgHeight;
918
   yLast = trgHeight;
1045
 
919
 
1046
    if (yFirst >= yLast || srcHeight <= 0 || srcWidth <= 0)
920
   if (yFirst >= yLast || srcHeight <= 0 || srcWidth <= 0)
1047
        return; // consistency check
921
      return; // consistency check
1048
 
922
 
1049
    for (int y = yFirst; y < yLast; ++y)
923
   for (int y = yFirst; y < yLast; ++y)
1050
    {
924
   {
1051
        /**/  uint32_t *trgLine = (      uint32_t *) BYTE_ADVANCE (trg, y * trgPitch);
925
      /**/  uint32_t *trgLine = (uint32_t *) BYTE_ADVANCE (trg, y * trgPitch);
1052
        const int ySrc = srcHeight * y / trgHeight;
926
      const int ySrc = srcHeight * y / trgHeight;
1053
        const uint32_t *srcLine = (const uint32_t *) BYTE_ADVANCE (src, ySrc * srcPitch);
927
      const uint32_t *srcLine = (const uint32_t *) BYTE_ADVANCE (src, ySrc * srcPitch);
1054
        for (int x = 0; x < trgWidth; ++x)
928
      for (int x = 0; x < trgWidth; ++x)
1055
        {
929
      {
1056
            const int xSrc = srcWidth * x / trgWidth;
930
         const int xSrc = srcWidth * x / trgWidth;
1057
            trgLine[x] = srcLine[xSrc];
931
         trgLine[x] = srcLine[xSrc];
1058
        }
932
      }
1059
    }
933
   }
1060
#endif // going over source or target
934
#endif // going over source or target
1061
 
935
 
1062
        return;
936
   return;
1063
}
937
}
1064
 
938
 
1065
 
939
 
1066
EXTERN_C bool xbrz_equalcolortest24 (uint32_t col1, uint32_t col2, double luminanceWeight, double equalColorTolerance)
940
EXTERN_C bool xbrz_equalcolortest24 (uint32_t col1, uint32_t col2, double luminanceWeight, double equalColorTolerance)
1067
{
941
{
1068
        return (dist24 (col1, col2) < equalColorTolerance);
942
   return (dist24 (col1, col2) < equalColorTolerance);
1069
}
943
}
1070
 
944
 
1071
 
945
 
1072
EXTERN_C bool xbrz_equalcolortest32 (uint32_t col1, uint32_t col2, double luminanceWeight, double equalColorTolerance)
946
EXTERN_C bool xbrz_equalcolortest32 (uint32_t col1, uint32_t col2, double luminanceWeight, double equalColorTolerance)
1073
{
947
{
1074
        return (dist32 (col1, col2) < equalColorTolerance);
948
   return (dist32 (col1, col2) < equalColorTolerance);
1075
}
949
}
1076
 
950
 
1077
 
951
 
1078
EXTERN_C void xbrz_scale24 (size_t factor, const uint32_t *src, uint32_t *trg, int srcWidth, int srcHeight)
952
EXTERN_C void xbrz_scale24 (size_t factor, const uint32_t *src, uint32_t *trg, int srcWidth, int srcHeight)
1079
{
953
{
1080
    if      (factor == 2) return scaleImage<Scaler2x> (src, trg, srcWidth, srcHeight, 0, srcHeight, alphagrad24, dist24);
954
   if (factor == 2) return scaleImage<Scaler2x> (src, trg, srcWidth, srcHeight, 0, srcHeight, alphagrad24, dist24);
1081
    else if (factor == 3) return scaleImage<Scaler3x> (src, trg, srcWidth, srcHeight, 0, srcHeight, alphagrad24, dist24);
955
   else if (factor == 3) return scaleImage<Scaler3x> (src, trg, srcWidth, srcHeight, 0, srcHeight, alphagrad24, dist24);
1082
    else if (factor == 4) return scaleImage<Scaler4x> (src, trg, srcWidth, srcHeight, 0, srcHeight, alphagrad24, dist24);
956
   else if (factor == 4) return scaleImage<Scaler4x> (src, trg, srcWidth, srcHeight, 0, srcHeight, alphagrad24, dist24);
1083
    else if (factor == 5) return scaleImage<Scaler5x> (src, trg, srcWidth, srcHeight, 0, srcHeight, alphagrad24, dist24);
957
   else if (factor == 5) return scaleImage<Scaler5x> (src, trg, srcWidth, srcHeight, 0, srcHeight, alphagrad24, dist24);
1084
    else if (factor == 6) return scaleImage<Scaler6x> (src, trg, srcWidth, srcHeight, 0, srcHeight, alphagrad24, dist24);
958
   else if (factor == 6) return scaleImage<Scaler6x> (src, trg, srcWidth, srcHeight, 0, srcHeight, alphagrad24, dist24);
1085
}
959
}
1086
 
960
 
1087
 
961
 
1088
EXTERN_C void xbrz_scale32 (size_t factor, const uint32_t *src, uint32_t *trg, int srcWidth, int srcHeight)
962
EXTERN_C void xbrz_scale32 (size_t factor, const uint32_t *src, uint32_t *trg, int srcWidth, int srcHeight)
1089
{
963
{
1090
    if      (factor == 2) return scaleImage<Scaler2x> (src, trg, srcWidth, srcHeight, 0, srcHeight, alphagrad32, dist32);
964
   if (factor == 2) return scaleImage<Scaler2x> (src, trg, srcWidth, srcHeight, 0, srcHeight, alphagrad32, dist32);
1091
    else if (factor == 3) return scaleImage<Scaler3x> (src, trg, srcWidth, srcHeight, 0, srcHeight, alphagrad32, dist32);
965
   else if (factor == 3) return scaleImage<Scaler3x> (src, trg, srcWidth, srcHeight, 0, srcHeight, alphagrad32, dist32);
1092
    else if (factor == 4) return scaleImage<Scaler4x> (src, trg, srcWidth, srcHeight, 0, srcHeight, alphagrad32, dist32);
966
   else if (factor == 4) return scaleImage<Scaler4x> (src, trg, srcWidth, srcHeight, 0, srcHeight, alphagrad32, dist32);
1093
    else if (factor == 5) return scaleImage<Scaler5x> (src, trg, srcWidth, srcHeight, 0, srcHeight, alphagrad32, dist32);
967
   else if (factor == 5) return scaleImage<Scaler5x> (src, trg, srcWidth, srcHeight, 0, srcHeight, alphagrad32, dist32);
1094
    else if (factor == 6) return scaleImage<Scaler6x> (src, trg, srcWidth, srcHeight, 0, srcHeight, alphagrad32, dist32);
968
   else if (factor == 6) return scaleImage<Scaler6x> (src, trg, srcWidth, srcHeight, 0, srcHeight, alphagrad32, dist32);
1095
}
969
}