Subversion Repositories Games.Rick Dangerous

Rev

Rev 7 | Show entire file | Ignore whitespace | Details | Blame | Last modification | View Log | RSS feed

Rev 7 Rev 13
Line 41... Line 41...
41
#define XBRZ_CFG_DOMINANT_DIRECTION_THRESHOLD 3.6
41
#define XBRZ_CFG_DOMINANT_DIRECTION_THRESHOLD 3.6
42
#define XBRZ_CFG_STEEP_DIRECTION_THRESHOLD 2.2
42
#define XBRZ_CFG_STEEP_DIRECTION_THRESHOLD 2.2
43
 
43
 
44
 
44
 
45
// blend types
45
// blend types
46
#define BLEND_NONE 0
46
#define BLEND_NONE     0
47
#define BLEND_NORMAL 1 // a normal indication to blend
47
#define BLEND_NORMAL   1 // a normal indication to blend
48
#define BLEND_DOMINANT 2 // a strong indication to blend
48
#define BLEND_DOMINANT 2 // a strong indication to blend
49
 
49
 
50
 
50
 
51
// handy macros
51
// handy macros
52
#ifndef MIN
52
#ifndef MIN
Line 242... Line 242...
242
 
242
 
243
static void blend_line_steep_and_shallow_2x (uint32_t col, outmatrix_t *out, colorformat_t *color_format, outmatrixreffunc_t outmatrix_ref)
243
static void blend_line_steep_and_shallow_2x (uint32_t col, outmatrix_t *out, colorformat_t *color_format, outmatrixreffunc_t outmatrix_ref)
244
{
244
{
245
   color_format->alphagrad (outmatrix_ref (out, 1, 0), col, 1, 4);
245
   color_format->alphagrad (outmatrix_ref (out, 1, 0), col, 1, 4);
246
   color_format->alphagrad (outmatrix_ref (out, 0, 1), col, 1, 4);
246
   color_format->alphagrad (outmatrix_ref (out, 0, 1), col, 1, 4);
247
   color_format->alphagrad (outmatrix_ref (out, 1, 1), col, 5, 6); //[!] fixes 7/8 used in xBR
247
   color_format->alphagrad (outmatrix_ref (out, 1, 1), col, 5, 6); // [!] fixes 7/8 used in xBR
248
}
248
}
249
static void blend_line_steep_and_shallow_3x (uint32_t col, outmatrix_t *out, colorformat_t *color_format, outmatrixreffunc_t outmatrix_ref)
249
static void blend_line_steep_and_shallow_3x (uint32_t col, outmatrix_t *out, colorformat_t *color_format, outmatrixreffunc_t outmatrix_ref)
250
{
250
{
251
   color_format->alphagrad (outmatrix_ref (out, 2, 0), col, 1, 4);
251
   color_format->alphagrad (outmatrix_ref (out, 2, 0), col, 1, 4);
252
   color_format->alphagrad (outmatrix_ref (out, 0, 2), col, 1, 4);
252
   color_format->alphagrad (outmatrix_ref (out, 0, 2), col, 1, 4);
Line 258... Line 258...
258
{
258
{
259
   color_format->alphagrad (outmatrix_ref (out, 3, 1), col, 3, 4);
259
   color_format->alphagrad (outmatrix_ref (out, 3, 1), col, 3, 4);
260
   color_format->alphagrad (outmatrix_ref (out, 1, 3), col, 3, 4);
260
   color_format->alphagrad (outmatrix_ref (out, 1, 3), col, 3, 4);
261
   color_format->alphagrad (outmatrix_ref (out, 3, 0), col, 1, 4);
261
   color_format->alphagrad (outmatrix_ref (out, 3, 0), col, 1, 4);
262
   color_format->alphagrad (outmatrix_ref (out, 0, 3), col, 1, 4);
262
   color_format->alphagrad (outmatrix_ref (out, 0, 3), col, 1, 4);
263
   color_format->alphagrad (outmatrix_ref (out, 2, 2), col, 1, 3); //[!] fixes 1/4 used in xBR
263
   color_format->alphagrad (outmatrix_ref (out, 2, 2), col, 1, 3); // [!] fixes 1/4 used in xBR
264
   *outmatrix_ref (out, 3, 3) = col;
264
   *outmatrix_ref (out, 3, 3) = col;
265
   *outmatrix_ref (out, 3, 2) = col;
265
   *outmatrix_ref (out, 3, 2) = col;
266
   *outmatrix_ref (out, 2, 3) = col;
266
   *outmatrix_ref (out, 2, 3) = col;
267
}
267
}
268
static void blend_line_steep_and_shallow_5x (uint32_t col, outmatrix_t *out, colorformat_t *color_format, outmatrixreffunc_t outmatrix_ref)
268
static void blend_line_steep_and_shallow_5x (uint32_t col, outmatrix_t *out, colorformat_t *color_format, outmatrixreffunc_t outmatrix_ref)
Line 307... Line 307...
307
{
307
{
308
   color_format->alphagrad (outmatrix_ref (out, 1, 1), col, 1, 2);
308
   color_format->alphagrad (outmatrix_ref (out, 1, 1), col, 1, 2);
309
}
309
}
310
static void blend_line_diagonal_3x (uint32_t col, outmatrix_t *out, colorformat_t *color_format, outmatrixreffunc_t outmatrix_ref)
310
static void blend_line_diagonal_3x (uint32_t col, outmatrix_t *out, colorformat_t *color_format, outmatrixreffunc_t outmatrix_ref)
311
{
311
{
312
   color_format->alphagrad (outmatrix_ref (out, 1, 2), col, 1, 8); //conflict with other rotations for this odd scale
312
   color_format->alphagrad (outmatrix_ref (out, 1, 2), col, 1, 8); // conflict with other rotations for this odd scale
313
   color_format->alphagrad (outmatrix_ref (out, 2, 1), col, 1, 8);
313
   color_format->alphagrad (outmatrix_ref (out, 2, 1), col, 1, 8);
314
   color_format->alphagrad (outmatrix_ref (out, 2, 2), col, 7, 8); //
314
   color_format->alphagrad (outmatrix_ref (out, 2, 2), col, 7, 8);
315
}
315
}
316
static void blend_line_diagonal_4x (uint32_t col, outmatrix_t *out, colorformat_t *color_format, outmatrixreffunc_t outmatrix_ref)
316
static void blend_line_diagonal_4x (uint32_t col, outmatrix_t *out, colorformat_t *color_format, outmatrixreffunc_t outmatrix_ref)
317
{
317
{
318
   color_format->alphagrad (outmatrix_ref (out, 4 - 1, 4 / 2), col, 1, 2);
318
   color_format->alphagrad (outmatrix_ref (out, 4 - 1, 4 / 2), col, 1, 2);
319
   color_format->alphagrad (outmatrix_ref (out, 4 - 2, 4 / 2 + 1), col, 1, 2);
319
   color_format->alphagrad (outmatrix_ref (out, 4 - 2, 4 / 2 + 1), col, 1, 2);
320
   *outmatrix_ref (out, 4 - 1, 4 - 1) = col;
320
   *outmatrix_ref (out, 4 - 1, 4 - 1) = col;
321
}
321
}
322
static void blend_line_diagonal_5x (uint32_t col, outmatrix_t *out, colorformat_t *color_format, outmatrixreffunc_t outmatrix_ref)
322
static void blend_line_diagonal_5x (uint32_t col, outmatrix_t *out, colorformat_t *color_format, outmatrixreffunc_t outmatrix_ref)
323
{
323
{
324
   color_format->alphagrad (outmatrix_ref (out, 5 - 1, 5 / 2 + 0), col, 1, 8); //conflict with other rotations for this odd scale
324
   color_format->alphagrad (outmatrix_ref (out, 5 - 1, 5 / 2 + 0), col, 1, 8); // conflict with other rotations for this odd scale
325
   color_format->alphagrad (outmatrix_ref (out, 5 - 2, 5 / 2 + 1), col, 1, 8);
325
   color_format->alphagrad (outmatrix_ref (out, 5 - 2, 5 / 2 + 1), col, 1, 8);
326
   color_format->alphagrad (outmatrix_ref (out, 5 - 3, 5 / 2 + 2), col, 1, 8); //
326
   color_format->alphagrad (outmatrix_ref (out, 5 - 3, 5 / 2 + 2), col, 1, 8);
327
   color_format->alphagrad (outmatrix_ref (out, 4, 3), col, 7, 8);
327
   color_format->alphagrad (outmatrix_ref (out, 4, 3), col, 7, 8);
328
   color_format->alphagrad (outmatrix_ref (out, 3, 4), col, 7, 8);
328
   color_format->alphagrad (outmatrix_ref (out, 3, 4), col, 7, 8);
329
   *outmatrix_ref (out, 4, 4) = col;
329
   *outmatrix_ref (out, 4, 4) = col;
330
}
330
}
331
static void blend_line_diagonal_6x (uint32_t col, outmatrix_t *out, colorformat_t *color_format, outmatrixreffunc_t outmatrix_ref)
331
static void blend_line_diagonal_6x (uint32_t col, outmatrix_t *out, colorformat_t *color_format, outmatrixreffunc_t outmatrix_ref)
Line 342... Line 342...
342
// corner scaling functions
342
// corner scaling functions
343
 
343
 
344
static void blend_corner_2x (uint32_t col, outmatrix_t *out, colorformat_t *color_format, outmatrixreffunc_t outmatrix_ref)
344
static void blend_corner_2x (uint32_t col, outmatrix_t *out, colorformat_t *color_format, outmatrixreffunc_t outmatrix_ref)
345
{
345
{
346
   // model a round corner
346
   // model a round corner
347
   color_format->alphagrad (outmatrix_ref (out, 1, 1), col, 21, 100); //exact: 1 - pi/4 = 0.2146018366
347
   color_format->alphagrad (outmatrix_ref (out, 1, 1), col, 21, 100); // exact: 1 - pi/4 = 0.2146018366
348
}
348
}
349
static void blend_corner_3x (uint32_t col, outmatrix_t *out, colorformat_t *color_format, outmatrixreffunc_t outmatrix_ref)
349
static void blend_corner_3x (uint32_t col, outmatrix_t *out, colorformat_t *color_format, outmatrixreffunc_t outmatrix_ref)
350
{
350
{
351
   // model a round corner
351
   // model a round corner
352
   color_format->alphagrad (outmatrix_ref (out, 2, 2), col, 45, 100); //exact: 0.4545939598
352
   color_format->alphagrad (outmatrix_ref (out, 2, 2), col, 45, 100); // exact: 0.4545939598
353
   //color_format->alphagrad (outmatrix_ref (out, 2, 1), col, 7, 256); //0.02826017254 -> negligible + avoid conflicts with other rotations for this odd scale
353
   //color_format->alphagrad (outmatrix_ref (out, 2, 1), col,  7, 256); // 0.02826017254 -> negligible + avoid conflicts with other rotations for this odd scale
354
   //color_format->alphagrad (outmatrix_ref (out, 1, 2), col, 7, 256); //0.02826017254
354
   //color_format->alphagrad (outmatrix_ref (out, 1, 2), col,  7, 256); // 0.02826017254
355
}
355
}
356
static void blend_corner_4x (uint32_t col, outmatrix_t *out, colorformat_t *color_format, outmatrixreffunc_t outmatrix_ref)
356
static void blend_corner_4x (uint32_t col, outmatrix_t *out, colorformat_t *color_format, outmatrixreffunc_t outmatrix_ref)
357
{
357
{
358
   // model a round corner
358
   // model a round corner
359
   color_format->alphagrad (outmatrix_ref (out, 3, 3), col, 68, 100); //exact: 0.6848532563
359
   color_format->alphagrad (outmatrix_ref (out, 3, 3), col, 68, 100); // exact: 0.6848532563
360
   color_format->alphagrad (outmatrix_ref (out, 3, 2), col, 9, 100); //0.08677704501
360
   color_format->alphagrad (outmatrix_ref (out, 3, 2), col,  9, 100); // 0.08677704501
361
   color_format->alphagrad (outmatrix_ref (out, 2, 3), col, 9, 100); //0.08677704501
361
   color_format->alphagrad (outmatrix_ref (out, 2, 3), col,  9, 100); // 0.08677704501
362
}
362
}
363
static void blend_corner_5x (uint32_t col, outmatrix_t *out, colorformat_t *color_format, outmatrixreffunc_t outmatrix_ref)
363
static void blend_corner_5x (uint32_t col, outmatrix_t *out, colorformat_t *color_format, outmatrixreffunc_t outmatrix_ref)
364
{
364
{
365
   // model a round corner
365
   // model a round corner
366
   color_format->alphagrad (outmatrix_ref (out, 4, 4), col, 86, 100); //exact: 0.8631434088
366
   color_format->alphagrad (outmatrix_ref (out, 4, 4), col, 86, 100); // exact: 0.8631434088
367
   color_format->alphagrad (outmatrix_ref (out, 4, 3), col, 23, 100); //0.2306749731
367
   color_format->alphagrad (outmatrix_ref (out, 4, 3), col, 23, 100); // 0.2306749731
368
   color_format->alphagrad (outmatrix_ref (out, 3, 4), col, 23, 100); //0.2306749731
368
   color_format->alphagrad (outmatrix_ref (out, 3, 4), col, 23, 100); // 0.2306749731
369
   //color_format->alphagrad (outmatrix_ref (out, 4, 2), col, 1, 64); //0.01676812367 -> negligible + avoid conflicts with other rotations for this odd scale
369
   //color_format->alphagrad (outmatrix_ref (out, 4, 2), col,  1,  64); // 0.01676812367 -> negligible + avoid conflicts with other rotations for this odd scale
370
   //color_format->alphagrad (outmatrix_ref (out, 2, 4), col, 1, 64); //0.01676812367
370
   //color_format->alphagrad (outmatrix_ref (out, 2, 4), col,  1,  64); // 0.01676812367
371
}
371
}
372
static void blend_corner_6x (uint32_t col, outmatrix_t *out, colorformat_t *color_format, outmatrixreffunc_t outmatrix_ref)
372
static void blend_corner_6x (uint32_t col, outmatrix_t *out, colorformat_t *color_format, outmatrixreffunc_t outmatrix_ref)
373
{
373
{
374
   // model a round corner
374
   // model a round corner
375
   color_format->alphagrad (outmatrix_ref (out, 5, 5), col, 97, 100); //exact: 0.9711013910
375
   color_format->alphagrad (outmatrix_ref (out, 5, 5), col, 97, 100); // exact: 0.9711013910
376
   color_format->alphagrad (outmatrix_ref (out, 4, 5), col, 42, 100); //0.4236372243
376
   color_format->alphagrad (outmatrix_ref (out, 4, 5), col, 42, 100); // 0.4236372243
377
   color_format->alphagrad (outmatrix_ref (out, 5, 4), col, 42, 100); //0.4236372243
377
   color_format->alphagrad (outmatrix_ref (out, 5, 4), col, 42, 100); // 0.4236372243
378
   color_format->alphagrad (outmatrix_ref (out, 5, 3), col, 6, 100); //0.05652034508
378
   color_format->alphagrad (outmatrix_ref (out, 5, 3), col,  6, 100); // 0.05652034508
379
   color_format->alphagrad (outmatrix_ref (out, 3, 5), col, 6, 100); //0.05652034508
379
   color_format->alphagrad (outmatrix_ref (out, 3, 5), col,  6, 100); // 0.05652034508
380
}
380
}
381
 
381
 
382
/////////////////////////////////////
382
/////////////////////////////////////
383
// scaler objects for various factors
383
// scaler objects for various factors
384
 
384
 
Line 417... Line 417...
417
/////////////////////////////////////////////////////
417
/////////////////////////////////////////////////////
418
// color distance functions for various color formats
418
// color distance functions for various color formats
419
 
419
 
420
static double dist24 (uint32_t pix1, uint32_t pix2)
420
static double dist24 (uint32_t pix1, uint32_t pix2)
421
{
421
{
422
   //30% perf boost compared to plain distYCbCr()!
422
   // 30% perf boost compared to plain distYCbCr()!
423
   //consumes 64 MB memory; using double is only 2% faster, but takes 128 MB
423
   // consumes 64 MB memory; using double is only 2% faster, but takes 128 MB
424
   static float diffToDist[256 * 256 * 256];
424
   static float diffToDist[256 * 256 * 256] = { 0 };
425
   static bool is_initialized = false;
425
   static bool is_initialized = false;
426
   if (!is_initialized)
426
   if (!is_initialized)
427
   {
427
   {
428
      for (uint32_t i = 0; i < 256 * 256 * 256; ++i) //startup time: 114 ms on Intel Core i5 (four cores)
428
      for (uint32_t i = 0; i < 256 * 256 * 256; ++i) //startup time: 114 ms on Intel Core i5 (four cores)
429
      {
429
      {
Line 449... Line 449...
449
 
449
 
450
   const int r_diff = (int) GET_RED (pix1) - (int) GET_RED (pix2);
450
   const int r_diff = (int) GET_RED (pix1) - (int) GET_RED (pix2);
451
   const int g_diff = (int) GET_GREEN (pix1) - (int) GET_GREEN (pix2);
451
   const int g_diff = (int) GET_GREEN (pix1) - (int) GET_GREEN (pix2);
452
   const int b_diff = (int) GET_BLUE (pix1) - (int) GET_BLUE (pix2);
452
   const int b_diff = (int) GET_BLUE (pix1) - (int) GET_BLUE (pix2);
453
 
453
 
454
   return diffToDist[(((r_diff + 0xFF) / 2) << 16) | //slightly reduce precision (division by 2) to squeeze value into single byte
454
   return (diffToDist[  (((r_diff + 0xFF) / 2) << 16) // slightly reduce precision (division by 2) to squeeze value into single byte
455
      (((g_diff + 0xFF) / 2) << 8) |
455
                      | (((g_diff + 0xFF) / 2) <<  8)
456
      (((b_diff + 0xFF) / 2) << 0)];
456
                      | (((b_diff + 0xFF) / 2) <<  0)]);
457
}
457
}
458
static double dist32 (uint32_t pix1, uint32_t pix2)
458
static double dist32 (uint32_t pix1, uint32_t pix2)
459
{
459
{
460
   // Requirements for a color distance handling alpha channel: with a1, a2 in [0, 1]
460
   // Requirements for a color distance handling alpha channel: with a1, a2 in [0, 1]
461
   //    1. if a1 = a2, distance should be: a1 * distYCbCr()
461
   //    1. if a1 = a2, distance should be: a1 * distYCbCr()
462
   //    2. if a1 = 0,  distance should be: a2 * distYCbCr(black, white) = a2 * 255
462
   //    2. if a1 = 0,  distance should be: a2 * distYCbCr(black, white) = a2 * 255
463
   //    3. if a1 = 1,  ??? maybe: 255 * (1 - a2) + a2 * distYCbCr()
463
   //    3. if a1 = 1,  ??? maybe: 255 * (1 - a2) + a2 * distYCbCr()
464
   //return MIN (a1, a2) * distYCbCrBuffered(pix1, pix2) + 255 * abs(a1 - a2);
464
   // return MIN (a1, a2) * distYCbCrBuffered(pix1, pix2) + 255 * abs(a1 - a2);
465
   //=> following code is 15% faster:
465
   // => following code is 15% faster:
466
   const double d = dist24 (pix1, pix2);
466
   const double d = dist24 (pix1, pix2);
467
   const double a1 = GET_ALPHA (pix1) / 255.0;
467
   const double a1 = GET_ALPHA (pix1) / 255.0;
468
   const double a2 = GET_ALPHA (pix2) / 255.0;
468
   const double a2 = GET_ALPHA (pix2) / 255.0;
469
   return (a1 < a2 ? a1 * d + 255 * (a2 - a1) : a2 * d + 255 * (a1 - a2));
469
   return (a1 < a2 ? a1 * d + 255 * (a2 - a1) : a2 * d + 255 * (a1 - a2));
470
}
470
}
Line 520... Line 520...
520
 
520
 
521
   const int weight = 4;
521
   const int weight = 4;
522
   double jg = color_format->dist (ker->i, ker->f) + color_format->dist (ker->f, ker->c) + color_format->dist (ker->n, ker->k) + color_format->dist (ker->k, ker->h) + weight * color_format->dist (ker->j, ker->g);
522
   double jg = color_format->dist (ker->i, ker->f) + color_format->dist (ker->f, ker->c) + color_format->dist (ker->n, ker->k) + color_format->dist (ker->k, ker->h) + weight * color_format->dist (ker->j, ker->g);
523
   double fk = color_format->dist (ker->e, ker->j) + color_format->dist (ker->j, ker->o) + color_format->dist (ker->b, ker->g) + color_format->dist (ker->g, ker->l) + weight * color_format->dist (ker->f, ker->k);
523
   double fk = color_format->dist (ker->e, ker->j) + color_format->dist (ker->j, ker->o) + color_format->dist (ker->b, ker->g) + color_format->dist (ker->g, ker->l) + weight * color_format->dist (ker->f, ker->k);
524
 
524
 
525
   if (jg < fk) //test sample: 70% of values max(jg, fk) / min(jg, fk) are between 1.1 and 3.7 with median being 1.8
525
   if (jg < fk) // test sample: 70% of values max(jg, fk) / min(jg, fk) are between 1.1 and 3.7 with median being 1.8
526
   {
526
   {
527
      const bool dominantGradient = XBRZ_CFG_DOMINANT_DIRECTION_THRESHOLD * jg < fk;
527
      const bool dominantGradient = XBRZ_CFG_DOMINANT_DIRECTION_THRESHOLD * jg < fk;
528
      if (ker->f != ker->g && ker->f != ker->j)
528
      if (ker->f != ker->g && ker->f != ker->j)
529
         result->blend_f = dominantGradient ? BLEND_DOMINANT : BLEND_NORMAL;
529
         result->blend_f = dominantGradient ? BLEND_DOMINANT : BLEND_NORMAL;
530
 
530
 
Line 549... Line 549...
549
{
549
{
550
   // input kernel area naming convention:
550
   // input kernel area naming convention:
551
   // -------------
551
   // -------------
552
   // | A | B | C |
552
   // | A | B | C |
553
   // ----|---|---|
553
   // ----|---|---|
554
   // | D | E | F | //input pixel is at position E
554
   // | D | E | F | // input pixel is at position E
555
   // ----|---|---|
555
   // ----|---|---|
556
   // | G | H | I |
556
   // | G | H | I |
557
   // -------------
557
   // -------------
558
 
558
 
559
   uint32_t
559
   uint32_t
Line 590... Line 590...
590
      outmatrix_t out;
590
      outmatrix_t out;
591
      out.size = scaler->factor;
591
      out.size = scaler->factor;
592
      out.ptr = target;
592
      out.ptr = target;
593
      out.stride = trgWidth;
593
      out.stride = trgWidth;
594
 
594
 
595
      px = (color_format->dist (e, f) <= color_format->dist (e, h) ? f : h); //choose most similar color
595
      px = (color_format->dist (e, f) <= color_format->dist (e, h) ? f : h); // choose most similar color
596
 
596
 
597
      if (doLineBlend)
597
      if (doLineBlend)
598
      {
598
      {
599
         const double fg = color_format->dist (f, g); //test sample: 70% of values max(fg, hc) / min(fg, hc) are between 1.1 and 3.7 with median being 1.9
599
         const double fg = color_format->dist (f, g); // test sample: 70% of values max(fg, hc) / min(fg, hc) are between 1.1 and 3.7 with median being 1.9
600
         const double hc = color_format->dist (h, c); //
600
         const double hc = color_format->dist (h, c);
601
         const bool haveShallowLine = (XBRZ_CFG_STEEP_DIRECTION_THRESHOLD * fg <= hc) && (e != g) && (d != g);
601
         const bool haveShallowLine = (XBRZ_CFG_STEEP_DIRECTION_THRESHOLD * fg <= hc) && (e != g) && (d != g);
602
         const bool haveSteepLine   = (XBRZ_CFG_STEEP_DIRECTION_THRESHOLD * hc <= fg) && (e != c) && (b != c);
602
         const bool haveSteepLine   = (XBRZ_CFG_STEEP_DIRECTION_THRESHOLD * hc <= fg) && (e != c) && (b != c);
603
 
603
 
604
         if (haveShallowLine)
604
         if (haveShallowLine)
605
         {
605
         {
Line 663... Line 663...
663
 
663
 
664
         preprocess_corners (&res, &ker, color_format);
664
         preprocess_corners (&res, &ker, color_format);
665
 
665
 
666
         // preprocessing blend result:
666
         // preprocessing blend result:
667
         // ---------
667
         // ---------
668
         // | F | G |   //evaluate corner between F, G, J, K
668
         // | F | G |   // evaluate corner between F, G, J, K
669
         // ----|---|   //input pixel is at position F
669
         // ----|---|   // input pixel is at position F
670
         // | J | K |
670
         // | J | K |
671
         // ---------
671
         // ---------
672
 
672
 
673
         setTopR (&preProcBuffer[x], res.blend_j);
673
         setTopR (&preProcBuffer[x], res.blend_j);
674
         if (x + 1 < bufferSize)
674
         if (x + 1 < bufferSize)
Line 677... Line 677...
677
   }
677
   }
678
   //------------------------------------------------------------------------------------
678
   //------------------------------------------------------------------------------------
679
 
679
 
680
   for (int y = yFirst; y < yLast; ++y)
680
   for (int y = yFirst; y < yLast; ++y)
681
   {
681
   {
682
      uint32_t *out = trg + scaler->factor * y * trgWidth; //consider MT "striped" access
682
      uint32_t *out = trg + scaler->factor * y * trgWidth; // consider MT "striped" access
683
 
683
 
684
      const uint32_t* s_m1 = src + srcWidth * MAX (y - 1, 0);
684
      const uint32_t* s_m1 = src + srcWidth * MAX (y - 1, 0);
685
      const uint32_t* s_0 = src + srcWidth * y; //center line
685
      const uint32_t* s_0 = src + srcWidth * y; // center line
686
      const uint32_t* s_p1 = src + srcWidth * MIN (y + 1, srcHeight - 1);
686
      const uint32_t* s_p1 = src + srcWidth * MIN (y + 1, srcHeight - 1);
687
      const uint32_t* s_p2 = src + srcWidth * MIN (y + 2, srcHeight - 1);
687
      const uint32_t* s_p2 = src + srcWidth * MIN (y + 2, srcHeight - 1);
688
 
688
 
689
      uint8_t blend_xy1 = 0; // corner blending for current (x, y + 1) position
689
      uint8_t blend_xy1 = 0; // corner blending for current (x, y + 1) position
690
 
690
 
691
      for (int x = 0; x < srcWidth; ++x, out += scaler->factor)
691
      for (int x = 0; x < srcWidth; ++x, out += scaler->factor)
692
      {
692
      {
693
         // all those bounds checks have only insignificant impact on performance!
693
         // all those bounds checks have only insignificant impact on performance!
694
         const int x_m1 = MAX (x - 1, 0); //perf: prefer array indexing to additional pointers!
694
         const int x_m1 = MAX (x - 1, 0); // perf: prefer array indexing to additional pointers!
695
         const int x_p1 = MIN (x + 1, srcWidth - 1);
695
         const int x_p1 = MIN (x + 1, srcWidth - 1);
696
         const int x_p2 = MIN (x + 2, srcWidth - 1);
696
         const int x_p2 = MIN (x + 2, srcWidth - 1);
697
 
697
 
698
         kernel_4x4_t ker4; //perf: initialization is negligible
698
         kernel_4x4_t ker4; // perf: initialization is negligible
699
         ker4.a = s_m1[x_m1]; ker4.b = s_m1[x]; ker4.c = s_m1[x_p1]; ker4.d = s_m1[x_p2]; // read sequentially from memory as far as possible
699
         ker4.a = s_m1[x_m1]; ker4.b = s_m1[x]; ker4.c = s_m1[x_p1]; ker4.d = s_m1[x_p2]; // read sequentially from memory as far as possible
700
         ker4.e = s_0[x_m1];  ker4.f = s_0[x];  ker4.g = s_0[x_p1];  ker4.h = s_0[x_p2];
700
         ker4.e = s_0[x_m1];  ker4.f = s_0[x];  ker4.g = s_0[x_p1];  ker4.h = s_0[x_p2];
701
         ker4.i = s_p1[x_m1]; ker4.j = s_p1[x]; ker4.k = s_p1[x_p1]; ker4.l = s_p1[x_p2];
701
         ker4.i = s_p1[x_m1]; ker4.j = s_p1[x]; ker4.k = s_p1[x_p1]; ker4.l = s_p1[x_p2];
702
         ker4.m = s_p2[x_m1]; ker4.n = s_p2[x]; ker4.o = s_p2[x_p1]; ker4.p = s_p2[x_p2];
702
         ker4.m = s_p2[x_m1]; ker4.n = s_p2[x]; ker4.o = s_p2[x_p1]; ker4.p = s_p2[x_p2];
703
 
703
 
704
         // evaluate the four corners on bottom-right of current pixel
704
         // evaluate the four corners on bottom-right of current pixel
705
         uint8_t blend_xy = 0; //for current (x, y) position
705
         uint8_t blend_xy = 0; // for current (x, y) position
706
         {
706
         {
707
            blendresult_t res;
707
            blendresult_t res;
708
            preprocess_corners (&res, &ker4, color_format);
708
            preprocess_corners (&res, &ker4, color_format);
709
 
709
 
710
            // preprocessing blend result:
710
            // preprocessing blend result:
711
            // ---------
711
            // ---------
712
            // | F | G |   //evaluate corner between F, G, J, K
712
            // | F | G |   // evaluate corner between F, G, J, K
713
            // ----|---|   //current input pixel is at position F
713
            // ----|---|   // current input pixel is at position F
714
            // | J | K |
714
            // | J | K |
715
            // ---------
715
            // ---------
716
 
716
 
717
            blend_xy = preProcBuffer[x];
717
            blend_xy = preProcBuffer[x];
718
            setBottomR (&blend_xy, res.blend_f); //all four corners of (x, y) have been determined at this point due to processing sequence!
718
            setBottomR (&blend_xy, res.blend_f); // all four corners of (x, y) have been determined at this point due to processing sequence!
719
 
719
 
720
            setTopR (&blend_xy1, res.blend_j); //set 2nd known corner for (x, y + 1)
720
            setTopR (&blend_xy1, res.blend_j); // set 2nd known corner for (x, y + 1)
721
            preProcBuffer[x] = blend_xy1; //store on current buffer position for use on next row
721
            preProcBuffer[x] = blend_xy1; // store on current buffer position for use on next row
722
 
722
 
723
            blend_xy1 = 0;
723
            blend_xy1 = 0;
724
            setTopL (&blend_xy1, res.blend_k); //set 1st known corner for (x + 1, y + 1) and buffer for use on next column
724
            setTopL (&blend_xy1, res.blend_k); // set 1st known corner for (x + 1, y + 1) and buffer for use on next column
725
 
725
 
726
            if (x + 1 < bufferSize) //set 3rd known corner for (x + 1, y)
726
            if (x + 1 < bufferSize) // set 3rd known corner for (x + 1, y)
727
               setBottomL (&preProcBuffer[x + 1], res.blend_g);
727
               setBottomL (&preProcBuffer[x + 1], res.blend_g);
728
         }
728
         }
729
 
729
 
730
         //fill block of size scale * scale with the given color
730
         // fill block of size scale * scale with the given color
731
         uint32_t *blk = out;
731
         uint32_t *blk = out;
732
         for (int _blk_y = 0; _blk_y < scaler->factor; ++_blk_y, blk = (uint32_t *) BYTE_ADVANCE (blk, trgWidth * sizeof (uint32_t)))
732
         for (int _blk_y = 0; _blk_y < scaler->factor; ++_blk_y, blk = (uint32_t *) BYTE_ADVANCE (blk, trgWidth * sizeof (uint32_t)))
733
            for (int _blk_x = 0; _blk_x < scaler->factor; ++_blk_x)
733
            for (int _blk_x = 0; _blk_x < scaler->factor; ++_blk_x)
734
               blk[_blk_x] = ker4.f;
734
               blk[_blk_x] = ker4.f;
735
 
735
 
736
         //place *after* preprocessing step, to not overwrite the results while processing the last pixel!
736
         // place *after* preprocessing step, to not overwrite the results while processing the last pixel!
737
 
737
 
738
         //blend four corners of current pixel
738
         // blend four corners of current pixel
739
         if (blend_xy != 0) //good 5% perf-improvement
739
         if (blend_xy != 0) // good 5% perf-improvement
740
         {
740
         {
741
            kernel_3x3_t ker3; //perf: initialization is negligible
741
            kernel_3x3_t ker3; // perf: initialization is negligible
742
            ker3.a = ker4.a; ker3.b = ker4.b; ker3.c = ker4.c;
742
            ker3.a = ker4.a; ker3.b = ker4.b; ker3.c = ker4.c;
743
            ker3.d = ker4.e; ker3.e = ker4.f; ker3.f = ker4.g;
743
            ker3.d = ker4.e; ker3.e = ker4.f; ker3.f = ker4.g;
744
            ker3.g = ker4.i; ker3.h = ker4.j; ker3.i = ker4.k;
744
            ker3.g = ker4.i; ker3.h = ker4.j; ker3.i = ker4.k;
745
 
745
 
746
            blend_pixel (scaler, &ker3, out, trgWidth, blend_xy, color_format, outmatrixref_0);
746
            blend_pixel (scaler, &ker3, out, trgWidth, blend_xy, color_format, outmatrixref_0);
Line 775... Line 775...
775
   {
775
   {
776
      //mathematically: ySrc = floor(srcHeight * yTrg / trgHeight)
776
      //mathematically: ySrc = floor(srcHeight * yTrg / trgHeight)
777
      // => search for integers in: [ySrc, ySrc + 1) * trgHeight / srcHeight
777
      // => search for integers in: [ySrc, ySrc + 1) * trgHeight / srcHeight
778
 
778
 
779
      //keep within for loop to support MT input slices!
779
      //keep within for loop to support MT input slices!
780
      const int yTrg_first = (y      * trgHeight + srcHeight - 1) / srcHeight; //=ceil(y * trgHeight / srcHeight)
780
      const int yTrg_first = (y      * trgHeight + srcHeight - 1) / srcHeight; // = ceil(y * trgHeight / srcHeight)
781
      const int yTrg_last = ((y + 1) * trgHeight + srcHeight - 1) / srcHeight; //=ceil(((y + 1) * trgHeight) / srcHeight)
781
      const int yTrg_last = ((y + 1) * trgHeight + srcHeight - 1) / srcHeight; // = ceil(((y + 1) * trgHeight) / srcHeight)
782
      const int blockHeight = yTrg_last - yTrg_first;
782
      const int blockHeight = yTrg_last - yTrg_first;
783
 
783
 
784
      if (blockHeight > 0)
784
      if (blockHeight > 0)
785
      {
785
      {
786
         const uint32_t *srcLine = (const uint32_t *) BYTE_ADVANCE (src, y * srcPitch);
786
         const uint32_t *srcLine = (const uint32_t *) BYTE_ADVANCE (src, y * srcPitch);