Differential D22184 Diff 64134 www/qt5-webengine/files/patch-src_3rdparty_chromium_third__party_libpng_powerpc_filter__vsx__intrinsics.c
www/qt5-webengine/files/patch-src_3rdparty_chromium_third__party_libpng_powerpc_filter__vsx__intrinsics.c
- This file was added.
Property | Old Value | New Value
---|---|---
fbsd:nokeywords | null | yes
svn:eol-style | null | native
svn:mime-type | null | text/plain
--- src/3rdparty/chromium/third_party/libpng/powerpc/filter_vsx_intrinsics.c.orig	2019-10-27 17:23:52 UTC
+++ src/3rdparty/chromium/third_party/libpng/powerpc/filter_vsx_intrinsics.c
@@ -0,0 +1,767 @@
+/* filter_vsx_intrinsics.c - PowerPC optimised filter functions
+ *
+ * Copyright (c) 2017 Glenn Randers-Pehrson
+ * Written by Vadim Barkov, 2017.
+ * Last changed in libpng 1.6.29 [March 16, 2017]
+ *
+ * This code is released under the libpng license.
+ * For conditions of distribution and use, see the disclaimer
+ * and license in png.h
+ */
+#include <stdio.h>
+#include <stdint.h>
+#include "../pngpriv.h"
+
+#ifdef PNG_READ_SUPPORTED
+
+/* This code requires -maltivec and -mvsx on the command line: */
+#if PNG_POWERPC_VSX_IMPLEMENTATION == 1 /* intrinsics code from pngpriv.h */
+
+#include <altivec.h>
+
+#if PNG_POWERPC_VSX_OPT > 0
+
+#ifndef __VSX__
+# error "This code requires VSX support (POWER7 and later). Please provide -mvsx compiler flag."
+#endif
+
+#define vec_ld_unaligned(vec,data) vec = vec_vsx_ld(0,data)
+#define vec_st_unaligned(vec,data) vec_vsx_st(vec,0,data)
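+
+/* Unlike vec_ld/vec_st, which require 16-byte-aligned addresses (the low
+ * four bits of the address are ignored), vec_vsx_ld/vec_vsx_st can access
+ * memory at any alignment, so they are used for the previous-row pointer,
+ * whose alignment is not under our control.
+ */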
+
+/* Functions in this file look at most 3 pixels (a,b,c) to predict the 4th (d).
+ * They're positioned like this:
+ *    prev: c b
+ *    row:  a d
+ * The Sub filter predicts d=a, Avg d=(a+b)/2, and Paeth predicts d to be
+ * whichever of a, b, or c is closest to p=a+b-c.
+ * (this is taken from ../intel/filter_sse2_intrinsics.c)
+ */
+
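+/* Shared prologue for the filter functions below: unaligned_top is the
+ * number of bytes from rp up to the next 16-byte boundary (0 if rp is
+ * already aligned) and istop is the byte count remaining after that
+ * prefix; the prefix and any sub-16-byte tail are filtered with scalar
+ * code and the aligned middle with vectors.
+ */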
+#define vsx_declare_common_vars(row_info,row,prev_row,offset) \
+   png_byte i;\
+   png_bytep rp = row + offset;\
+   png_const_bytep pp = prev_row;\
+   png_size_t unaligned_top = 16 - (((png_size_t)rp % 16));\
+   png_size_t istop;\
+   if(unaligned_top == 16)\
+      unaligned_top = 0;\
+   istop = row_info->rowbytes;\
+   if((unaligned_top < istop))\
+      istop -= unaligned_top;\
+   else{\
+      unaligned_top = istop;\
+      istop = 0;\
+   }
+
+void png_read_filter_row_up_vsx(png_row_infop row_info, png_bytep row,
+                                png_const_bytep prev_row)
+{
+   vector unsigned char rp_vec;
+   vector unsigned char pp_vec;
+   vsx_declare_common_vars(row_info,row,prev_row,0)
+
+   /* Altivec operations require 16-byte aligned data
+    * but input can be unaligned. So we calculate
+    * unaligned part as usual.
+    */
+   for (i = 0; i < unaligned_top; i++)
+   {
+      *rp = (png_byte)(((int)(*rp) + (int)(*pp++)) & 0xff);
+      rp++;
+   }
+
+   /* Using SIMD while we can */
+   while( istop >= 16 )
+   {
+      rp_vec = vec_ld(0,rp);
+      vec_ld_unaligned(pp_vec,pp);
+
+      rp_vec = vec_add(rp_vec,pp_vec);
+
+      vec_st(rp_vec,0,rp);
+
+      pp += 16;
+      rp += 16;
+      istop -= 16;
+   }
+
+   if(istop > 0)
+   {
+      /* If byte count of row is not divisible by 16
+       * we will process remaining part as usual
+       */
+      for (i = 0; i < istop; i++)
+      {
+         *rp = (png_byte)(((int)(*rp) + (int)(*pp++)) & 0xff);
+         rp++;
+      }
+   }
+}
+
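+/* vec_perm control vectors: result byte i is element c[i] of the 32-byte
+ * concatenation of the two source operands, so indices 0..15 select bytes
+ * of the first operand and index 16 selects byte 0 of the second, which
+ * below is always zero (VSX_CHAR_ZERO).  Each table thus extracts one
+ * pixel's bytes, either moved into the lanes of the following pixel
+ * (LEFTSHIFTED) or kept in place (NOT_SHIFTED), with all other lanes
+ * zeroed.
+ */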
+static const vector unsigned char VSX_LEFTSHIFTED1_4 = {16,16,16,16, 0, 1, 2, 3,16,16,16,16,16,16,16,16};
+static const vector unsigned char VSX_LEFTSHIFTED2_4 = {16,16,16,16,16,16,16,16, 4, 5, 6, 7,16,16,16,16};
+static const vector unsigned char VSX_LEFTSHIFTED3_4 = {16,16,16,16,16,16,16,16,16,16,16,16, 8, 9,10,11};
+
+static const vector unsigned char VSX_LEFTSHIFTED1_3 = {16,16,16, 0, 1, 2,16,16,16,16,16,16,16,16,16,16};
+static const vector unsigned char VSX_LEFTSHIFTED2_3 = {16,16,16,16,16,16, 3, 4, 5,16,16,16,16,16,16,16};
+static const vector unsigned char VSX_LEFTSHIFTED3_3 = {16,16,16,16,16,16,16,16,16, 6, 7, 8,16,16,16,16};
+static const vector unsigned char VSX_LEFTSHIFTED4_3 = {16,16,16,16,16,16,16,16,16,16,16,16, 9,10,11,16};
+
+static const vector unsigned char VSX_NOT_SHIFTED1_4 = {16,16,16,16, 4, 5, 6, 7,16,16,16,16,16,16,16,16};
+static const vector unsigned char VSX_NOT_SHIFTED2_4 = {16,16,16,16,16,16,16,16, 8, 9,10,11,16,16,16,16};
+static const vector unsigned char VSX_NOT_SHIFTED3_4 = {16,16,16,16,16,16,16,16,16,16,16,16,12,13,14,15};
+
+static const vector unsigned char VSX_NOT_SHIFTED1_3 = {16,16,16, 3, 4, 5,16,16,16,16,16,16,16,16,16,16};
+static const vector unsigned char VSX_NOT_SHIFTED2_3 = {16,16,16,16,16,16, 6, 7, 8,16,16,16,16,16,16,16};
+static const vector unsigned char VSX_NOT_SHIFTED3_3 = {16,16,16,16,16,16,16,16,16, 9,10,11,16,16,16,16};
+static const vector unsigned char VSX_NOT_SHIFTED4_3 = {16,16,16,16,16,16,16,16,16,16,16,16,12,13,14,16};
+
+static const vector unsigned char VSX_CHAR_ZERO = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
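+
+/* The Paeth predictor is computed in 16-bit lanes because the intermediate
+ * values p, pa, pb and pc can be negative or exceed 255.  The tables below
+ * widen one pixel's bytes into short lanes and narrow them back again; the
+ * byte order within each short lane depends on endianness, hence the two
+ * variants.
+ */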
+#ifdef __LITTLE_ENDIAN__
+
+static const vector unsigned char VSX_CHAR_TO_SHORT1_4 = { 4,16, 5,16, 6,16, 7,16,16,16,16,16,16,16,16,16};
+static const vector unsigned char VSX_CHAR_TO_SHORT2_4 = { 8,16, 9,16,10,16,11,16,16,16,16,16,16,16,16,16};
+static const vector unsigned char VSX_CHAR_TO_SHORT3_4 = {12,16,13,16,14,16,15,16,16,16,16,16,16,16,16,16};
+
+static const vector unsigned char VSX_SHORT_TO_CHAR1_4 = {16,16,16,16, 0, 2, 4, 6,16,16,16,16,16,16,16,16};
+static const vector unsigned char VSX_SHORT_TO_CHAR2_4 = {16,16,16,16,16,16,16,16, 0, 2, 4, 6,16,16,16,16};
+static const vector unsigned char VSX_SHORT_TO_CHAR3_4 = {16,16,16,16,16,16,16,16,16,16,16,16, 0, 2, 4, 6};
+
+static const vector unsigned char VSX_CHAR_TO_SHORT1_3 = { 3,16, 4,16, 5,16,16,16,16,16,16,16,16,16,16,16};
+static const vector unsigned char VSX_CHAR_TO_SHORT2_3 = { 6,16, 7,16, 8,16,16,16,16,16,16,16,16,16,16,16};
+static const vector unsigned char VSX_CHAR_TO_SHORT3_3 = { 9,16,10,16,11,16,16,16,16,16,16,16,16,16,16,16};
+static const vector unsigned char VSX_CHAR_TO_SHORT4_3 = {12,16,13,16,14,16,16,16,16,16,16,16,16,16,16,16};
+
+static const vector unsigned char VSX_SHORT_TO_CHAR1_3 = {16,16,16, 0, 2, 4,16,16,16,16,16,16,16,16,16,16};
+static const vector unsigned char VSX_SHORT_TO_CHAR2_3 = {16,16,16,16,16,16, 0, 2, 4,16,16,16,16,16,16,16};
+static const vector unsigned char VSX_SHORT_TO_CHAR3_3 = {16,16,16,16,16,16,16,16,16, 0, 2, 4,16,16,16,16};
+static const vector unsigned char VSX_SHORT_TO_CHAR4_3 = {16,16,16,16,16,16,16,16,16,16,16,16, 0, 2, 4,16};
+
+#elif defined(__BIG_ENDIAN__)
+
+static const vector unsigned char VSX_CHAR_TO_SHORT1_4 = {16, 4,16, 5,16, 6,16, 7,16,16,16,16,16,16,16,16};
+static const vector unsigned char VSX_CHAR_TO_SHORT2_4 = {16, 8,16, 9,16,10,16,11,16,16,16,16,16,16,16,16};
+static const vector unsigned char VSX_CHAR_TO_SHORT3_4 = {16,12,16,13,16,14,16,15,16,16,16,16,16,16,16,16};
+
+static const vector unsigned char VSX_SHORT_TO_CHAR1_4 = {16,16,16,16, 1, 3, 5, 7,16,16,16,16,16,16,16,16};
+static const vector unsigned char VSX_SHORT_TO_CHAR2_4 = {16,16,16,16,16,16,16,16, 1, 3, 5, 7,16,16,16,16};
+static const vector unsigned char VSX_SHORT_TO_CHAR3_4 = {16,16,16,16,16,16,16,16,16,16,16,16, 1, 3, 5, 7};
+
+static const vector unsigned char VSX_CHAR_TO_SHORT1_3 = {16, 3,16, 4,16, 5,16,16,16,16,16,16,16,16,16,16};
+static const vector unsigned char VSX_CHAR_TO_SHORT2_3 = {16, 6,16, 7,16, 8,16,16,16,16,16,16,16,16,16,16};
+static const vector unsigned char VSX_CHAR_TO_SHORT3_3 = {16, 9,16,10,16,11,16,16,16,16,16,16,16,16,16,16};
+static const vector unsigned char VSX_CHAR_TO_SHORT4_3 = {16,12,16,13,16,14,16,16,16,16,16,16,16,16,16,16};
+
+static const vector unsigned char VSX_SHORT_TO_CHAR1_3 = {16,16,16, 1, 3, 5,16,16,16,16,16,16,16,16,16,16};
+static const vector unsigned char VSX_SHORT_TO_CHAR2_3 = {16,16,16,16,16,16, 1, 3, 5,16,16,16,16,16,16,16};
+static const vector unsigned char VSX_SHORT_TO_CHAR3_3 = {16,16,16,16,16,16,16,16,16, 1, 3, 5,16,16,16,16};
+static const vector unsigned char VSX_SHORT_TO_CHAR4_3 = {16,16,16,16,16,16,16,16,16,16,16,16, 1, 3, 5,16};
+
+#endif
+
+#define vsx_char_to_short(vec,offset,bpp) (vector unsigned short)vec_perm((vec),VSX_CHAR_ZERO,VSX_CHAR_TO_SHORT##offset##_##bpp)
+#define vsx_short_to_char(vec,offset,bpp) vec_perm(((vector unsigned char)(vec)),VSX_CHAR_ZERO,VSX_SHORT_TO_CHAR##offset##_##bpp)
+
+#ifdef PNG_USE_ABS
+# define vsx_abs(number) abs(number)
+#else
+# define vsx_abs(number) ((number) > 0 ? (number) : -(number))
+#endif
+
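+/* Sub filter: d = a.  The first pixel of each aligned 16-byte block is
+ * filtered with scalar code (its predecessor lies in the previous block);
+ * the remaining pixels are then completed by permuting the just-filtered
+ * pixel into the lanes of the next one and adding, letting the running
+ * sum ripple across the block.
+ */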
+void png_read_filter_row_sub4_vsx(png_row_infop row_info, png_bytep row,
+                                  png_const_bytep prev_row)
+{
+   const png_byte bpp = 4;
+
+   vector unsigned char rp_vec;
+   vector unsigned char part_vec;
+
+   vsx_declare_common_vars(row_info,row,prev_row,bpp)
+
+   PNG_UNUSED(pp)
+
+   /* Altivec operations require 16-byte aligned data
+    * but input can be unaligned. So we calculate
+    * unaligned part as usual.
+    */
+   for (i = 0; i < unaligned_top; i++)
+   {
+      *rp = (png_byte)(((int)(*rp) + (int)(*(rp-bpp))) & 0xff);
+      rp++;
+   }
+
+   /* Using SIMD while we can */
+   while( istop >= 16 )
+   {
+      for(i=0;i < bpp ; i++)
+      {
+         *rp = (png_byte)(((int)(*rp) + (int)(*(rp-bpp))) & 0xff);
+         rp++;
+      }
+      rp -= bpp;
+
+      rp_vec = vec_ld(0,rp);
+      part_vec = vec_perm(rp_vec,VSX_CHAR_ZERO,VSX_LEFTSHIFTED1_4);
+      rp_vec = vec_add(rp_vec,part_vec);
+
+      part_vec = vec_perm(rp_vec,VSX_CHAR_ZERO,VSX_LEFTSHIFTED2_4);
+      rp_vec = vec_add(rp_vec,part_vec);
+
+      part_vec = vec_perm(rp_vec,VSX_CHAR_ZERO,VSX_LEFTSHIFTED3_4);
+      rp_vec = vec_add(rp_vec,part_vec);
+
+      vec_st(rp_vec,0,rp);
+
+      rp += 16;
+      istop -= 16;
+   }
+
+   if(istop > 0)
+      for (i = 0; i < istop % 16; i++)
+      {
+         *rp = (png_byte)(((int)(*rp) + (int)(*(rp - bpp))) & 0xff);
+         rp++;
+      }
+}
+
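+/* The same scheme for 3-byte pixels: a 16-byte block holds five whole
+ * pixels plus one byte, so four shifted adds are needed and the sixteenth
+ * byte is filtered with scalar code after the vector store.
+ */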
+void png_read_filter_row_sub3_vsx(png_row_infop row_info, png_bytep row,
+                                  png_const_bytep prev_row)
+{
+   const png_byte bpp = 3;
+
+   vector unsigned char rp_vec;
+   vector unsigned char part_vec;
+
+   vsx_declare_common_vars(row_info,row,prev_row,bpp)
+
+   PNG_UNUSED(pp)
+
+   /* Altivec operations require 16-byte aligned data
+    * but input can be unaligned. So we calculate
+    * unaligned part as usual.
+    */
+   for (i = 0; i < unaligned_top; i++)
+   {
+      *rp = (png_byte)(((int)(*rp) + (int)(*(rp-bpp))) & 0xff);
+      rp++;
+   }
+
+   /* Using SIMD while we can */
+   while( istop >= 16 )
+   {
+      for(i=0;i < bpp ; i++)
+      {
+         *rp = (png_byte)(((int)(*rp) + (int)(*(rp-bpp))) & 0xff);
+         rp++;
+      }
+      rp -= bpp;
+
+      rp_vec = vec_ld(0,rp);
+      part_vec = vec_perm(rp_vec,VSX_CHAR_ZERO,VSX_LEFTSHIFTED1_3);
+      rp_vec = vec_add(rp_vec,part_vec);
+
+      part_vec = vec_perm(rp_vec,VSX_CHAR_ZERO,VSX_LEFTSHIFTED2_3);
+      rp_vec = vec_add(rp_vec,part_vec);
+
+      part_vec = vec_perm(rp_vec,VSX_CHAR_ZERO,VSX_LEFTSHIFTED3_3);
+      rp_vec = vec_add(rp_vec,part_vec);
+
+      part_vec = vec_perm(rp_vec,VSX_CHAR_ZERO,VSX_LEFTSHIFTED4_3);
+      rp_vec = vec_add(rp_vec,part_vec);
+
+      vec_st(rp_vec,0,rp);
+      rp += 15;
+      istop -= 16;
+
+      /* Since 16 % bpp = 16 % 3 = 1, the last byte of the block must
+       * be processed manually.
+       */
+      *rp = (png_byte)(((int)(*rp) + (int)(*(rp-bpp))) & 0xff);
+      rp++;
+   }
+
+   if(istop > 0)
+      for (i = 0; i < istop % 16; i++)
+      {
+         *rp = (png_byte)(((int)(*rp) + (int)(*(rp-bpp))) & 0xff);
+         rp++;
+      }
+}
+
+void png_read_filter_row_avg4_vsx(png_row_infop row_info, png_bytep row,
+                                  png_const_bytep prev_row)
+{
+   const png_byte bpp = 4;
+
+   vector unsigned char rp_vec;
+   vector unsigned char pp_vec;
+   vector unsigned char pp_part_vec;
+   vector unsigned char rp_part_vec;
+   vector unsigned char avg_vec;
+
+   vsx_declare_common_vars(row_info,row,prev_row,bpp)
+   rp -= bpp;
+   if(istop >= bpp)
+      istop -= bpp;
+
+   for (i = 0; i < bpp; i++)
+   {
+      *rp = (png_byte)(((int)(*rp) +
+         ((int)(*pp++) / 2 )) & 0xff);
+
+      rp++;
+   }
+
+   /* Altivec operations require 16-byte aligned data
+    * but input can be unaligned. So we calculate
+    * unaligned part as usual.
+    */
+   for (i = 0; i < unaligned_top; i++)
+   {
+      *rp = (png_byte)(((int)(*rp) +
+         (int)(*pp++ + *(rp-bpp)) / 2 ) & 0xff);
+
+      rp++;
+   }
+
+   /* Using SIMD while we can */
+   while( istop >= 16 )
+   {
+      for(i=0;i < bpp ; i++)
+      {
+         *rp = (png_byte)(((int)(*rp) +
+            (int)(*pp++ + *(rp-bpp)) / 2 ) & 0xff);
+
+         rp++;
+      }
+      rp -= bpp;
+      pp -= bpp;
+
+      vec_ld_unaligned(pp_vec,pp);
+      rp_vec = vec_ld(0,rp);
+
+      rp_part_vec = vec_perm(rp_vec,VSX_CHAR_ZERO,VSX_LEFTSHIFTED1_4);
+      pp_part_vec = vec_perm(pp_vec,VSX_CHAR_ZERO,VSX_NOT_SHIFTED1_4);
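+      /* vec_avg rounds up ((x + y + 1) >> 1) but the Avg filter needs
+       * floor((x + y) / 2); subtracting (x ^ y) & 1, which is set exactly
+       * when x + y is odd, corrects the rounded-up value down to the
+       * floor.
+       */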
+      avg_vec = vec_avg(rp_part_vec,pp_part_vec);
+      avg_vec = vec_sub(avg_vec, vec_and(vec_xor(rp_part_vec,pp_part_vec),vec_splat_u8(1)));
+      rp_vec = vec_add(rp_vec,avg_vec);
+
+      rp_part_vec = vec_perm(rp_vec,VSX_CHAR_ZERO,VSX_LEFTSHIFTED2_4);
+      pp_part_vec = vec_perm(pp_vec,VSX_CHAR_ZERO,VSX_NOT_SHIFTED2_4);
+      avg_vec = vec_avg(rp_part_vec,pp_part_vec);
+      avg_vec = vec_sub(avg_vec, vec_and(vec_xor(rp_part_vec,pp_part_vec),vec_splat_u8(1)));
+      rp_vec = vec_add(rp_vec,avg_vec);
+
+      rp_part_vec = vec_perm(rp_vec,VSX_CHAR_ZERO,VSX_LEFTSHIFTED3_4);
+      pp_part_vec = vec_perm(pp_vec,VSX_CHAR_ZERO,VSX_NOT_SHIFTED3_4);
+      avg_vec = vec_avg(rp_part_vec,pp_part_vec);
+      avg_vec = vec_sub(avg_vec, vec_and(vec_xor(rp_part_vec,pp_part_vec),vec_splat_u8(1)));
+      rp_vec = vec_add(rp_vec,avg_vec);
+
+      vec_st(rp_vec,0,rp);
+
+      rp += 16;
+      pp += 16;
+      istop -= 16;
+   }
+
+   if(istop > 0)
+      for (i = 0; i < istop % 16; i++)
+      {
+         *rp = (png_byte)(((int)(*rp) +
+            (int)(*pp++ + *(rp-bpp)) / 2 ) & 0xff);
+
+         rp++;
+      }
+}
+
+void png_read_filter_row_avg3_vsx(png_row_infop row_info, png_bytep row,
+                                  png_const_bytep prev_row)
+{
+   const png_byte bpp = 3;
+
+   vector unsigned char rp_vec;
+   vector unsigned char pp_vec;
+   vector unsigned char pp_part_vec;
+   vector unsigned char rp_part_vec;
+   vector unsigned char avg_vec;
+
+   vsx_declare_common_vars(row_info,row,prev_row,bpp)
+   rp -= bpp;
+   if(istop >= bpp)
+      istop -= bpp;
+
+   for (i = 0; i < bpp; i++)
+   {
+      *rp = (png_byte)(((int)(*rp) +
+         ((int)(*pp++) / 2 )) & 0xff);
+
+      rp++;
+   }
+
+   /* Altivec operations require 16-byte aligned data
+    * but input can be unaligned. So we calculate
+    * unaligned part as usual.
+    */
+   for (i = 0; i < unaligned_top; i++)
+   {
+      *rp = (png_byte)(((int)(*rp) +
+         (int)(*pp++ + *(rp-bpp)) / 2 ) & 0xff);
+
+      rp++;
+   }
+
+   /* Using SIMD while we can */
+   while( istop >= 16 )
+   {
+      for(i=0;i < bpp ; i++)
+      {
+         *rp = (png_byte)(((int)(*rp) +
+            (int)(*pp++ + *(rp-bpp)) / 2 ) & 0xff);
+
+         rp++;
+      }
+      rp -= bpp;
+      pp -= bpp;
+
+      vec_ld_unaligned(pp_vec,pp);
+      rp_vec = vec_ld(0,rp);
+
+      rp_part_vec = vec_perm(rp_vec,VSX_CHAR_ZERO,VSX_LEFTSHIFTED1_3);
+      pp_part_vec = vec_perm(pp_vec,VSX_CHAR_ZERO,VSX_NOT_SHIFTED1_3);
+      avg_vec = vec_avg(rp_part_vec,pp_part_vec);
+      avg_vec = vec_sub(avg_vec, vec_and(vec_xor(rp_part_vec,pp_part_vec),vec_splat_u8(1)));
+      rp_vec = vec_add(rp_vec,avg_vec);
+
+      rp_part_vec = vec_perm(rp_vec,VSX_CHAR_ZERO,VSX_LEFTSHIFTED2_3);
+      pp_part_vec = vec_perm(pp_vec,VSX_CHAR_ZERO,VSX_NOT_SHIFTED2_3);
+      avg_vec = vec_avg(rp_part_vec,pp_part_vec);
+      avg_vec = vec_sub(avg_vec, vec_and(vec_xor(rp_part_vec,pp_part_vec),vec_splat_u8(1)));
+      rp_vec = vec_add(rp_vec,avg_vec);
+
+      rp_part_vec = vec_perm(rp_vec,VSX_CHAR_ZERO,VSX_LEFTSHIFTED3_3);
+      pp_part_vec = vec_perm(pp_vec,VSX_CHAR_ZERO,VSX_NOT_SHIFTED3_3);
+      avg_vec = vec_avg(rp_part_vec,pp_part_vec);
+      avg_vec = vec_sub(avg_vec, vec_and(vec_xor(rp_part_vec,pp_part_vec),vec_splat_u8(1)));
+      rp_vec = vec_add(rp_vec,avg_vec);
+
+      rp_part_vec = vec_perm(rp_vec,VSX_CHAR_ZERO,VSX_LEFTSHIFTED4_3);
+      pp_part_vec = vec_perm(pp_vec,VSX_CHAR_ZERO,VSX_NOT_SHIFTED4_3);
+      avg_vec = vec_avg(rp_part_vec,pp_part_vec);
+      avg_vec = vec_sub(avg_vec, vec_and(vec_xor(rp_part_vec,pp_part_vec),vec_splat_u8(1)));
+      rp_vec = vec_add(rp_vec,avg_vec);
+
+      vec_st(rp_vec,0,rp);
+
+      rp += 15;
+      pp += 15;
+      istop -= 16;
+
+      /* Since 16 % bpp = 16 % 3 = 1, the last byte of the block must
+       * be processed manually.
+       */
+      *rp = (png_byte)(((int)(*rp) +
+         (int)(*pp++ + *(rp-bpp)) / 2 ) & 0xff);
+      rp++;
+   }
+
+   if(istop > 0)
+      for (i = 0; i < istop % 16; i++)
+      {
+         *rp = (png_byte)(((int)(*rp) +
+            (int)(*pp++ + *(rp-bpp)) / 2 ) & 0xff);
+
+         rp++;
+      }
+}
+
+/* Bytewise c ? t : e. */
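+/* (vec_sel(e,t,c) takes each bit from t where the corresponding bit of the
+ * mask c is set, and from e where it is clear.)
+ */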
+#define if_then_else(c,t,e) vec_sel(e,t,c)
+
+#define vsx_paeth_process(rp,pp,a,b,c,pa,pb,pc,bpp) {\
+      c = *(pp - bpp);\
+      a = *(rp - bpp);\
+      b = *pp++;\
+      p = b - c;\
+      pc = a - c;\
+      pa = vsx_abs(p);\
+      pb = vsx_abs(pc);\
+      pc = vsx_abs(p + pc);\
+      if (pb < pa) pa = pb, a = b;\
+      if (pc < pa) a = c;\
+      a += *rp;\
+      *rp++ = (png_byte)a;\
+   }
+
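+/* Vector Paeth: the first pixel of each 16-byte block is filtered with the
+ * scalar macro above, then for every following pixel a is permuted out of
+ * the partially updated row vector and b and c out of the previous row,
+ * all widened to signed 16-bit lanes.  Whichever of a, b and c matches the
+ * smallest distance is added to the raw byte; pa is tested before pb so
+ * that ties resolve in the same order as the scalar code.
+ */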
+void png_read_filter_row_paeth4_vsx(png_row_infop row_info, png_bytep row,
+                                    png_const_bytep prev_row)
+{
+   const png_byte bpp = 4;
+
+   int a, b, c, pa, pb, pc, p;
+   vector unsigned char rp_vec;
+   vector unsigned char pp_vec;
+   vector unsigned short a_vec,b_vec,c_vec,nearest_vec;
+   vector signed short pa_vec,pb_vec,pc_vec,smallest_vec;
+
+   vsx_declare_common_vars(row_info,row,prev_row,bpp)
+   rp -= bpp;
+   if(istop >= bpp)
+      istop -= bpp;
+
+   /* Process the first pixel in the row completely (this is the same as 'up'
+    * because there is only one candidate predictor for the first row).
+    */
+   for(i = 0; i < bpp ; i++)
+   {
+      *rp = (png_byte)( *rp + *pp);
+      rp++;
+      pp++;
+   }
+
+   for(i = 0; i < unaligned_top ; i++)
+   {
+      vsx_paeth_process(rp,pp,a,b,c,pa,pb,pc,bpp)
+   }
+
+   while( istop >= 16)
+   {
+      for(i = 0; i < bpp ; i++)
+      {
+         vsx_paeth_process(rp,pp,a,b,c,pa,pb,pc,bpp)
+      }
+
+      rp -= bpp;
+      pp -= bpp;
+      rp_vec = vec_ld(0,rp);
+      vec_ld_unaligned(pp_vec,pp);
+
+      a_vec = vsx_char_to_short(vec_perm(rp_vec,VSX_CHAR_ZERO,VSX_LEFTSHIFTED1_4),1,4);
+      b_vec = vsx_char_to_short(vec_perm(pp_vec,VSX_CHAR_ZERO,VSX_NOT_SHIFTED1_4),1,4);
+      c_vec = vsx_char_to_short(vec_perm(pp_vec,VSX_CHAR_ZERO,VSX_LEFTSHIFTED1_4),1,4);
+      pa_vec = (vector signed short) vec_sub(b_vec,c_vec);
+      pb_vec = (vector signed short) vec_sub(a_vec,c_vec);
+      pc_vec = vec_add(pa_vec,pb_vec);
+      pa_vec = vec_abs(pa_vec);
+      pb_vec = vec_abs(pb_vec);
+      pc_vec = vec_abs(pc_vec);
+      smallest_vec = vec_min(pc_vec, vec_min(pa_vec,pb_vec));
+      nearest_vec = if_then_else(
+            vec_cmpeq(pa_vec,smallest_vec),
+            a_vec,
+            if_then_else(
+                  vec_cmpeq(pb_vec,smallest_vec),
+                  b_vec,
+                  c_vec
+                  )
+            );
+      rp_vec = vec_add(rp_vec,(vsx_short_to_char(nearest_vec,1,4)));
+
+      a_vec = vsx_char_to_short(vec_perm(rp_vec,VSX_CHAR_ZERO,VSX_LEFTSHIFTED2_4),2,4);
+      b_vec = vsx_char_to_short(vec_perm(pp_vec,VSX_CHAR_ZERO,VSX_NOT_SHIFTED2_4),2,4);
+      c_vec = vsx_char_to_short(vec_perm(pp_vec,VSX_CHAR_ZERO,VSX_LEFTSHIFTED2_4),2,4);
+      pa_vec = (vector signed short) vec_sub(b_vec,c_vec);
+      pb_vec = (vector signed short) vec_sub(a_vec,c_vec);
+      pc_vec = vec_add(pa_vec,pb_vec);
+      pa_vec = vec_abs(pa_vec);
+      pb_vec = vec_abs(pb_vec);
+      pc_vec = vec_abs(pc_vec);
+      smallest_vec = vec_min(pc_vec, vec_min(pa_vec,pb_vec));
+      nearest_vec = if_then_else(
+            vec_cmpeq(pa_vec,smallest_vec),
+            a_vec,
+            if_then_else(
+                  vec_cmpeq(pb_vec,smallest_vec),
+                  b_vec,
+                  c_vec
+                  )
+            );
+      rp_vec = vec_add(rp_vec,(vsx_short_to_char(nearest_vec,2,4)));
+
+      a_vec = vsx_char_to_short(vec_perm(rp_vec,VSX_CHAR_ZERO,VSX_LEFTSHIFTED3_4),3,4);
+      b_vec = vsx_char_to_short(vec_perm(pp_vec,VSX_CHAR_ZERO,VSX_NOT_SHIFTED3_4),3,4);
+      c_vec = vsx_char_to_short(vec_perm(pp_vec,VSX_CHAR_ZERO,VSX_LEFTSHIFTED3_4),3,4);
+      pa_vec = (vector signed short) vec_sub(b_vec,c_vec);
+      pb_vec = (vector signed short) vec_sub(a_vec,c_vec);
+      pc_vec = vec_add(pa_vec,pb_vec);
+      pa_vec = vec_abs(pa_vec);
+      pb_vec = vec_abs(pb_vec);
+      pc_vec = vec_abs(pc_vec);
+      smallest_vec = vec_min(pc_vec, vec_min(pa_vec,pb_vec));
+      nearest_vec = if_then_else(
+            vec_cmpeq(pa_vec,smallest_vec),
+            a_vec,
+            if_then_else(
+                  vec_cmpeq(pb_vec,smallest_vec),
+                  b_vec,
+                  c_vec
+                  )
+            );
+      rp_vec = vec_add(rp_vec,(vsx_short_to_char(nearest_vec,3,4)));
+
+      vec_st(rp_vec,0,rp);
+
+      rp += 16;
+      pp += 16;
+      istop -= 16;
+   }
+
+   if(istop > 0)
+      for (i = 0; i < istop % 16; i++)
+      {
+         vsx_paeth_process(rp,pp,a,b,c,pa,pb,pc,bpp)
+      }
+}
+
+void png_read_filter_row_paeth3_vsx(png_row_infop row_info, png_bytep row,
+                                    png_const_bytep prev_row)
+{
+   const png_byte bpp = 3;
+
+   int a, b, c, pa, pb, pc, p;
+   vector unsigned char rp_vec;
+   vector unsigned char pp_vec;
+   vector unsigned short a_vec,b_vec,c_vec,nearest_vec;
+   vector signed short pa_vec,pb_vec,pc_vec,smallest_vec;
+
+   vsx_declare_common_vars(row_info,row,prev_row,bpp)
+   rp -= bpp;
+   if(istop >= bpp)
+      istop -= bpp;
+
+   /* Process the first pixel in the row completely (this is the same as 'up'
+    * because there is only one candidate predictor for the first row).
+    */
+   for(i = 0; i < bpp ; i++)
+   {
+      *rp = (png_byte)( *rp + *pp);
+      rp++;
+      pp++;
+   }
+
+   for(i = 0; i < unaligned_top ; i++)
+   {
+      vsx_paeth_process(rp,pp,a,b,c,pa,pb,pc,bpp)
+   }
+
+   while( istop >= 16)
+   {
+      for(i = 0; i < bpp ; i++)
+      {
+         vsx_paeth_process(rp,pp,a,b,c,pa,pb,pc,bpp)
+      }
+
+      rp -= bpp;
+      pp -= bpp;
+      rp_vec = vec_ld(0,rp);
+      vec_ld_unaligned(pp_vec,pp);
+
+      a_vec = vsx_char_to_short(vec_perm(rp_vec,VSX_CHAR_ZERO,VSX_LEFTSHIFTED1_3),1,3);
+      b_vec = vsx_char_to_short(vec_perm(pp_vec,VSX_CHAR_ZERO,VSX_NOT_SHIFTED1_3),1,3);
+      c_vec = vsx_char_to_short(vec_perm(pp_vec,VSX_CHAR_ZERO,VSX_LEFTSHIFTED1_3),1,3);
+      pa_vec = (vector signed short) vec_sub(b_vec,c_vec);
+      pb_vec = (vector signed short) vec_sub(a_vec,c_vec);
+      pc_vec = vec_add(pa_vec,pb_vec);
+      pa_vec = vec_abs(pa_vec);
+      pb_vec = vec_abs(pb_vec);
+      pc_vec = vec_abs(pc_vec);
+      smallest_vec = vec_min(pc_vec, vec_min(pa_vec,pb_vec));
+      nearest_vec = if_then_else(
+            vec_cmpeq(pa_vec,smallest_vec),
+            a_vec,
+            if_then_else(
+                  vec_cmpeq(pb_vec,smallest_vec),
+                  b_vec,
+                  c_vec
+                  )
+            );
+      rp_vec = vec_add(rp_vec,(vsx_short_to_char(nearest_vec,1,3)));
+
+      a_vec = vsx_char_to_short(vec_perm(rp_vec,VSX_CHAR_ZERO,VSX_LEFTSHIFTED2_3),2,3);
+      b_vec = vsx_char_to_short(vec_perm(pp_vec,VSX_CHAR_ZERO,VSX_NOT_SHIFTED2_3),2,3);
+      c_vec = vsx_char_to_short(vec_perm(pp_vec,VSX_CHAR_ZERO,VSX_LEFTSHIFTED2_3),2,3);
+      pa_vec = (vector signed short) vec_sub(b_vec,c_vec);
+      pb_vec = (vector signed short) vec_sub(a_vec,c_vec);
+      pc_vec = vec_add(pa_vec,pb_vec);
+      pa_vec = vec_abs(pa_vec);
+      pb_vec = vec_abs(pb_vec);
+      pc_vec = vec_abs(pc_vec);
+      smallest_vec = vec_min(pc_vec, vec_min(pa_vec,pb_vec));
+      nearest_vec = if_then_else(
+            vec_cmpeq(pa_vec,smallest_vec),
+            a_vec,
+            if_then_else(
+                  vec_cmpeq(pb_vec,smallest_vec),
+                  b_vec,
+                  c_vec
+                  )
+            );
+      rp_vec = vec_add(rp_vec,(vsx_short_to_char(nearest_vec,2,3)));
+
+      a_vec = vsx_char_to_short(vec_perm(rp_vec,VSX_CHAR_ZERO,VSX_LEFTSHIFTED3_3),3,3);
+      b_vec = vsx_char_to_short(vec_perm(pp_vec,VSX_CHAR_ZERO,VSX_NOT_SHIFTED3_3),3,3);
+      c_vec = vsx_char_to_short(vec_perm(pp_vec,VSX_CHAR_ZERO,VSX_LEFTSHIFTED3_3),3,3);
+      pa_vec = (vector signed short) vec_sub(b_vec,c_vec);
+      pb_vec = (vector signed short) vec_sub(a_vec,c_vec);
+      pc_vec = vec_add(pa_vec,pb_vec);
+      pa_vec = vec_abs(pa_vec);
+      pb_vec = vec_abs(pb_vec);
+      pc_vec = vec_abs(pc_vec);
+      smallest_vec = vec_min(pc_vec, vec_min(pa_vec,pb_vec));
+      nearest_vec = if_then_else(
+            vec_cmpeq(pa_vec,smallest_vec),
+            a_vec,
+            if_then_else(
+                  vec_cmpeq(pb_vec,smallest_vec),
+                  b_vec,
+                  c_vec
+                  )
+            );
+      rp_vec = vec_add(rp_vec,(vsx_short_to_char(nearest_vec,3,3)));
+
+      a_vec = vsx_char_to_short(vec_perm(rp_vec,VSX_CHAR_ZERO,VSX_LEFTSHIFTED4_3),4,3);
+      b_vec = vsx_char_to_short(vec_perm(pp_vec,VSX_CHAR_ZERO,VSX_NOT_SHIFTED4_3),4,3);
+      c_vec = vsx_char_to_short(vec_perm(pp_vec,VSX_CHAR_ZERO,VSX_LEFTSHIFTED4_3),4,3);
+      pa_vec = (vector signed short) vec_sub(b_vec,c_vec);
+      pb_vec = (vector signed short) vec_sub(a_vec,c_vec);
+      pc_vec = vec_add(pa_vec,pb_vec);
+      pa_vec = vec_abs(pa_vec);
+      pb_vec = vec_abs(pb_vec);
+      pc_vec = vec_abs(pc_vec);
+      smallest_vec = vec_min(pc_vec, vec_min(pa_vec,pb_vec));
+      nearest_vec = if_then_else(
+            vec_cmpeq(pa_vec,smallest_vec),
+            a_vec,
+            if_then_else(
+                  vec_cmpeq(pb_vec,smallest_vec),
+                  b_vec,
+                  c_vec
+                  )
+            );
+      rp_vec = vec_add(rp_vec,(vsx_short_to_char(nearest_vec,4,3)));
+
+      vec_st(rp_vec,0,rp);
+
+      rp += 15;
+      pp += 15;
+      istop -= 16;
+
+      /* Since 16 % bpp = 16 % 3 = 1, the last byte of the block must
+       * be processed manually.
+       */
+      vsx_paeth_process(rp,pp,a,b,c,pa,pb,pc,bpp)
+   }
+
+   if(istop > 0)
+      for (i = 0; i < istop % 16; i++)
+      {
+         vsx_paeth_process(rp,pp,a,b,c,pa,pb,pc,bpp)
+      }
+}
+
+#endif /* PNG_POWERPC_VSX_OPT > 0 */
+#endif /* PNG_POWERPC_VSX_IMPLEMENTATION == 1 (intrinsics) */
+#endif /* READ */