diff --git a/README.md b/README.md
index 0246789..5888603 100644
--- a/README.md
+++ b/README.md
@@ -101,8 +101,9 @@ Most others do not.
The Java Team at Intel (R) introduced the vector implementation for FastPFOR
based on the Java Vector API that showed significant gains over the
non-vectorized implementation. For an example usage, see
-examples/vector/Example.java. The feature requires JDK 19+ and is currently for
-advanced users.
+examples/vector/Example.java. On aarch64 (e.g. Graviton3), use JDK 24 or
+later: earlier releases lack the Vector API SVE intrinsics and run a fallback
+that is slower than the scalar codec.
JavaFastPFOR as a dependency
------------------------
diff --git a/pom.xml b/pom.xml
index 180a49d..5594a43 100644
--- a/pom.xml
+++ b/pom.xml
@@ -90,33 +90,17 @@
21
21
+
+ --add-modules
+ jdk.incubator.vector
+
+
+
+
+ maven-surefire-plugin
+
+ --add-modules jdk.incubator.vector
-
-
- default-compile
- compile
-
- compile
-
-
-
- me/lemire/integercompression/vector/*
- module-info.java
-
-
-
-
-
-
org.apache.felix
diff --git a/src/main/java/me/lemire/integercompression/vector/VectorBitPacker.java b/src/main/java/me/lemire/integercompression/vector/VectorBitPacker.java
index 9b2e1ca..1b0df04 100644
--- a/src/main/java/me/lemire/integercompression/vector/VectorBitPacker.java
+++ b/src/main/java/me/lemire/integercompression/vector/VectorBitPacker.java
@@ -15,77 +15,42 @@
* classes.
*
*/
-public class VectorBitPacker {
+public class VectorBitPacker implements VectorBitPackerKernels {
private static final VectorSpecies SPECIES_512 =
IntVector.SPECIES_512;
- private static final VectorSpecies SPECIES_256 =
- IntVector.SPECIES_256;
private static final int VLEN_512 = 16;
- private static final int VLEN_256 = 8;
private static final int BLOCK_SIZE = 256;
- private static final IntVector MASK_1 =
- IntVector.broadcast(SPECIES_256, (1 << 1) - 1);
private static final IntVector MASK_2 =
IntVector.broadcast(SPECIES_512, (1 << 2) - 1);
- private static final IntVector MASK_3 =
- IntVector.broadcast(SPECIES_256, (1 << 3) - 1);
private static final IntVector MASK_4 =
IntVector.broadcast(SPECIES_512, (1 << 4) - 1);
- private static final IntVector MASK_5 =
- IntVector.broadcast(SPECIES_256, (1 << 5) - 1);
private static final IntVector MASK_6 =
IntVector.broadcast(SPECIES_512, (1 << 6) - 1);
- private static final IntVector MASK_7 =
- IntVector.broadcast(SPECIES_256, (1 << 7) - 1);
private static final IntVector MASK_8 =
IntVector.broadcast(SPECIES_512, (1 << 8) - 1);
- private static final IntVector MASK_9 =
- IntVector.broadcast(SPECIES_256, (1 << 9) - 1);
private static final IntVector MASK_10 =
IntVector.broadcast(SPECIES_512, (1 << 10) - 1);
- private static final IntVector MASK_11 =
- IntVector.broadcast(SPECIES_256, (1 << 11) - 1);
private static final IntVector MASK_12 =
IntVector.broadcast(SPECIES_512, (1 << 12) - 1);
- private static final IntVector MASK_13 =
- IntVector.broadcast(SPECIES_256, (1 << 13) - 1);
private static final IntVector MASK_14 =
IntVector.broadcast(SPECIES_512, (1 << 14) - 1);
- private static final IntVector MASK_15 =
- IntVector.broadcast(SPECIES_256, (1 << 15) - 1);
private static final IntVector MASK_16 =
IntVector.broadcast(SPECIES_512, (1 << 16) - 1);
- private static final IntVector MASK_17 =
- IntVector.broadcast(SPECIES_256, (1 << 17) - 1);
private static final IntVector MASK_18 =
IntVector.broadcast(SPECIES_512, (1 << 18) - 1);
- private static final IntVector MASK_19 =
- IntVector.broadcast(SPECIES_256, (1 << 19) - 1);
private static final IntVector MASK_20 =
IntVector.broadcast(SPECIES_512, (1 << 20) - 1);
- private static final IntVector MASK_21 =
- IntVector.broadcast(SPECIES_256, (1 << 21) - 1);
private static final IntVector MASK_22 =
IntVector.broadcast(SPECIES_512, (1 << 22) - 1);
- private static final IntVector MASK_23 =
- IntVector.broadcast(SPECIES_256, (1 << 23) - 1);
private static final IntVector MASK_24 =
IntVector.broadcast(SPECIES_512, (1 << 24) - 1);
- private static final IntVector MASK_25 =
- IntVector.broadcast(SPECIES_256, (1 << 25) - 1);
private static final IntVector MASK_26 =
IntVector.broadcast(SPECIES_512, (1 << 26) - 1);
- private static final IntVector MASK_27 =
- IntVector.broadcast(SPECIES_256, (1 << 27) - 1);
private static final IntVector MASK_28 =
IntVector.broadcast(SPECIES_512, (1 << 28) - 1);
- private static final IntVector MASK_29 =
- IntVector.broadcast(SPECIES_256, (1 << 29) - 1);
private static final IntVector MASK_30 =
IntVector.broadcast(SPECIES_512, (1 << 30) - 1);
- private static final IntVector MASK_31 =
- IntVector.broadcast(SPECIES_256, (1 << 31) - 1);
/**
* Pack 256 integers (one block of BLOCK_SIZE values)
@@ -101,103 +66,104 @@ public class VectorBitPacker {
* @param b
* number of bits to use per integer
*/
- public static void fastpack(final int[] in, int inpos, final int[] out,
- int outpos, int b) {
+ @Override
+ public void fastpack(final int[] in, int inpos, final int[] out,
+ int outpos, int b) {
switch (b) {
case 0:
break;
case 1:
- fastpack1(in, inpos, out, outpos);
+ VectorBitPacker256.fastpack1(in, inpos, out, outpos);
break;
case 2:
fastpack2(in, inpos, out, outpos);
break;
case 3:
- fastpack3(in, inpos, out, outpos);
+ VectorBitPacker256.fastpack3(in, inpos, out, outpos);
break;
case 4:
fastpack4(in, inpos, out, outpos);
break;
case 5:
- fastpack5(in, inpos, out, outpos);
+ VectorBitPacker256.fastpack5(in, inpos, out, outpos);
break;
case 6:
fastpack6(in, inpos, out, outpos);
break;
case 7:
- fastpack7(in, inpos, out, outpos);
+ VectorBitPacker256.fastpack7(in, inpos, out, outpos);
break;
case 8:
fastpack8(in, inpos, out, outpos);
break;
case 9:
- fastpack9(in, inpos, out, outpos);
+ VectorBitPacker256.fastpack9(in, inpos, out, outpos);
break;
case 10:
fastpack10(in, inpos, out, outpos);
break;
case 11:
- fastpack11(in, inpos, out, outpos);
+ VectorBitPacker256.fastpack11(in, inpos, out, outpos);
break;
case 12:
fastpack12(in, inpos, out, outpos);
break;
case 13:
- fastpack13(in, inpos, out, outpos);
+ VectorBitPacker256.fastpack13(in, inpos, out, outpos);
break;
case 14:
fastpack14(in, inpos, out, outpos);
break;
case 15:
- fastpack15(in, inpos, out, outpos);
+ VectorBitPacker256.fastpack15(in, inpos, out, outpos);
break;
case 16:
fastpack16(in, inpos, out, outpos);
break;
case 17:
- fastpack17(in, inpos, out, outpos);
+ VectorBitPacker256.fastpack17(in, inpos, out, outpos);
break;
case 18:
fastpack18(in, inpos, out, outpos);
break;
case 19:
- fastpack19(in, inpos, out, outpos);
+ VectorBitPacker256.fastpack19(in, inpos, out, outpos);
break;
case 20:
fastpack20(in, inpos, out, outpos);
break;
case 21:
- fastpack21(in, inpos, out, outpos);
+ VectorBitPacker256.fastpack21(in, inpos, out, outpos);
break;
case 22:
fastpack22(in, inpos, out, outpos);
break;
case 23:
- fastpack23(in, inpos, out, outpos);
+ VectorBitPacker256.fastpack23(in, inpos, out, outpos);
break;
case 24:
fastpack24(in, inpos, out, outpos);
break;
case 25:
- fastpack25(in, inpos, out, outpos);
+ VectorBitPacker256.fastpack25(in, inpos, out, outpos);
break;
case 26:
fastpack26(in, inpos, out, outpos);
break;
case 27:
- fastpack27(in, inpos, out, outpos);
+ VectorBitPacker256.fastpack27(in, inpos, out, outpos);
break;
case 28:
fastpack28(in, inpos, out, outpos);
break;
case 29:
- fastpack29(in, inpos, out, outpos);
+ VectorBitPacker256.fastpack29(in, inpos, out, outpos);
break;
case 30:
fastpack30(in, inpos, out, outpos);
break;
case 31:
- fastpack31(in, inpos, out, outpos);
+ VectorBitPacker256.fastpack31(in, inpos, out, outpos);
break;
case 32:
System.arraycopy(in, inpos, out, outpos, BLOCK_SIZE);
@@ -205,103 +171,104 @@ public static void fastpack(final int[] in, int inpos, final int[] out,
}
}
- static void fastpackNoMask(final int[] in, int inpos, final int[] out,
+ @Override
+ public void fastpackNoMask(final int[] in, int inpos, final int[] out,
int outpos, int b) {
switch (b) {
case 0:
break;
case 1:
- fastpackNoMask1(in, inpos, out, outpos);
+ VectorBitPacker256.fastpackNoMask1(in, inpos, out, outpos);
break;
case 2:
fastpackNoMask2(in, inpos, out, outpos);
break;
case 3:
- fastpackNoMask3(in, inpos, out, outpos);
+ VectorBitPacker256.fastpackNoMask3(in, inpos, out, outpos);
break;
case 4:
fastpackNoMask4(in, inpos, out, outpos);
break;
case 5:
- fastpackNoMask5(in, inpos, out, outpos);
+ VectorBitPacker256.fastpackNoMask5(in, inpos, out, outpos);
break;
case 6:
fastpackNoMask6(in, inpos, out, outpos);
break;
case 7:
- fastpackNoMask7(in, inpos, out, outpos);
+ VectorBitPacker256.fastpackNoMask7(in, inpos, out, outpos);
break;
case 8:
fastpackNoMask8(in, inpos, out, outpos);
break;
case 9:
- fastpackNoMask9(in, inpos, out, outpos);
+ VectorBitPacker256.fastpackNoMask9(in, inpos, out, outpos);
break;
case 10:
fastpackNoMask10(in, inpos, out, outpos);
break;
case 11:
- fastpackNoMask11(in, inpos, out, outpos);
+ VectorBitPacker256.fastpackNoMask11(in, inpos, out, outpos);
break;
case 12:
fastpackNoMask12(in, inpos, out, outpos);
break;
case 13:
- fastpackNoMask13(in, inpos, out, outpos);
+ VectorBitPacker256.fastpackNoMask13(in, inpos, out, outpos);
break;
case 14:
fastpackNoMask14(in, inpos, out, outpos);
break;
case 15:
- fastpackNoMask15(in, inpos, out, outpos);
+ VectorBitPacker256.fastpackNoMask15(in, inpos, out, outpos);
break;
case 16:
fastpackNoMask16(in, inpos, out, outpos);
break;
case 17:
- fastpackNoMask17(in, inpos, out, outpos);
+ VectorBitPacker256.fastpackNoMask17(in, inpos, out, outpos);
break;
case 18:
fastpackNoMask18(in, inpos, out, outpos);
break;
case 19:
- fastpackNoMask19(in, inpos, out, outpos);
+ VectorBitPacker256.fastpackNoMask19(in, inpos, out, outpos);
break;
case 20:
fastpackNoMask20(in, inpos, out, outpos);
break;
case 21:
- fastpackNoMask21(in, inpos, out, outpos);
+ VectorBitPacker256.fastpackNoMask21(in, inpos, out, outpos);
break;
case 22:
fastpackNoMask22(in, inpos, out, outpos);
break;
case 23:
- fastpackNoMask23(in, inpos, out, outpos);
+ VectorBitPacker256.fastpackNoMask23(in, inpos, out, outpos);
break;
case 24:
fastpackNoMask24(in, inpos, out, outpos);
break;
case 25:
- fastpackNoMask25(in, inpos, out, outpos);
+ VectorBitPacker256.fastpackNoMask25(in, inpos, out, outpos);
break;
case 26:
fastpackNoMask26(in, inpos, out, outpos);
break;
case 27:
- fastpackNoMask27(in, inpos, out, outpos);
+ VectorBitPacker256.fastpackNoMask27(in, inpos, out, outpos);
break;
case 28:
fastpackNoMask28(in, inpos, out, outpos);
break;
case 29:
- fastpackNoMask29(in, inpos, out, outpos);
+ VectorBitPacker256.fastpackNoMask29(in, inpos, out, outpos);
break;
case 30:
fastpackNoMask30(in, inpos, out, outpos);
break;
case 31:
- fastpackNoMask31(in, inpos, out, outpos);
+ VectorBitPacker256.fastpackNoMask31(in, inpos, out, outpos);
break;
case 32:
System.arraycopy(in, inpos, out, outpos, BLOCK_SIZE);
@@ -323,104 +290,105 @@ static void fastpackNoMask(final int[] in, int inpos, final int[] out,
* @param b
* number of bits to use per integer
*/
- public static void fastunpack(final int[] in, int inpos, final int[] out,
- int outpos, int b) {
+ @Override
+ public void fastunpack(final int[] in, int inpos, final int[] out,
+ int outpos, int b) {
switch (b) {
case 0:
Arrays.fill(out, outpos, outpos + 256, 0);
break;
case 1:
- fastunpack1(in, inpos, out, outpos);
+ VectorBitPacker256.fastunpack1(in, inpos, out, outpos);
break;
case 2:
fastunpack2(in, inpos, out, outpos);
break;
case 3:
- fastunpack3(in, inpos, out, outpos);
+ VectorBitPacker256.fastunpack3(in, inpos, out, outpos);
break;
case 4:
fastunpack4(in, inpos, out, outpos);
break;
case 5:
- fastunpack5(in, inpos, out, outpos);
+ VectorBitPacker256.fastunpack5(in, inpos, out, outpos);
break;
case 6:
fastunpack6(in, inpos, out, outpos);
break;
case 7:
- fastunpack7(in, inpos, out, outpos);
+ VectorBitPacker256.fastunpack7(in, inpos, out, outpos);
break;
case 8:
fastunpack8(in, inpos, out, outpos);
break;
case 9:
- fastunpack9(in, inpos, out, outpos);
+ VectorBitPacker256.fastunpack9(in, inpos, out, outpos);
break;
case 10:
fastunpack10(in, inpos, out, outpos);
break;
case 11:
- fastunpack11(in, inpos, out, outpos);
+ VectorBitPacker256.fastunpack11(in, inpos, out, outpos);
break;
case 12:
fastunpack12(in, inpos, out, outpos);
break;
case 13:
- fastunpack13(in, inpos, out, outpos);
+ VectorBitPacker256.fastunpack13(in, inpos, out, outpos);
break;
case 14:
fastunpack14(in, inpos, out, outpos);
break;
case 15:
- fastunpack15(in, inpos, out, outpos);
+ VectorBitPacker256.fastunpack15(in, inpos, out, outpos);
break;
case 16:
fastunpack16(in, inpos, out, outpos);
break;
case 17:
- fastunpack17(in, inpos, out, outpos);
+ VectorBitPacker256.fastunpack17(in, inpos, out, outpos);
break;
case 18:
fastunpack18(in, inpos, out, outpos);
break;
case 19:
- fastunpack19(in, inpos, out, outpos);
+ VectorBitPacker256.fastunpack19(in, inpos, out, outpos);
break;
case 20:
fastunpack20(in, inpos, out, outpos);
break;
case 21:
- fastunpack21(in, inpos, out, outpos);
+ VectorBitPacker256.fastunpack21(in, inpos, out, outpos);
break;
case 22:
fastunpack22(in, inpos, out, outpos);
break;
case 23:
- fastunpack23(in, inpos, out, outpos);
+ VectorBitPacker256.fastunpack23(in, inpos, out, outpos);
break;
case 24:
fastunpack24(in, inpos, out, outpos);
break;
case 25:
- fastunpack25(in, inpos, out, outpos);
+ VectorBitPacker256.fastunpack25(in, inpos, out, outpos);
break;
case 26:
fastunpack26(in, inpos, out, outpos);
break;
case 27:
- fastunpack27(in, inpos, out, outpos);
+ VectorBitPacker256.fastunpack27(in, inpos, out, outpos);
break;
case 28:
fastunpack28(in, inpos, out, outpos);
break;
case 29:
- fastunpack29(in, inpos, out, outpos);
+ VectorBitPacker256.fastunpack29(in, inpos, out, outpos);
break;
case 30:
fastunpack30(in, inpos, out, outpos);
break;
case 31:
- fastunpack31(in, inpos, out, outpos);
+ VectorBitPacker256.fastunpack31(in, inpos, out, outpos);
break;
case 32:
System.arraycopy(in, inpos, out, outpos, BLOCK_SIZE);
@@ -428,176 +396,6 @@ public static void fastunpack(final int[] in, int inpos, final int[] out,
}
}
- public static int slowpack(final int[] in, int inpos, int inlen,
- final int[] out, int outpos, int b) {
- if (inlen == 0)
- return outpos;
- if (b == 32) {
- System.arraycopy(in, inpos, out, outpos, inlen);
- return outpos + inlen;
- }
- int mask = (1 << b) - 1;
- int c = 0;
- int l = 0;
- int r = 0;
- int val = 0;
- for (int i = 0; i < inlen; i++) {
- val = in[inpos + i] & mask;
- out[outpos] |= val << (c + r);
- c += b;
- l = (32 - r) % b;
- if (c + r >= 32) {
- if (i < inlen - 1 || l != 0)
- outpos++;
- r = l == 0 ? 0 : b - l;
- if (l != 0)
- out[outpos] = val >> (b - r);
- c = 0;
- }
- }
- return outpos;
- }
-
- public static int slowunpack(final int[] in, int inpos, final int[] out,
- int outpos, int outlen, int b) {
- if (outlen == 0) {
- return inpos;
- }
- if (b == 32) {
- System.arraycopy(in, inpos, out, outpos, outlen);
- return inpos + outlen;
- }
- int mask = (1 << b) - 1;
- int limit = outpos + outlen;
- int r = 0;
- int val = 0;
- int i = 0;
- for (; outpos < limit; i++) {
- if (r > 0)
- out[outpos++] =
- (val >>> (32 - (b - r))) | ((in[inpos + i] << (b - r)) & mask);
- val = in[inpos + i];
- int j = 0;
- int l = 32 - r;
- int ll = l % b == 0 ? l : l - b;
- while (j < ll && outpos < limit) {
- out[outpos++] = (val >> (j + r)) & mask;
- j += b;
- }
- r = l % b == 0 ? 0 : b - (l % b);
- }
- return inpos + i;
- }
-
- public static int numCompressedInts(int n, int b) {
- int width = b % 2 == 0 ? VLEN_512 : VLEN_256;
- if (n <= width)
- return n;
- int intsPerVec = (32 / b) * width;
- int q = (n + intsPerVec - 1) / intsPerVec;
- return q * width;
- }
-
- private static void fastpack1(final int[] in, int inpos, final int[] out,
- int outpos) {
- var iV = IntVector.fromArray(SPECIES_256, in, inpos);
- var oV = iV.and(MASK_1);
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 8);
- oV = iV.and(MASK_1).lanewise(VectorOperators.LSHL, 1).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 16);
- oV = iV.and(MASK_1).lanewise(VectorOperators.LSHL, 2).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 24);
- oV = iV.and(MASK_1).lanewise(VectorOperators.LSHL, 3).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 32);
- oV = iV.and(MASK_1).lanewise(VectorOperators.LSHL, 4).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 40);
- oV = iV.and(MASK_1).lanewise(VectorOperators.LSHL, 5).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 48);
- oV = iV.and(MASK_1).lanewise(VectorOperators.LSHL, 6).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 56);
- oV = iV.and(MASK_1).lanewise(VectorOperators.LSHL, 7).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 64);
- oV = iV.and(MASK_1).lanewise(VectorOperators.LSHL, 8).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 72);
- oV = iV.and(MASK_1).lanewise(VectorOperators.LSHL, 9).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 80);
- oV = iV.and(MASK_1).lanewise(VectorOperators.LSHL, 10).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 88);
- oV = iV.and(MASK_1).lanewise(VectorOperators.LSHL, 11).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 96);
- oV = iV.and(MASK_1).lanewise(VectorOperators.LSHL, 12).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 104);
- oV = iV.and(MASK_1).lanewise(VectorOperators.LSHL, 13).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 112);
- oV = iV.and(MASK_1).lanewise(VectorOperators.LSHL, 14).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 120);
- oV = iV.and(MASK_1).lanewise(VectorOperators.LSHL, 15).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 128);
- oV = iV.and(MASK_1).lanewise(VectorOperators.LSHL, 16).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 136);
- oV = iV.and(MASK_1).lanewise(VectorOperators.LSHL, 17).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 144);
- oV = iV.and(MASK_1).lanewise(VectorOperators.LSHL, 18).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 152);
- oV = iV.and(MASK_1).lanewise(VectorOperators.LSHL, 19).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 160);
- oV = iV.and(MASK_1).lanewise(VectorOperators.LSHL, 20).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 168);
- oV = iV.and(MASK_1).lanewise(VectorOperators.LSHL, 21).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 176);
- oV = iV.and(MASK_1).lanewise(VectorOperators.LSHL, 22).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 184);
- oV = iV.and(MASK_1).lanewise(VectorOperators.LSHL, 23).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 192);
- oV = iV.and(MASK_1).lanewise(VectorOperators.LSHL, 24).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 200);
- oV = iV.and(MASK_1).lanewise(VectorOperators.LSHL, 25).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 208);
- oV = iV.and(MASK_1).lanewise(VectorOperators.LSHL, 26).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 216);
- oV = iV.and(MASK_1).lanewise(VectorOperators.LSHL, 27).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 224);
- oV = iV.and(MASK_1).lanewise(VectorOperators.LSHL, 28).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 232);
- oV = iV.and(MASK_1).lanewise(VectorOperators.LSHL, 29).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 240);
- oV = iV.and(MASK_1).lanewise(VectorOperators.LSHL, 30).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 248);
- oV = iV.and(MASK_1).lanewise(VectorOperators.LSHL, 31).or(oV);
-
- oV.intoArray(out, outpos);
- }
-
private static void fastpack2(final int[] in, int inpos, final int[] out,
int outpos) {
var iV = IntVector.fromArray(SPECIES_512, in, inpos);
@@ -650,116 +448,6 @@ private static void fastpack2(final int[] in, int inpos, final int[] out,
oV.intoArray(out, outpos);
}
- private static void fastpack3(final int[] in, int inpos, final int[] out,
- int outpos) {
- var iV = IntVector.fromArray(SPECIES_256, in, inpos);
- var oV = iV.and(MASK_3);
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 8);
- oV = iV.and(MASK_3).lanewise(VectorOperators.LSHL, 3).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 16);
- oV = iV.and(MASK_3).lanewise(VectorOperators.LSHL, 6).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 24);
- oV = iV.and(MASK_3).lanewise(VectorOperators.LSHL, 9).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 32);
- oV = iV.and(MASK_3).lanewise(VectorOperators.LSHL, 12).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 40);
- oV = iV.and(MASK_3).lanewise(VectorOperators.LSHL, 15).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 48);
- oV = iV.and(MASK_3).lanewise(VectorOperators.LSHL, 18).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 56);
- oV = iV.and(MASK_3).lanewise(VectorOperators.LSHL, 21).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 64);
- oV = iV.and(MASK_3).lanewise(VectorOperators.LSHL, 24).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 72);
- oV = iV.and(MASK_3).lanewise(VectorOperators.LSHL, 27).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 80);
- oV = iV.and(MASK_3).lanewise(VectorOperators.LSHL, 30).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.and(MASK_3).lanewise(VectorOperators.LSHR, 2);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 88);
- oV = iV.and(MASK_3).lanewise(VectorOperators.LSHL, 1).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 96);
- oV = iV.and(MASK_3).lanewise(VectorOperators.LSHL, 4).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 104);
- oV = iV.and(MASK_3).lanewise(VectorOperators.LSHL, 7).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 112);
- oV = iV.and(MASK_3).lanewise(VectorOperators.LSHL, 10).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 120);
- oV = iV.and(MASK_3).lanewise(VectorOperators.LSHL, 13).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 128);
- oV = iV.and(MASK_3).lanewise(VectorOperators.LSHL, 16).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 136);
- oV = iV.and(MASK_3).lanewise(VectorOperators.LSHL, 19).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 144);
- oV = iV.and(MASK_3).lanewise(VectorOperators.LSHL, 22).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 152);
- oV = iV.and(MASK_3).lanewise(VectorOperators.LSHL, 25).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 160);
- oV = iV.and(MASK_3).lanewise(VectorOperators.LSHL, 28).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 168);
- oV = iV.and(MASK_3).lanewise(VectorOperators.LSHL, 31).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.and(MASK_3).lanewise(VectorOperators.LSHR, 1);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 176);
- oV = iV.and(MASK_3).lanewise(VectorOperators.LSHL, 2).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 184);
- oV = iV.and(MASK_3).lanewise(VectorOperators.LSHL, 5).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 192);
- oV = iV.and(MASK_3).lanewise(VectorOperators.LSHL, 8).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 200);
- oV = iV.and(MASK_3).lanewise(VectorOperators.LSHL, 11).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 208);
- oV = iV.and(MASK_3).lanewise(VectorOperators.LSHL, 14).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 216);
- oV = iV.and(MASK_3).lanewise(VectorOperators.LSHL, 17).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 224);
- oV = iV.and(MASK_3).lanewise(VectorOperators.LSHL, 20).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 232);
- oV = iV.and(MASK_3).lanewise(VectorOperators.LSHL, 23).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 240);
- oV = iV.and(MASK_3).lanewise(VectorOperators.LSHL, 26).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 248);
- oV = iV.and(MASK_3).lanewise(VectorOperators.LSHL, 29).or(oV);
-
- oV.intoArray(out, outpos);
- }
-
private static void fastpack4(final int[] in, int inpos, final int[] out,
int outpos) {
var iV = IntVector.fromArray(SPECIES_512, in, inpos);
@@ -816,126 +504,6 @@ private static void fastpack4(final int[] in, int inpos, final int[] out,
oV.intoArray(out, outpos);
}
- private static void fastpack5(final int[] in, int inpos, final int[] out,
- int outpos) {
- var iV = IntVector.fromArray(SPECIES_256, in, inpos);
- var oV = iV.and(MASK_5);
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 8);
- oV = iV.and(MASK_5).lanewise(VectorOperators.LSHL, 5).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 16);
- oV = iV.and(MASK_5).lanewise(VectorOperators.LSHL, 10).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 24);
- oV = iV.and(MASK_5).lanewise(VectorOperators.LSHL, 15).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 32);
- oV = iV.and(MASK_5).lanewise(VectorOperators.LSHL, 20).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 40);
- oV = iV.and(MASK_5).lanewise(VectorOperators.LSHL, 25).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 48);
- oV = iV.and(MASK_5).lanewise(VectorOperators.LSHL, 30).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.and(MASK_5).lanewise(VectorOperators.LSHR, 2);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 56);
- oV = iV.and(MASK_5).lanewise(VectorOperators.LSHL, 3).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 64);
- oV = iV.and(MASK_5).lanewise(VectorOperators.LSHL, 8).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 72);
- oV = iV.and(MASK_5).lanewise(VectorOperators.LSHL, 13).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 80);
- oV = iV.and(MASK_5).lanewise(VectorOperators.LSHL, 18).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 88);
- oV = iV.and(MASK_5).lanewise(VectorOperators.LSHL, 23).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 96);
- oV = iV.and(MASK_5).lanewise(VectorOperators.LSHL, 28).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.and(MASK_5).lanewise(VectorOperators.LSHR, 4);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 104);
- oV = iV.and(MASK_5).lanewise(VectorOperators.LSHL, 1).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 112);
- oV = iV.and(MASK_5).lanewise(VectorOperators.LSHL, 6).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 120);
- oV = iV.and(MASK_5).lanewise(VectorOperators.LSHL, 11).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 128);
- oV = iV.and(MASK_5).lanewise(VectorOperators.LSHL, 16).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 136);
- oV = iV.and(MASK_5).lanewise(VectorOperators.LSHL, 21).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 144);
- oV = iV.and(MASK_5).lanewise(VectorOperators.LSHL, 26).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 152);
- oV = iV.and(MASK_5).lanewise(VectorOperators.LSHL, 31).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.and(MASK_5).lanewise(VectorOperators.LSHR, 1);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 160);
- oV = iV.and(MASK_5).lanewise(VectorOperators.LSHL, 4).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 168);
- oV = iV.and(MASK_5).lanewise(VectorOperators.LSHL, 9).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 176);
- oV = iV.and(MASK_5).lanewise(VectorOperators.LSHL, 14).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 184);
- oV = iV.and(MASK_5).lanewise(VectorOperators.LSHL, 19).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 192);
- oV = iV.and(MASK_5).lanewise(VectorOperators.LSHL, 24).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 200);
- oV = iV.and(MASK_5).lanewise(VectorOperators.LSHL, 29).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.and(MASK_5).lanewise(VectorOperators.LSHR, 3);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 208);
- oV = iV.and(MASK_5).lanewise(VectorOperators.LSHL, 2).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 216);
- oV = iV.and(MASK_5).lanewise(VectorOperators.LSHL, 7).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 224);
- oV = iV.and(MASK_5).lanewise(VectorOperators.LSHL, 12).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 232);
- oV = iV.and(MASK_5).lanewise(VectorOperators.LSHL, 17).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 240);
- oV = iV.and(MASK_5).lanewise(VectorOperators.LSHL, 22).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 248);
- oV = iV.and(MASK_5).lanewise(VectorOperators.LSHL, 27).or(oV);
-
- oV.intoArray(out, outpos);
- }
-
private static void fastpack6(final int[] in, int inpos, final int[] out,
int outpos) {
var iV = IntVector.fromArray(SPECIES_512, in, inpos);
@@ -997,136 +565,6 @@ private static void fastpack6(final int[] in, int inpos, final int[] out,
oV.intoArray(out, outpos);
}
- private static void fastpack7(final int[] in, int inpos, final int[] out,
- int outpos) {
- var iV = IntVector.fromArray(SPECIES_256, in, inpos);
- var oV = iV.and(MASK_7);
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 8);
- oV = iV.and(MASK_7).lanewise(VectorOperators.LSHL, 7).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 16);
- oV = iV.and(MASK_7).lanewise(VectorOperators.LSHL, 14).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 24);
- oV = iV.and(MASK_7).lanewise(VectorOperators.LSHL, 21).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 32);
- oV = iV.and(MASK_7).lanewise(VectorOperators.LSHL, 28).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.and(MASK_7).lanewise(VectorOperators.LSHR, 4);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 40);
- oV = iV.and(MASK_7).lanewise(VectorOperators.LSHL, 3).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 48);
- oV = iV.and(MASK_7).lanewise(VectorOperators.LSHL, 10).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 56);
- oV = iV.and(MASK_7).lanewise(VectorOperators.LSHL, 17).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 64);
- oV = iV.and(MASK_7).lanewise(VectorOperators.LSHL, 24).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 72);
- oV = iV.and(MASK_7).lanewise(VectorOperators.LSHL, 31).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.and(MASK_7).lanewise(VectorOperators.LSHR, 1);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 80);
- oV = iV.and(MASK_7).lanewise(VectorOperators.LSHL, 6).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 88);
- oV = iV.and(MASK_7).lanewise(VectorOperators.LSHL, 13).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 96);
- oV = iV.and(MASK_7).lanewise(VectorOperators.LSHL, 20).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 104);
- oV = iV.and(MASK_7).lanewise(VectorOperators.LSHL, 27).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.and(MASK_7).lanewise(VectorOperators.LSHR, 5);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 112);
- oV = iV.and(MASK_7).lanewise(VectorOperators.LSHL, 2).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 120);
- oV = iV.and(MASK_7).lanewise(VectorOperators.LSHL, 9).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 128);
- oV = iV.and(MASK_7).lanewise(VectorOperators.LSHL, 16).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 136);
- oV = iV.and(MASK_7).lanewise(VectorOperators.LSHL, 23).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 144);
- oV = iV.and(MASK_7).lanewise(VectorOperators.LSHL, 30).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.and(MASK_7).lanewise(VectorOperators.LSHR, 2);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 152);
- oV = iV.and(MASK_7).lanewise(VectorOperators.LSHL, 5).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 160);
- oV = iV.and(MASK_7).lanewise(VectorOperators.LSHL, 12).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 168);
- oV = iV.and(MASK_7).lanewise(VectorOperators.LSHL, 19).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 176);
- oV = iV.and(MASK_7).lanewise(VectorOperators.LSHL, 26).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.and(MASK_7).lanewise(VectorOperators.LSHR, 6);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 184);
- oV = iV.and(MASK_7).lanewise(VectorOperators.LSHL, 1).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 192);
- oV = iV.and(MASK_7).lanewise(VectorOperators.LSHL, 8).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 200);
- oV = iV.and(MASK_7).lanewise(VectorOperators.LSHL, 15).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 208);
- oV = iV.and(MASK_7).lanewise(VectorOperators.LSHL, 22).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 216);
- oV = iV.and(MASK_7).lanewise(VectorOperators.LSHL, 29).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.and(MASK_7).lanewise(VectorOperators.LSHR, 3);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 224);
- oV = iV.and(MASK_7).lanewise(VectorOperators.LSHL, 4).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 232);
- oV = iV.and(MASK_7).lanewise(VectorOperators.LSHL, 11).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 240);
- oV = iV.and(MASK_7).lanewise(VectorOperators.LSHL, 18).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 248);
- oV = iV.and(MASK_7).lanewise(VectorOperators.LSHL, 25).or(oV);
-
- oV.intoArray(out, outpos);
- }
-
private static void fastpack8(final int[] in, int inpos, final int[] out,
int outpos) {
var iV = IntVector.fromArray(SPECIES_512, in, inpos);
@@ -1193,146 +631,6 @@ private static void fastpack8(final int[] in, int inpos, final int[] out,
oV.intoArray(out, outpos);
}
- private static void fastpack9(final int[] in, int inpos, final int[] out,
- int outpos) {
- var iV = IntVector.fromArray(SPECIES_256, in, inpos);
- var oV = iV.and(MASK_9);
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 8);
- oV = iV.and(MASK_9).lanewise(VectorOperators.LSHL, 9).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 16);
- oV = iV.and(MASK_9).lanewise(VectorOperators.LSHL, 18).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 24);
- oV = iV.and(MASK_9).lanewise(VectorOperators.LSHL, 27).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.and(MASK_9).lanewise(VectorOperators.LSHR, 5);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 32);
- oV = iV.and(MASK_9).lanewise(VectorOperators.LSHL, 4).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 40);
- oV = iV.and(MASK_9).lanewise(VectorOperators.LSHL, 13).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 48);
- oV = iV.and(MASK_9).lanewise(VectorOperators.LSHL, 22).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 56);
- oV = iV.and(MASK_9).lanewise(VectorOperators.LSHL, 31).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.and(MASK_9).lanewise(VectorOperators.LSHR, 1);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 64);
- oV = iV.and(MASK_9).lanewise(VectorOperators.LSHL, 8).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 72);
- oV = iV.and(MASK_9).lanewise(VectorOperators.LSHL, 17).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 80);
- oV = iV.and(MASK_9).lanewise(VectorOperators.LSHL, 26).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.and(MASK_9).lanewise(VectorOperators.LSHR, 6);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 88);
- oV = iV.and(MASK_9).lanewise(VectorOperators.LSHL, 3).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 96);
- oV = iV.and(MASK_9).lanewise(VectorOperators.LSHL, 12).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 104);
- oV = iV.and(MASK_9).lanewise(VectorOperators.LSHL, 21).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 112);
- oV = iV.and(MASK_9).lanewise(VectorOperators.LSHL, 30).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.and(MASK_9).lanewise(VectorOperators.LSHR, 2);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 120);
- oV = iV.and(MASK_9).lanewise(VectorOperators.LSHL, 7).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 128);
- oV = iV.and(MASK_9).lanewise(VectorOperators.LSHL, 16).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 136);
- oV = iV.and(MASK_9).lanewise(VectorOperators.LSHL, 25).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.and(MASK_9).lanewise(VectorOperators.LSHR, 7);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 144);
- oV = iV.and(MASK_9).lanewise(VectorOperators.LSHL, 2).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 152);
- oV = iV.and(MASK_9).lanewise(VectorOperators.LSHL, 11).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 160);
- oV = iV.and(MASK_9).lanewise(VectorOperators.LSHL, 20).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 168);
- oV = iV.and(MASK_9).lanewise(VectorOperators.LSHL, 29).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.and(MASK_9).lanewise(VectorOperators.LSHR, 3);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 176);
- oV = iV.and(MASK_9).lanewise(VectorOperators.LSHL, 6).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 184);
- oV = iV.and(MASK_9).lanewise(VectorOperators.LSHL, 15).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 192);
- oV = iV.and(MASK_9).lanewise(VectorOperators.LSHL, 24).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.and(MASK_9).lanewise(VectorOperators.LSHR, 8);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 200);
- oV = iV.and(MASK_9).lanewise(VectorOperators.LSHL, 1).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 208);
- oV = iV.and(MASK_9).lanewise(VectorOperators.LSHL, 10).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 216);
- oV = iV.and(MASK_9).lanewise(VectorOperators.LSHL, 19).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 224);
- oV = iV.and(MASK_9).lanewise(VectorOperators.LSHL, 28).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.and(MASK_9).lanewise(VectorOperators.LSHR, 4);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 232);
- oV = iV.and(MASK_9).lanewise(VectorOperators.LSHL, 5).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 240);
- oV = iV.and(MASK_9).lanewise(VectorOperators.LSHL, 14).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 248);
- oV = iV.and(MASK_9).lanewise(VectorOperators.LSHL, 23).or(oV);
-
- oV.intoArray(out, outpos);
- }
-
private static void fastpack10(final int[] in, int inpos, final int[] out,
int outpos) {
var iV = IntVector.fromArray(SPECIES_512, in, inpos);
@@ -1404,169 +702,19 @@ private static void fastpack10(final int[] in, int inpos, final int[] out,
oV.intoArray(out, outpos);
}
- private static void fastpack11(final int[] in, int inpos, final int[] out,
+ private static void fastpack12(final int[] in, int inpos, final int[] out,
int outpos) {
- var iV = IntVector.fromArray(SPECIES_256, in, inpos);
- var oV = iV.and(MASK_11);
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 8);
- oV = iV.and(MASK_11).lanewise(VectorOperators.LSHL, 11).or(oV);
+ var iV = IntVector.fromArray(SPECIES_512, in, inpos);
+ var oV = iV.and(MASK_12);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 16);
+ oV = iV.and(MASK_12).lanewise(VectorOperators.LSHL, 12).or(oV);
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 16);
- oV = iV.and(MASK_11).lanewise(VectorOperators.LSHL, 22).or(oV);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 32);
+ oV = iV.and(MASK_12).lanewise(VectorOperators.LSHL, 24).or(oV);
oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.and(MASK_11).lanewise(VectorOperators.LSHR, 10);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 24);
- oV = iV.and(MASK_11).lanewise(VectorOperators.LSHL, 1).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 32);
- oV = iV.and(MASK_11).lanewise(VectorOperators.LSHL, 12).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 40);
- oV = iV.and(MASK_11).lanewise(VectorOperators.LSHL, 23).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.and(MASK_11).lanewise(VectorOperators.LSHR, 9);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 48);
- oV = iV.and(MASK_11).lanewise(VectorOperators.LSHL, 2).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 56);
- oV = iV.and(MASK_11).lanewise(VectorOperators.LSHL, 13).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 64);
- oV = iV.and(MASK_11).lanewise(VectorOperators.LSHL, 24).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.and(MASK_11).lanewise(VectorOperators.LSHR, 8);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 72);
- oV = iV.and(MASK_11).lanewise(VectorOperators.LSHL, 3).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 80);
- oV = iV.and(MASK_11).lanewise(VectorOperators.LSHL, 14).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 88);
- oV = iV.and(MASK_11).lanewise(VectorOperators.LSHL, 25).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.and(MASK_11).lanewise(VectorOperators.LSHR, 7);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 96);
- oV = iV.and(MASK_11).lanewise(VectorOperators.LSHL, 4).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 104);
- oV = iV.and(MASK_11).lanewise(VectorOperators.LSHL, 15).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 112);
- oV = iV.and(MASK_11).lanewise(VectorOperators.LSHL, 26).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.and(MASK_11).lanewise(VectorOperators.LSHR, 6);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 120);
- oV = iV.and(MASK_11).lanewise(VectorOperators.LSHL, 5).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 128);
- oV = iV.and(MASK_11).lanewise(VectorOperators.LSHL, 16).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 136);
- oV = iV.and(MASK_11).lanewise(VectorOperators.LSHL, 27).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.and(MASK_11).lanewise(VectorOperators.LSHR, 5);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 144);
- oV = iV.and(MASK_11).lanewise(VectorOperators.LSHL, 6).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 152);
- oV = iV.and(MASK_11).lanewise(VectorOperators.LSHL, 17).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 160);
- oV = iV.and(MASK_11).lanewise(VectorOperators.LSHL, 28).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.and(MASK_11).lanewise(VectorOperators.LSHR, 4);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 168);
- oV = iV.and(MASK_11).lanewise(VectorOperators.LSHL, 7).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 176);
- oV = iV.and(MASK_11).lanewise(VectorOperators.LSHL, 18).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 184);
- oV = iV.and(MASK_11).lanewise(VectorOperators.LSHL, 29).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.and(MASK_11).lanewise(VectorOperators.LSHR, 3);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 192);
- oV = iV.and(MASK_11).lanewise(VectorOperators.LSHL, 8).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 200);
- oV = iV.and(MASK_11).lanewise(VectorOperators.LSHL, 19).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 208);
- oV = iV.and(MASK_11).lanewise(VectorOperators.LSHL, 30).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.and(MASK_11).lanewise(VectorOperators.LSHR, 2);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 216);
- oV = iV.and(MASK_11).lanewise(VectorOperators.LSHL, 9).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 224);
- oV = iV.and(MASK_11).lanewise(VectorOperators.LSHL, 20).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 232);
- oV = iV.and(MASK_11).lanewise(VectorOperators.LSHL, 31).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.and(MASK_11).lanewise(VectorOperators.LSHR, 1);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 240);
- oV = iV.and(MASK_11).lanewise(VectorOperators.LSHL, 10).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 248);
- oV = iV.and(MASK_11).lanewise(VectorOperators.LSHL, 21).or(oV);
-
- oV.intoArray(out, outpos);
- }
-
- private static void fastpack12(final int[] in, int inpos, final int[] out,
- int outpos) {
- var iV = IntVector.fromArray(SPECIES_512, in, inpos);
- var oV = iV.and(MASK_12);
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 16);
- oV = iV.and(MASK_12).lanewise(VectorOperators.LSHL, 12).or(oV);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 32);
- oV = iV.and(MASK_12).lanewise(VectorOperators.LSHL, 24).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_512;
- oV = iV.and(MASK_12).lanewise(VectorOperators.LSHR, 8);
+ outpos += VLEN_512;
+ oV = iV.and(MASK_12).lanewise(VectorOperators.LSHR, 8);
iV = IntVector.fromArray(SPECIES_512, in, inpos + 48);
oV = iV.and(MASK_12).lanewise(VectorOperators.LSHL, 4).or(oV);
@@ -1630,166 +778,6 @@ private static void fastpack12(final int[] in, int inpos, final int[] out,
oV.intoArray(out, outpos);
}
- private static void fastpack13(final int[] in, int inpos, final int[] out,
- int outpos) {
- var iV = IntVector.fromArray(SPECIES_256, in, inpos);
- var oV = iV.and(MASK_13);
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 8);
- oV = iV.and(MASK_13).lanewise(VectorOperators.LSHL, 13).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 16);
- oV = iV.and(MASK_13).lanewise(VectorOperators.LSHL, 26).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.and(MASK_13).lanewise(VectorOperators.LSHR, 6);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 24);
- oV = iV.and(MASK_13).lanewise(VectorOperators.LSHL, 7).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 32);
- oV = iV.and(MASK_13).lanewise(VectorOperators.LSHL, 20).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.and(MASK_13).lanewise(VectorOperators.LSHR, 12);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 40);
- oV = iV.and(MASK_13).lanewise(VectorOperators.LSHL, 1).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 48);
- oV = iV.and(MASK_13).lanewise(VectorOperators.LSHL, 14).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 56);
- oV = iV.and(MASK_13).lanewise(VectorOperators.LSHL, 27).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.and(MASK_13).lanewise(VectorOperators.LSHR, 5);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 64);
- oV = iV.and(MASK_13).lanewise(VectorOperators.LSHL, 8).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 72);
- oV = iV.and(MASK_13).lanewise(VectorOperators.LSHL, 21).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.and(MASK_13).lanewise(VectorOperators.LSHR, 11);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 80);
- oV = iV.and(MASK_13).lanewise(VectorOperators.LSHL, 2).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 88);
- oV = iV.and(MASK_13).lanewise(VectorOperators.LSHL, 15).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 96);
- oV = iV.and(MASK_13).lanewise(VectorOperators.LSHL, 28).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.and(MASK_13).lanewise(VectorOperators.LSHR, 4);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 104);
- oV = iV.and(MASK_13).lanewise(VectorOperators.LSHL, 9).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 112);
- oV = iV.and(MASK_13).lanewise(VectorOperators.LSHL, 22).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.and(MASK_13).lanewise(VectorOperators.LSHR, 10);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 120);
- oV = iV.and(MASK_13).lanewise(VectorOperators.LSHL, 3).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 128);
- oV = iV.and(MASK_13).lanewise(VectorOperators.LSHL, 16).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 136);
- oV = iV.and(MASK_13).lanewise(VectorOperators.LSHL, 29).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.and(MASK_13).lanewise(VectorOperators.LSHR, 3);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 144);
- oV = iV.and(MASK_13).lanewise(VectorOperators.LSHL, 10).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 152);
- oV = iV.and(MASK_13).lanewise(VectorOperators.LSHL, 23).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.and(MASK_13).lanewise(VectorOperators.LSHR, 9);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 160);
- oV = iV.and(MASK_13).lanewise(VectorOperators.LSHL, 4).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 168);
- oV = iV.and(MASK_13).lanewise(VectorOperators.LSHL, 17).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 176);
- oV = iV.and(MASK_13).lanewise(VectorOperators.LSHL, 30).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.and(MASK_13).lanewise(VectorOperators.LSHR, 2);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 184);
- oV = iV.and(MASK_13).lanewise(VectorOperators.LSHL, 11).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 192);
- oV = iV.and(MASK_13).lanewise(VectorOperators.LSHL, 24).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.and(MASK_13).lanewise(VectorOperators.LSHR, 8);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 200);
- oV = iV.and(MASK_13).lanewise(VectorOperators.LSHL, 5).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 208);
- oV = iV.and(MASK_13).lanewise(VectorOperators.LSHL, 18).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 216);
- oV = iV.and(MASK_13).lanewise(VectorOperators.LSHL, 31).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.and(MASK_13).lanewise(VectorOperators.LSHR, 1);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 224);
- oV = iV.and(MASK_13).lanewise(VectorOperators.LSHL, 12).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 232);
- oV = iV.and(MASK_13).lanewise(VectorOperators.LSHL, 25).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.and(MASK_13).lanewise(VectorOperators.LSHR, 7);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 240);
- oV = iV.and(MASK_13).lanewise(VectorOperators.LSHL, 6).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 248);
- oV = iV.and(MASK_13).lanewise(VectorOperators.LSHL, 19).or(oV);
-
- oV.intoArray(out, outpos);
- }
-
private static void fastpack14(final int[] in, int inpos, final int[] out,
int outpos) {
var iV = IntVector.fromArray(SPECIES_512, in, inpos);
@@ -1871,176 +859,6 @@ private static void fastpack14(final int[] in, int inpos, final int[] out,
oV.intoArray(out, outpos);
}
- private static void fastpack15(final int[] in, int inpos, final int[] out,
- int outpos) {
- var iV = IntVector.fromArray(SPECIES_256, in, inpos);
- var oV = iV.and(MASK_15);
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 8);
- oV = iV.and(MASK_15).lanewise(VectorOperators.LSHL, 15).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 16);
- oV = iV.and(MASK_15).lanewise(VectorOperators.LSHL, 30).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.and(MASK_15).lanewise(VectorOperators.LSHR, 2);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 24);
- oV = iV.and(MASK_15).lanewise(VectorOperators.LSHL, 13).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 32);
- oV = iV.and(MASK_15).lanewise(VectorOperators.LSHL, 28).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.and(MASK_15).lanewise(VectorOperators.LSHR, 4);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 40);
- oV = iV.and(MASK_15).lanewise(VectorOperators.LSHL, 11).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 48);
- oV = iV.and(MASK_15).lanewise(VectorOperators.LSHL, 26).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.and(MASK_15).lanewise(VectorOperators.LSHR, 6);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 56);
- oV = iV.and(MASK_15).lanewise(VectorOperators.LSHL, 9).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 64);
- oV = iV.and(MASK_15).lanewise(VectorOperators.LSHL, 24).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.and(MASK_15).lanewise(VectorOperators.LSHR, 8);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 72);
- oV = iV.and(MASK_15).lanewise(VectorOperators.LSHL, 7).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 80);
- oV = iV.and(MASK_15).lanewise(VectorOperators.LSHL, 22).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.and(MASK_15).lanewise(VectorOperators.LSHR, 10);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 88);
- oV = iV.and(MASK_15).lanewise(VectorOperators.LSHL, 5).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 96);
- oV = iV.and(MASK_15).lanewise(VectorOperators.LSHL, 20).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.and(MASK_15).lanewise(VectorOperators.LSHR, 12);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 104);
- oV = iV.and(MASK_15).lanewise(VectorOperators.LSHL, 3).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 112);
- oV = iV.and(MASK_15).lanewise(VectorOperators.LSHL, 18).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.and(MASK_15).lanewise(VectorOperators.LSHR, 14);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 120);
- oV = iV.and(MASK_15).lanewise(VectorOperators.LSHL, 1).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 128);
- oV = iV.and(MASK_15).lanewise(VectorOperators.LSHL, 16).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 136);
- oV = iV.and(MASK_15).lanewise(VectorOperators.LSHL, 31).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.and(MASK_15).lanewise(VectorOperators.LSHR, 1);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 144);
- oV = iV.and(MASK_15).lanewise(VectorOperators.LSHL, 14).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 152);
- oV = iV.and(MASK_15).lanewise(VectorOperators.LSHL, 29).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.and(MASK_15).lanewise(VectorOperators.LSHR, 3);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 160);
- oV = iV.and(MASK_15).lanewise(VectorOperators.LSHL, 12).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 168);
- oV = iV.and(MASK_15).lanewise(VectorOperators.LSHL, 27).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.and(MASK_15).lanewise(VectorOperators.LSHR, 5);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 176);
- oV = iV.and(MASK_15).lanewise(VectorOperators.LSHL, 10).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 184);
- oV = iV.and(MASK_15).lanewise(VectorOperators.LSHL, 25).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.and(MASK_15).lanewise(VectorOperators.LSHR, 7);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 192);
- oV = iV.and(MASK_15).lanewise(VectorOperators.LSHL, 8).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 200);
- oV = iV.and(MASK_15).lanewise(VectorOperators.LSHL, 23).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.and(MASK_15).lanewise(VectorOperators.LSHR, 9);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 208);
- oV = iV.and(MASK_15).lanewise(VectorOperators.LSHL, 6).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 216);
- oV = iV.and(MASK_15).lanewise(VectorOperators.LSHL, 21).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.and(MASK_15).lanewise(VectorOperators.LSHR, 11);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 224);
- oV = iV.and(MASK_15).lanewise(VectorOperators.LSHL, 4).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 232);
- oV = iV.and(MASK_15).lanewise(VectorOperators.LSHL, 19).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.and(MASK_15).lanewise(VectorOperators.LSHR, 13);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 240);
- oV = iV.and(MASK_15).lanewise(VectorOperators.LSHL, 2).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 248);
- oV = iV.and(MASK_15).lanewise(VectorOperators.LSHL, 17).or(oV);
-
- oV.intoArray(out, outpos);
- }
-
private static void fastpack16(final int[] in, int inpos, final int[] out,
int outpos) {
var iV = IntVector.fromArray(SPECIES_512, in, inpos);
@@ -2127,510 +945,564 @@ private static void fastpack16(final int[] in, int inpos, final int[] out,
oV.intoArray(out, outpos);
}
- private static void fastpack17(final int[] in, int inpos, final int[] out,
+ /**
+ * Packs 256 ints from {@code in} into 144 ints of {@code out}, 18 bits per
+ * value (MASK_18 is presumably (1 << 18) - 1, by analogy with MASK_2 -
+ * confirm against its declaration). Input is consumed one SPECIES_512
+ * vector (16 lanes) at a time, so packing is lane-wise: output lane k only
+ * ever holds bits of inputs whose index is congruent to k modulo 16.
+ *
+ * @param in source array; reads in[inpos .. inpos + 255]
+ * @param inpos index of the first value to pack
+ * @param out destination array; writes out[outpos .. outpos + 143]
+ * @param outpos index of the first packed word
+ */
+ private static void fastpack18(final int[] in, int inpos, final int[] out,
int outpos) {
- var iV = IntVector.fromArray(SPECIES_256, in, inpos);
- var oV = iV.and(MASK_17);
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 8);
- oV = iV.and(MASK_17).lanewise(VectorOperators.LSHL, 17).or(oV);
+ var iV = IntVector.fromArray(SPECIES_512, in, inpos);
+ var oV = iV.and(MASK_18);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 16);
+ oV = iV.and(MASK_18).lanewise(VectorOperators.LSHL, 18).or(oV);
oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.and(MASK_17).lanewise(VectorOperators.LSHR, 15);
+ outpos += VLEN_512;
+ // LSHR 14 recovers the bits that LSHL 18 pushed past bit 31 above; they
+ // seed the next output vector. The same LSHL/LSHR pairing repeats below.
+ oV = iV.and(MASK_18).lanewise(VectorOperators.LSHR, 14);
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 16);
- oV = iV.and(MASK_17).lanewise(VectorOperators.LSHL, 2).or(oV);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 32);
+ oV = iV.and(MASK_18).lanewise(VectorOperators.LSHL, 4).or(oV);
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 24);
- oV = iV.and(MASK_17).lanewise(VectorOperators.LSHL, 19).or(oV);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 48);
+ oV = iV.and(MASK_18).lanewise(VectorOperators.LSHL, 22).or(oV);
oV.intoArray(out, outpos);
- outpos += VLEN_256;
+ outpos += VLEN_512;
- oV = iV.and(MASK_17).lanewise(VectorOperators.LSHR, 13);
+ oV = iV.and(MASK_18).lanewise(VectorOperators.LSHR, 10);
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 32);
- oV = iV.and(MASK_17).lanewise(VectorOperators.LSHL, 4).or(oV);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 64);
+ oV = iV.and(MASK_18).lanewise(VectorOperators.LSHL, 8).or(oV);
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 40);
- oV = iV.and(MASK_17).lanewise(VectorOperators.LSHL, 21).or(oV);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 80);
+ oV = iV.and(MASK_18).lanewise(VectorOperators.LSHL, 26).or(oV);
oV.intoArray(out, outpos);
- outpos += VLEN_256;
+ outpos += VLEN_512;
- oV = iV.and(MASK_17).lanewise(VectorOperators.LSHR, 11);
+ oV = iV.and(MASK_18).lanewise(VectorOperators.LSHR, 6);
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 48);
- oV = iV.and(MASK_17).lanewise(VectorOperators.LSHL, 6).or(oV);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 96);
+ oV = iV.and(MASK_18).lanewise(VectorOperators.LSHL, 12).or(oV);
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 56);
- oV = iV.and(MASK_17).lanewise(VectorOperators.LSHL, 23).or(oV);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 112);
+ oV = iV.and(MASK_18).lanewise(VectorOperators.LSHL, 30).or(oV);
oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.and(MASK_17).lanewise(VectorOperators.LSHR, 9);
+ outpos += VLEN_512;
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 64);
- oV = iV.and(MASK_17).lanewise(VectorOperators.LSHL, 8).or(oV);
+ oV = iV.and(MASK_18).lanewise(VectorOperators.LSHR, 2);
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 72);
- oV = iV.and(MASK_17).lanewise(VectorOperators.LSHL, 25).or(oV);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 128);
+ oV = iV.and(MASK_18).lanewise(VectorOperators.LSHL, 16).or(oV);
oV.intoArray(out, outpos);
- outpos += VLEN_256;
+ outpos += VLEN_512;
- oV = iV.and(MASK_17).lanewise(VectorOperators.LSHR, 7);
+ oV = iV.and(MASK_18).lanewise(VectorOperators.LSHR, 16);
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 80);
- oV = iV.and(MASK_17).lanewise(VectorOperators.LSHL, 10).or(oV);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 144);
+ oV = iV.and(MASK_18).lanewise(VectorOperators.LSHL, 2).or(oV);
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 88);
- oV = iV.and(MASK_17).lanewise(VectorOperators.LSHL, 27).or(oV);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 160);
+ oV = iV.and(MASK_18).lanewise(VectorOperators.LSHL, 20).or(oV);
oV.intoArray(out, outpos);
- outpos += VLEN_256;
+ outpos += VLEN_512;
- oV = iV.and(MASK_17).lanewise(VectorOperators.LSHR, 5);
+ oV = iV.and(MASK_18).lanewise(VectorOperators.LSHR, 12);
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 96);
- oV = iV.and(MASK_17).lanewise(VectorOperators.LSHL, 12).or(oV);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 176);
+ oV = iV.and(MASK_18).lanewise(VectorOperators.LSHL, 6).or(oV);
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 104);
- oV = iV.and(MASK_17).lanewise(VectorOperators.LSHL, 29).or(oV);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 192);
+ oV = iV.and(MASK_18).lanewise(VectorOperators.LSHL, 24).or(oV);
oV.intoArray(out, outpos);
- outpos += VLEN_256;
+ outpos += VLEN_512;
- oV = iV.and(MASK_17).lanewise(VectorOperators.LSHR, 3);
+ oV = iV.and(MASK_18).lanewise(VectorOperators.LSHR, 8);
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 112);
- oV = iV.and(MASK_17).lanewise(VectorOperators.LSHL, 14).or(oV);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 208);
+ oV = iV.and(MASK_18).lanewise(VectorOperators.LSHL, 10).or(oV);
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 120);
- oV = iV.and(MASK_17).lanewise(VectorOperators.LSHL, 31).or(oV);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 224);
+ oV = iV.and(MASK_18).lanewise(VectorOperators.LSHL, 28).or(oV);
oV.intoArray(out, outpos);
- outpos += VLEN_256;
+ outpos += VLEN_512;
- oV = iV.and(MASK_17).lanewise(VectorOperators.LSHR, 1);
+ oV = iV.and(MASK_18).lanewise(VectorOperators.LSHR, 4);
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 128);
- oV = iV.and(MASK_17).lanewise(VectorOperators.LSHL, 16).or(oV);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 240);
+ oV = iV.and(MASK_18).lanewise(VectorOperators.LSHL, 14).or(oV);
oV.intoArray(out, outpos);
- outpos += VLEN_256;
+ }
+
+ /**
+ * Packs 256 ints from {@code in} into 160 ints of {@code out}, 20 bits per
+ * value (MASK_20 is presumably (1 << 20) - 1, by analogy with MASK_2 -
+ * confirm against its declaration). Input is consumed one SPECIES_512
+ * vector (16 lanes) at a time, so packing is lane-wise: output lane k only
+ * ever holds bits of inputs whose index is congruent to k modulo 16.
+ * The shift schedule repeats after 128 inputs (8 vectors fill exactly 5
+ * output vectors).
+ *
+ * @param in source array; reads in[inpos .. inpos + 255]
+ * @param inpos index of the first value to pack
+ * @param out destination array; writes out[outpos .. outpos + 159]
+ * @param outpos index of the first packed word
+ */
+ private static void fastpack20(final int[] in, int inpos, final int[] out,
+ int outpos) {
+ var iV = IntVector.fromArray(SPECIES_512, in, inpos);
+ var oV = iV.and(MASK_20);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 16);
+ oV = iV.and(MASK_20).lanewise(VectorOperators.LSHL, 20).or(oV);
- oV = iV.and(MASK_17).lanewise(VectorOperators.LSHR, 16);
+ oV.intoArray(out, outpos);
+ outpos += VLEN_512;
+ oV = iV.and(MASK_20).lanewise(VectorOperators.LSHR, 12);
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 136);
- oV = iV.and(MASK_17).lanewise(VectorOperators.LSHL, 1).or(oV);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 32);
+ oV = iV.and(MASK_20).lanewise(VectorOperators.LSHL, 8).or(oV);
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 144);
- oV = iV.and(MASK_17).lanewise(VectorOperators.LSHL, 18).or(oV);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 48);
+ oV = iV.and(MASK_20).lanewise(VectorOperators.LSHL, 28).or(oV);
oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.and(MASK_17).lanewise(VectorOperators.LSHR, 14);
+ outpos += VLEN_512;
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 152);
- oV = iV.and(MASK_17).lanewise(VectorOperators.LSHL, 3).or(oV);
+ oV = iV.and(MASK_20).lanewise(VectorOperators.LSHR, 4);
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 160);
- oV = iV.and(MASK_17).lanewise(VectorOperators.LSHL, 20).or(oV);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 64);
+ oV = iV.and(MASK_20).lanewise(VectorOperators.LSHL, 16).or(oV);
oV.intoArray(out, outpos);
- outpos += VLEN_256;
+ outpos += VLEN_512;
- oV = iV.and(MASK_17).lanewise(VectorOperators.LSHR, 12);
+ oV = iV.and(MASK_20).lanewise(VectorOperators.LSHR, 16);
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 168);
- oV = iV.and(MASK_17).lanewise(VectorOperators.LSHL, 5).or(oV);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 80);
+ oV = iV.and(MASK_20).lanewise(VectorOperators.LSHL, 4).or(oV);
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 176);
- oV = iV.and(MASK_17).lanewise(VectorOperators.LSHL, 22).or(oV);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 96);
+ oV = iV.and(MASK_20).lanewise(VectorOperators.LSHL, 24).or(oV);
oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.and(MASK_17).lanewise(VectorOperators.LSHR, 10);
+ outpos += VLEN_512;
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 184);
- oV = iV.and(MASK_17).lanewise(VectorOperators.LSHL, 7).or(oV);
+ oV = iV.and(MASK_20).lanewise(VectorOperators.LSHR, 8);
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 192);
- oV = iV.and(MASK_17).lanewise(VectorOperators.LSHL, 24).or(oV);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 112);
+ oV = iV.and(MASK_20).lanewise(VectorOperators.LSHL, 12).or(oV);
oV.intoArray(out, outpos);
- outpos += VLEN_256;
+ outpos += VLEN_512;
- oV = iV.and(MASK_17).lanewise(VectorOperators.LSHR, 8);
+ // The previous store ended on a word boundary (12 + 20 = 32), so there is
+ // no carry: start the next output vector from all zeros. zero() is a
+ // static factory, so it is called on the type rather than on oV.
+ oV = IntVector.zero(SPECIES_512);
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 200);
- oV = iV.and(MASK_17).lanewise(VectorOperators.LSHL, 9).or(oV);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 128);
+ oV = iV.and(MASK_20).or(oV);
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 208);
- oV = iV.and(MASK_17).lanewise(VectorOperators.LSHL, 26).or(oV);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 144);
+ oV = iV.and(MASK_20).lanewise(VectorOperators.LSHL, 20).or(oV);
oV.intoArray(out, outpos);
- outpos += VLEN_256;
+ outpos += VLEN_512;
+
+ oV = iV.and(MASK_20).lanewise(VectorOperators.LSHR, 12);
- oV = iV.and(MASK_17).lanewise(VectorOperators.LSHR, 6);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 160);
+ oV = iV.and(MASK_20).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 176);
+ oV = iV.and(MASK_20).lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_512;
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 216);
- oV = iV.and(MASK_17).lanewise(VectorOperators.LSHL, 11).or(oV);
+ oV = iV.and(MASK_20).lanewise(VectorOperators.LSHR, 4);
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 224);
- oV = iV.and(MASK_17).lanewise(VectorOperators.LSHL, 28).or(oV);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 192);
+ oV = iV.and(MASK_20).lanewise(VectorOperators.LSHL, 16).or(oV);
oV.intoArray(out, outpos);
- outpos += VLEN_256;
+ outpos += VLEN_512;
- oV = iV.and(MASK_17).lanewise(VectorOperators.LSHR, 4);
+ oV = iV.and(MASK_20).lanewise(VectorOperators.LSHR, 16);
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 232);
- oV = iV.and(MASK_17).lanewise(VectorOperators.LSHL, 13).or(oV);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 208);
+ oV = iV.and(MASK_20).lanewise(VectorOperators.LSHL, 4).or(oV);
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 240);
- oV = iV.and(MASK_17).lanewise(VectorOperators.LSHL, 30).or(oV);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 224);
+ oV = iV.and(MASK_20).lanewise(VectorOperators.LSHL, 24).or(oV);
oV.intoArray(out, outpos);
- outpos += VLEN_256;
+ outpos += VLEN_512;
- oV = iV.and(MASK_17).lanewise(VectorOperators.LSHR, 2);
+ oV = iV.and(MASK_20).lanewise(VectorOperators.LSHR, 8);
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 248);
- oV = iV.and(MASK_17).lanewise(VectorOperators.LSHL, 15).or(oV);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 240);
+ oV = iV.and(MASK_20).lanewise(VectorOperators.LSHL, 12).or(oV);
oV.intoArray(out, outpos);
}
- private static void fastpack18(final int[] in, int inpos, final int[] out,
+ private static void fastpack22(final int[] in, int inpos, final int[] out,
int outpos) {
var iV = IntVector.fromArray(SPECIES_512, in, inpos);
- var oV = iV.and(MASK_18);
+ var oV = iV.and(MASK_22);
iV = IntVector.fromArray(SPECIES_512, in, inpos + 16);
- oV = iV.and(MASK_18).lanewise(VectorOperators.LSHL, 18).or(oV);
+ oV = iV.and(MASK_22).lanewise(VectorOperators.LSHL, 22).or(oV);
oV.intoArray(out, outpos);
outpos += VLEN_512;
- oV = iV.and(MASK_18).lanewise(VectorOperators.LSHR, 14);
+ oV = iV.and(MASK_22).lanewise(VectorOperators.LSHR, 10);
iV = IntVector.fromArray(SPECIES_512, in, inpos + 32);
- oV = iV.and(MASK_18).lanewise(VectorOperators.LSHL, 4).or(oV);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 48);
- oV = iV.and(MASK_18).lanewise(VectorOperators.LSHL, 22).or(oV);
+ oV = iV.and(MASK_22).lanewise(VectorOperators.LSHL, 12).or(oV);
oV.intoArray(out, outpos);
outpos += VLEN_512;
- oV = iV.and(MASK_18).lanewise(VectorOperators.LSHR, 10);
+ oV = iV.and(MASK_22).lanewise(VectorOperators.LSHR, 20);
+
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 48);
+ oV = iV.and(MASK_22).lanewise(VectorOperators.LSHL, 2).or(oV);
iV = IntVector.fromArray(SPECIES_512, in, inpos + 64);
- oV = iV.and(MASK_18).lanewise(VectorOperators.LSHL, 8).or(oV);
+ oV = iV.and(MASK_22).lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_512;
+
+ oV = iV.and(MASK_22).lanewise(VectorOperators.LSHR, 8);
iV = IntVector.fromArray(SPECIES_512, in, inpos + 80);
- oV = iV.and(MASK_18).lanewise(VectorOperators.LSHL, 26).or(oV);
+ oV = iV.and(MASK_22).lanewise(VectorOperators.LSHL, 14).or(oV);
oV.intoArray(out, outpos);
outpos += VLEN_512;
- oV = iV.and(MASK_18).lanewise(VectorOperators.LSHR, 6);
+ oV = iV.and(MASK_22).lanewise(VectorOperators.LSHR, 18);
iV = IntVector.fromArray(SPECIES_512, in, inpos + 96);
- oV = iV.and(MASK_18).lanewise(VectorOperators.LSHL, 12).or(oV);
+ oV = iV.and(MASK_22).lanewise(VectorOperators.LSHL, 4).or(oV);
iV = IntVector.fromArray(SPECIES_512, in, inpos + 112);
- oV = iV.and(MASK_18).lanewise(VectorOperators.LSHL, 30).or(oV);
+ oV = iV.and(MASK_22).lanewise(VectorOperators.LSHL, 26).or(oV);
oV.intoArray(out, outpos);
outpos += VLEN_512;
- oV = iV.and(MASK_18).lanewise(VectorOperators.LSHR, 2);
+ oV = iV.and(MASK_22).lanewise(VectorOperators.LSHR, 6);
iV = IntVector.fromArray(SPECIES_512, in, inpos + 128);
- oV = iV.and(MASK_18).lanewise(VectorOperators.LSHL, 16).or(oV);
+ oV = iV.and(MASK_22).lanewise(VectorOperators.LSHL, 16).or(oV);
oV.intoArray(out, outpos);
outpos += VLEN_512;
- oV = iV.and(MASK_18).lanewise(VectorOperators.LSHR, 16);
+ oV = iV.and(MASK_22).lanewise(VectorOperators.LSHR, 16);
iV = IntVector.fromArray(SPECIES_512, in, inpos + 144);
- oV = iV.and(MASK_18).lanewise(VectorOperators.LSHL, 2).or(oV);
+ oV = iV.and(MASK_22).lanewise(VectorOperators.LSHL, 6).or(oV);
iV = IntVector.fromArray(SPECIES_512, in, inpos + 160);
- oV = iV.and(MASK_18).lanewise(VectorOperators.LSHL, 20).or(oV);
+ oV = iV.and(MASK_22).lanewise(VectorOperators.LSHL, 28).or(oV);
oV.intoArray(out, outpos);
outpos += VLEN_512;
- oV = iV.and(MASK_18).lanewise(VectorOperators.LSHR, 12);
+ oV = iV.and(MASK_22).lanewise(VectorOperators.LSHR, 4);
iV = IntVector.fromArray(SPECIES_512, in, inpos + 176);
- oV = iV.and(MASK_18).lanewise(VectorOperators.LSHL, 6).or(oV);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 192);
- oV = iV.and(MASK_18).lanewise(VectorOperators.LSHL, 24).or(oV);
+ oV = iV.and(MASK_22).lanewise(VectorOperators.LSHL, 18).or(oV);
oV.intoArray(out, outpos);
outpos += VLEN_512;
- oV = iV.and(MASK_18).lanewise(VectorOperators.LSHR, 8);
+ oV = iV.and(MASK_22).lanewise(VectorOperators.LSHR, 14);
+
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 192);
+ oV = iV.and(MASK_22).lanewise(VectorOperators.LSHL, 8).or(oV);
iV = IntVector.fromArray(SPECIES_512, in, inpos + 208);
- oV = iV.and(MASK_18).lanewise(VectorOperators.LSHL, 10).or(oV);
+ oV = iV.and(MASK_22).lanewise(VectorOperators.LSHL, 30).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_512;
+
+ oV = iV.and(MASK_22).lanewise(VectorOperators.LSHR, 2);
iV = IntVector.fromArray(SPECIES_512, in, inpos + 224);
- oV = iV.and(MASK_18).lanewise(VectorOperators.LSHL, 28).or(oV);
+ oV = iV.and(MASK_22).lanewise(VectorOperators.LSHL, 20).or(oV);
oV.intoArray(out, outpos);
outpos += VLEN_512;
- oV = iV.and(MASK_18).lanewise(VectorOperators.LSHR, 4);
+ oV = iV.and(MASK_22).lanewise(VectorOperators.LSHR, 12);
iV = IntVector.fromArray(SPECIES_512, in, inpos + 240);
- oV = iV.and(MASK_18).lanewise(VectorOperators.LSHL, 14).or(oV);
+ oV = iV.and(MASK_22).lanewise(VectorOperators.LSHL, 10).or(oV);
oV.intoArray(out, outpos);
}
- private static void fastpack19(final int[] in, int inpos, final int[] out,
+ private static void fastpack24(final int[] in, int inpos, final int[] out,
int outpos) {
- var iV = IntVector.fromArray(SPECIES_256, in, inpos);
- var oV = iV.and(MASK_19);
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 8);
- oV = iV.and(MASK_19).lanewise(VectorOperators.LSHL, 19).or(oV);
+ var iV = IntVector.fromArray(SPECIES_512, in, inpos);
+ var oV = iV.and(MASK_24);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 16);
+ oV = iV.and(MASK_24).lanewise(VectorOperators.LSHL, 24).or(oV);
oV.intoArray(out, outpos);
- outpos += VLEN_256;
+ outpos += VLEN_512;
+ oV = iV.and(MASK_24).lanewise(VectorOperators.LSHR, 8);
- oV = iV.and(MASK_19).lanewise(VectorOperators.LSHR, 13);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 32);
+ oV = iV.and(MASK_24).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_512;
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 16);
- oV = iV.and(MASK_19).lanewise(VectorOperators.LSHL, 6).or(oV);
+ oV = iV.and(MASK_24).lanewise(VectorOperators.LSHR, 16);
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 24);
- oV = iV.and(MASK_19).lanewise(VectorOperators.LSHL, 25).or(oV);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 48);
+ oV = iV.and(MASK_24).lanewise(VectorOperators.LSHL, 8).or(oV);
oV.intoArray(out, outpos);
- outpos += VLEN_256;
+ outpos += VLEN_512;
- oV = iV.and(MASK_19).lanewise(VectorOperators.LSHR, 7);
+ oV = oV.zero(SPECIES_512);
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 32);
- oV = iV.and(MASK_19).lanewise(VectorOperators.LSHL, 12).or(oV);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 64);
+ oV = iV.and(MASK_24).or(oV);
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 40);
- oV = iV.and(MASK_19).lanewise(VectorOperators.LSHL, 31).or(oV);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 80);
+ oV = iV.and(MASK_24).lanewise(VectorOperators.LSHL, 24).or(oV);
oV.intoArray(out, outpos);
- outpos += VLEN_256;
+ outpos += VLEN_512;
- oV = iV.and(MASK_19).lanewise(VectorOperators.LSHR, 1);
+ oV = iV.and(MASK_24).lanewise(VectorOperators.LSHR, 8);
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 48);
- oV = iV.and(MASK_19).lanewise(VectorOperators.LSHL, 18).or(oV);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 96);
+ oV = iV.and(MASK_24).lanewise(VectorOperators.LSHL, 16).or(oV);
oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.and(MASK_19).lanewise(VectorOperators.LSHR, 14);
+ outpos += VLEN_512;
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 56);
- oV = iV.and(MASK_19).lanewise(VectorOperators.LSHL, 5).or(oV);
+ oV = iV.and(MASK_24).lanewise(VectorOperators.LSHR, 16);
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 64);
- oV = iV.and(MASK_19).lanewise(VectorOperators.LSHL, 24).or(oV);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 112);
+ oV = iV.and(MASK_24).lanewise(VectorOperators.LSHL, 8).or(oV);
oV.intoArray(out, outpos);
- outpos += VLEN_256;
+ outpos += VLEN_512;
- oV = iV.and(MASK_19).lanewise(VectorOperators.LSHR, 8);
+ oV = oV.zero(SPECIES_512);
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 72);
- oV = iV.and(MASK_19).lanewise(VectorOperators.LSHL, 11).or(oV);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 128);
+ oV = iV.and(MASK_24).or(oV);
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 80);
- oV = iV.and(MASK_19).lanewise(VectorOperators.LSHL, 30).or(oV);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 144);
+ oV = iV.and(MASK_24).lanewise(VectorOperators.LSHL, 24).or(oV);
oV.intoArray(out, outpos);
- outpos += VLEN_256;
+ outpos += VLEN_512;
- oV = iV.and(MASK_19).lanewise(VectorOperators.LSHR, 2);
+ oV = iV.and(MASK_24).lanewise(VectorOperators.LSHR, 8);
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 88);
- oV = iV.and(MASK_19).lanewise(VectorOperators.LSHL, 17).or(oV);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 160);
+ oV = iV.and(MASK_24).lanewise(VectorOperators.LSHL, 16).or(oV);
oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.and(MASK_19).lanewise(VectorOperators.LSHR, 15);
+ outpos += VLEN_512;
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 96);
- oV = iV.and(MASK_19).lanewise(VectorOperators.LSHL, 4).or(oV);
+ oV = iV.and(MASK_24).lanewise(VectorOperators.LSHR, 16);
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 104);
- oV = iV.and(MASK_19).lanewise(VectorOperators.LSHL, 23).or(oV);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 176);
+ oV = iV.and(MASK_24).lanewise(VectorOperators.LSHL, 8).or(oV);
oV.intoArray(out, outpos);
- outpos += VLEN_256;
+ outpos += VLEN_512;
- oV = iV.and(MASK_19).lanewise(VectorOperators.LSHR, 9);
+ oV = oV.zero(SPECIES_512);
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 112);
- oV = iV.and(MASK_19).lanewise(VectorOperators.LSHL, 10).or(oV);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 192);
+ oV = iV.and(MASK_24).or(oV);
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 120);
- oV = iV.and(MASK_19).lanewise(VectorOperators.LSHL, 29).or(oV);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 208);
+ oV = iV.and(MASK_24).lanewise(VectorOperators.LSHL, 24).or(oV);
oV.intoArray(out, outpos);
- outpos += VLEN_256;
+ outpos += VLEN_512;
- oV = iV.and(MASK_19).lanewise(VectorOperators.LSHR, 3);
+ oV = iV.and(MASK_24).lanewise(VectorOperators.LSHR, 8);
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 128);
- oV = iV.and(MASK_19).lanewise(VectorOperators.LSHL, 16).or(oV);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 224);
+ oV = iV.and(MASK_24).lanewise(VectorOperators.LSHL, 16).or(oV);
oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.and(MASK_19).lanewise(VectorOperators.LSHR, 16);
+ outpos += VLEN_512;
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 136);
- oV = iV.and(MASK_19).lanewise(VectorOperators.LSHL, 3).or(oV);
+ oV = iV.and(MASK_24).lanewise(VectorOperators.LSHR, 16);
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 144);
- oV = iV.and(MASK_19).lanewise(VectorOperators.LSHL, 22).or(oV);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 240);
+ oV = iV.and(MASK_24).lanewise(VectorOperators.LSHL, 8).or(oV);
oV.intoArray(out, outpos);
- outpos += VLEN_256;
+ }
- oV = iV.and(MASK_19).lanewise(VectorOperators.LSHR, 10);
+ private static void fastpack26(final int[] in, int inpos, final int[] out,
+ int outpos) {
+ var iV = IntVector.fromArray(SPECIES_512, in, inpos);
+ var oV = iV.and(MASK_26);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 16);
+ oV = iV.and(MASK_26).lanewise(VectorOperators.LSHL, 26).or(oV);
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 152);
- oV = iV.and(MASK_19).lanewise(VectorOperators.LSHL, 9).or(oV);
+ oV.intoArray(out, outpos);
+ outpos += VLEN_512;
+ oV = iV.and(MASK_26).lanewise(VectorOperators.LSHR, 6);
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 160);
- oV = iV.and(MASK_19).lanewise(VectorOperators.LSHL, 28).or(oV);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 32);
+ oV = iV.and(MASK_26).lanewise(VectorOperators.LSHL, 20).or(oV);
oV.intoArray(out, outpos);
- outpos += VLEN_256;
+ outpos += VLEN_512;
- oV = iV.and(MASK_19).lanewise(VectorOperators.LSHR, 4);
+ oV = iV.and(MASK_26).lanewise(VectorOperators.LSHR, 12);
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 168);
- oV = iV.and(MASK_19).lanewise(VectorOperators.LSHL, 15).or(oV);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 48);
+ oV = iV.and(MASK_26).lanewise(VectorOperators.LSHL, 14).or(oV);
oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.and(MASK_19).lanewise(VectorOperators.LSHR, 17);
+ outpos += VLEN_512;
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 176);
- oV = iV.and(MASK_19).lanewise(VectorOperators.LSHL, 2).or(oV);
+ oV = iV.and(MASK_26).lanewise(VectorOperators.LSHR, 18);
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 184);
- oV = iV.and(MASK_19).lanewise(VectorOperators.LSHL, 21).or(oV);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 64);
+ oV = iV.and(MASK_26).lanewise(VectorOperators.LSHL, 8).or(oV);
oV.intoArray(out, outpos);
- outpos += VLEN_256;
+ outpos += VLEN_512;
- oV = iV.and(MASK_19).lanewise(VectorOperators.LSHR, 11);
+ oV = iV.and(MASK_26).lanewise(VectorOperators.LSHR, 24);
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 192);
- oV = iV.and(MASK_19).lanewise(VectorOperators.LSHL, 8).or(oV);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 80);
+ oV = iV.and(MASK_26).lanewise(VectorOperators.LSHL, 2).or(oV);
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 200);
- oV = iV.and(MASK_19).lanewise(VectorOperators.LSHL, 27).or(oV);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 96);
+ oV = iV.and(MASK_26).lanewise(VectorOperators.LSHL, 28).or(oV);
oV.intoArray(out, outpos);
- outpos += VLEN_256;
+ outpos += VLEN_512;
- oV = iV.and(MASK_19).lanewise(VectorOperators.LSHR, 5);
+ oV = iV.and(MASK_26).lanewise(VectorOperators.LSHR, 4);
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 208);
- oV = iV.and(MASK_19).lanewise(VectorOperators.LSHL, 14).or(oV);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 112);
+ oV = iV.and(MASK_26).lanewise(VectorOperators.LSHL, 22).or(oV);
oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.and(MASK_19).lanewise(VectorOperators.LSHR, 18);
+ outpos += VLEN_512;
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 216);
- oV = iV.and(MASK_19).lanewise(VectorOperators.LSHL, 1).or(oV);
+ oV = iV.and(MASK_26).lanewise(VectorOperators.LSHR, 10);
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 224);
- oV = iV.and(MASK_19).lanewise(VectorOperators.LSHL, 20).or(oV);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 128);
+ oV = iV.and(MASK_26).lanewise(VectorOperators.LSHL, 16).or(oV);
oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.and(MASK_19).lanewise(VectorOperators.LSHR, 12);
+ outpos += VLEN_512;
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 232);
- oV = iV.and(MASK_19).lanewise(VectorOperators.LSHL, 7).or(oV);
+ oV = iV.and(MASK_26).lanewise(VectorOperators.LSHR, 16);
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 240);
- oV = iV.and(MASK_19).lanewise(VectorOperators.LSHL, 26).or(oV);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 144);
+ oV = iV.and(MASK_26).lanewise(VectorOperators.LSHL, 10).or(oV);
oV.intoArray(out, outpos);
- outpos += VLEN_256;
+ outpos += VLEN_512;
+
+ oV = iV.and(MASK_26).lanewise(VectorOperators.LSHR, 22);
- oV = iV.and(MASK_19).lanewise(VectorOperators.LSHR, 6);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 160);
+ oV = iV.and(MASK_26).lanewise(VectorOperators.LSHL, 4).or(oV);
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 248);
- oV = iV.and(MASK_19).lanewise(VectorOperators.LSHL, 13).or(oV);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 176);
+ oV = iV.and(MASK_26).lanewise(VectorOperators.LSHL, 30).or(oV);
oV.intoArray(out, outpos);
- }
+ outpos += VLEN_512;
- private static void fastpack20(final int[] in, int inpos, final int[] out,
+ oV = iV.and(MASK_26).lanewise(VectorOperators.LSHR, 2);
+
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 192);
+ oV = iV.and(MASK_26).lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_512;
+
+ oV = iV.and(MASK_26).lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 208);
+ oV = iV.and(MASK_26).lanewise(VectorOperators.LSHL, 18).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_512;
+
+ oV = iV.and(MASK_26).lanewise(VectorOperators.LSHR, 14);
+
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 224);
+ oV = iV.and(MASK_26).lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_512;
+
+ oV = iV.and(MASK_26).lanewise(VectorOperators.LSHR, 20);
+
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 240);
+ oV = iV.and(MASK_26).lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ oV.intoArray(out, outpos);
+ }
+
+ private static void fastpack28(final int[] in, int inpos, final int[] out,
int outpos) {
var iV = IntVector.fromArray(SPECIES_512, in, inpos);
- var oV = iV.and(MASK_20);
+ var oV = iV.and(MASK_28);
iV = IntVector.fromArray(SPECIES_512, in, inpos + 16);
- oV = iV.and(MASK_20).lanewise(VectorOperators.LSHL, 20).or(oV);
+ oV = iV.and(MASK_28).lanewise(VectorOperators.LSHL, 28).or(oV);
oV.intoArray(out, outpos);
outpos += VLEN_512;
- oV = iV.and(MASK_20).lanewise(VectorOperators.LSHR, 12);
+ oV = iV.and(MASK_28).lanewise(VectorOperators.LSHR, 4);
iV = IntVector.fromArray(SPECIES_512, in, inpos + 32);
- oV = iV.and(MASK_20).lanewise(VectorOperators.LSHL, 8).or(oV);
+ oV = iV.and(MASK_28).lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_512;
+
+ oV = iV.and(MASK_28).lanewise(VectorOperators.LSHR, 8);
iV = IntVector.fromArray(SPECIES_512, in, inpos + 48);
- oV = iV.and(MASK_20).lanewise(VectorOperators.LSHL, 28).or(oV);
+ oV = iV.and(MASK_28).lanewise(VectorOperators.LSHL, 20).or(oV);
oV.intoArray(out, outpos);
outpos += VLEN_512;
- oV = iV.and(MASK_20).lanewise(VectorOperators.LSHR, 4);
+ oV = iV.and(MASK_28).lanewise(VectorOperators.LSHR, 12);
iV = IntVector.fromArray(SPECIES_512, in, inpos + 64);
- oV = iV.and(MASK_20).lanewise(VectorOperators.LSHL, 16).or(oV);
+ oV = iV.and(MASK_28).lanewise(VectorOperators.LSHL, 16).or(oV);
oV.intoArray(out, outpos);
outpos += VLEN_512;
- oV = iV.and(MASK_20).lanewise(VectorOperators.LSHR, 16);
+ oV = iV.and(MASK_28).lanewise(VectorOperators.LSHR, 16);
iV = IntVector.fromArray(SPECIES_512, in, inpos + 80);
- oV = iV.and(MASK_20).lanewise(VectorOperators.LSHL, 4).or(oV);
+ oV = iV.and(MASK_28).lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_512;
+
+ oV = iV.and(MASK_28).lanewise(VectorOperators.LSHR, 20);
iV = IntVector.fromArray(SPECIES_512, in, inpos + 96);
- oV = iV.and(MASK_20).lanewise(VectorOperators.LSHL, 24).or(oV);
+ oV = iV.and(MASK_28).lanewise(VectorOperators.LSHL, 8).or(oV);
oV.intoArray(out, outpos);
outpos += VLEN_512;
- oV = iV.and(MASK_20).lanewise(VectorOperators.LSHR, 8);
+ oV = iV.and(MASK_28).lanewise(VectorOperators.LSHR, 24);
iV = IntVector.fromArray(SPECIES_512, in, inpos + 112);
- oV = iV.and(MASK_20).lanewise(VectorOperators.LSHL, 12).or(oV);
+ oV = iV.and(MASK_28).lanewise(VectorOperators.LSHL, 4).or(oV);
oV.intoArray(out, outpos);
outpos += VLEN_512;
@@ -2638,611 +1510,522 @@ private static void fastpack20(final int[] in, int inpos, final int[] out,
oV = oV.zero(SPECIES_512);
iV = IntVector.fromArray(SPECIES_512, in, inpos + 128);
- oV = iV.and(MASK_20).or(oV);
+ oV = iV.and(MASK_28).or(oV);
iV = IntVector.fromArray(SPECIES_512, in, inpos + 144);
- oV = iV.and(MASK_20).lanewise(VectorOperators.LSHL, 20).or(oV);
+ oV = iV.and(MASK_28).lanewise(VectorOperators.LSHL, 28).or(oV);
oV.intoArray(out, outpos);
outpos += VLEN_512;
- oV = iV.and(MASK_20).lanewise(VectorOperators.LSHR, 12);
+ oV = iV.and(MASK_28).lanewise(VectorOperators.LSHR, 4);
iV = IntVector.fromArray(SPECIES_512, in, inpos + 160);
- oV = iV.and(MASK_20).lanewise(VectorOperators.LSHL, 8).or(oV);
+ oV = iV.and(MASK_28).lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_512;
+
+ oV = iV.and(MASK_28).lanewise(VectorOperators.LSHR, 8);
iV = IntVector.fromArray(SPECIES_512, in, inpos + 176);
- oV = iV.and(MASK_20).lanewise(VectorOperators.LSHL, 28).or(oV);
+ oV = iV.and(MASK_28).lanewise(VectorOperators.LSHL, 20).or(oV);
oV.intoArray(out, outpos);
outpos += VLEN_512;
- oV = iV.and(MASK_20).lanewise(VectorOperators.LSHR, 4);
+ oV = iV.and(MASK_28).lanewise(VectorOperators.LSHR, 12);
iV = IntVector.fromArray(SPECIES_512, in, inpos + 192);
- oV = iV.and(MASK_20).lanewise(VectorOperators.LSHL, 16).or(oV);
+ oV = iV.and(MASK_28).lanewise(VectorOperators.LSHL, 16).or(oV);
oV.intoArray(out, outpos);
outpos += VLEN_512;
- oV = iV.and(MASK_20).lanewise(VectorOperators.LSHR, 16);
+ oV = iV.and(MASK_28).lanewise(VectorOperators.LSHR, 16);
iV = IntVector.fromArray(SPECIES_512, in, inpos + 208);
- oV = iV.and(MASK_20).lanewise(VectorOperators.LSHL, 4).or(oV);
+ oV = iV.and(MASK_28).lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_512;
+
+ oV = iV.and(MASK_28).lanewise(VectorOperators.LSHR, 20);
iV = IntVector.fromArray(SPECIES_512, in, inpos + 224);
- oV = iV.and(MASK_20).lanewise(VectorOperators.LSHL, 24).or(oV);
+ oV = iV.and(MASK_28).lanewise(VectorOperators.LSHL, 8).or(oV);
oV.intoArray(out, outpos);
outpos += VLEN_512;
- oV = iV.and(MASK_20).lanewise(VectorOperators.LSHR, 8);
+ oV = iV.and(MASK_28).lanewise(VectorOperators.LSHR, 24);
iV = IntVector.fromArray(SPECIES_512, in, inpos + 240);
- oV = iV.and(MASK_20).lanewise(VectorOperators.LSHL, 12).or(oV);
+ oV = iV.and(MASK_28).lanewise(VectorOperators.LSHL, 4).or(oV);
oV.intoArray(out, outpos);
}
- private static void fastpack21(final int[] in, int inpos, final int[] out,
+ private static void fastpack30(final int[] in, int inpos, final int[] out,
int outpos) {
- var iV = IntVector.fromArray(SPECIES_256, in, inpos);
- var oV = iV.and(MASK_21);
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 8);
- oV = iV.and(MASK_21).lanewise(VectorOperators.LSHL, 21).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.and(MASK_21).lanewise(VectorOperators.LSHR, 11);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 16);
- oV = iV.and(MASK_21).lanewise(VectorOperators.LSHL, 10).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 24);
- oV = iV.and(MASK_21).lanewise(VectorOperators.LSHL, 31).or(oV);
+ var iV = IntVector.fromArray(SPECIES_512, in, inpos);
+ var oV = iV.and(MASK_30);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 16);
+ oV = iV.and(MASK_30).lanewise(VectorOperators.LSHL, 30).or(oV);
oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.and(MASK_21).lanewise(VectorOperators.LSHR, 1);
+ outpos += VLEN_512;
+ oV = iV.and(MASK_30).lanewise(VectorOperators.LSHR, 2);
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 32);
- oV = iV.and(MASK_21).lanewise(VectorOperators.LSHL, 20).or(oV);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 32);
+ oV = iV.and(MASK_30).lanewise(VectorOperators.LSHL, 28).or(oV);
oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.and(MASK_21).lanewise(VectorOperators.LSHR, 12);
+ outpos += VLEN_512;
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 40);
- oV = iV.and(MASK_21).lanewise(VectorOperators.LSHL, 9).or(oV);
+ oV = iV.and(MASK_30).lanewise(VectorOperators.LSHR, 4);
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 48);
- oV = iV.and(MASK_21).lanewise(VectorOperators.LSHL, 30).or(oV);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 48);
+ oV = iV.and(MASK_30).lanewise(VectorOperators.LSHL, 26).or(oV);
oV.intoArray(out, outpos);
- outpos += VLEN_256;
+ outpos += VLEN_512;
- oV = iV.and(MASK_21).lanewise(VectorOperators.LSHR, 2);
+ oV = iV.and(MASK_30).lanewise(VectorOperators.LSHR, 6);
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 56);
- oV = iV.and(MASK_21).lanewise(VectorOperators.LSHL, 19).or(oV);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 64);
+ oV = iV.and(MASK_30).lanewise(VectorOperators.LSHL, 24).or(oV);
oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.and(MASK_21).lanewise(VectorOperators.LSHR, 13);
+ outpos += VLEN_512;
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 64);
- oV = iV.and(MASK_21).lanewise(VectorOperators.LSHL, 8).or(oV);
+ oV = iV.and(MASK_30).lanewise(VectorOperators.LSHR, 8);
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 72);
- oV = iV.and(MASK_21).lanewise(VectorOperators.LSHL, 29).or(oV);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 80);
+ oV = iV.and(MASK_30).lanewise(VectorOperators.LSHL, 22).or(oV);
oV.intoArray(out, outpos);
- outpos += VLEN_256;
+ outpos += VLEN_512;
- oV = iV.and(MASK_21).lanewise(VectorOperators.LSHR, 3);
+ oV = iV.and(MASK_30).lanewise(VectorOperators.LSHR, 10);
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 80);
- oV = iV.and(MASK_21).lanewise(VectorOperators.LSHL, 18).or(oV);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 96);
+ oV = iV.and(MASK_30).lanewise(VectorOperators.LSHL, 20).or(oV);
oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.and(MASK_21).lanewise(VectorOperators.LSHR, 14);
+ outpos += VLEN_512;
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 88);
- oV = iV.and(MASK_21).lanewise(VectorOperators.LSHL, 7).or(oV);
+ oV = iV.and(MASK_30).lanewise(VectorOperators.LSHR, 12);
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 96);
- oV = iV.and(MASK_21).lanewise(VectorOperators.LSHL, 28).or(oV);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 112);
+ oV = iV.and(MASK_30).lanewise(VectorOperators.LSHL, 18).or(oV);
oV.intoArray(out, outpos);
- outpos += VLEN_256;
+ outpos += VLEN_512;
- oV = iV.and(MASK_21).lanewise(VectorOperators.LSHR, 4);
+ oV = iV.and(MASK_30).lanewise(VectorOperators.LSHR, 14);
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 104);
- oV = iV.and(MASK_21).lanewise(VectorOperators.LSHL, 17).or(oV);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 128);
+ oV = iV.and(MASK_30).lanewise(VectorOperators.LSHL, 16).or(oV);
oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.and(MASK_21).lanewise(VectorOperators.LSHR, 15);
+ outpos += VLEN_512;
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 112);
- oV = iV.and(MASK_21).lanewise(VectorOperators.LSHL, 6).or(oV);
+ oV = iV.and(MASK_30).lanewise(VectorOperators.LSHR, 16);
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 120);
- oV = iV.and(MASK_21).lanewise(VectorOperators.LSHL, 27).or(oV);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 144);
+ oV = iV.and(MASK_30).lanewise(VectorOperators.LSHL, 14).or(oV);
oV.intoArray(out, outpos);
- outpos += VLEN_256;
+ outpos += VLEN_512;
- oV = iV.and(MASK_21).lanewise(VectorOperators.LSHR, 5);
+ oV = iV.and(MASK_30).lanewise(VectorOperators.LSHR, 18);
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 128);
- oV = iV.and(MASK_21).lanewise(VectorOperators.LSHL, 16).or(oV);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 160);
+ oV = iV.and(MASK_30).lanewise(VectorOperators.LSHL, 12).or(oV);
oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.and(MASK_21).lanewise(VectorOperators.LSHR, 16);
+ outpos += VLEN_512;
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 136);
- oV = iV.and(MASK_21).lanewise(VectorOperators.LSHL, 5).or(oV);
+ oV = iV.and(MASK_30).lanewise(VectorOperators.LSHR, 20);
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 144);
- oV = iV.and(MASK_21).lanewise(VectorOperators.LSHL, 26).or(oV);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 176);
+ oV = iV.and(MASK_30).lanewise(VectorOperators.LSHL, 10).or(oV);
oV.intoArray(out, outpos);
- outpos += VLEN_256;
+ outpos += VLEN_512;
- oV = iV.and(MASK_21).lanewise(VectorOperators.LSHR, 6);
+ oV = iV.and(MASK_30).lanewise(VectorOperators.LSHR, 22);
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 152);
- oV = iV.and(MASK_21).lanewise(VectorOperators.LSHL, 15).or(oV);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 192);
+ oV = iV.and(MASK_30).lanewise(VectorOperators.LSHL, 8).or(oV);
oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.and(MASK_21).lanewise(VectorOperators.LSHR, 17);
+ outpos += VLEN_512;
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 160);
- oV = iV.and(MASK_21).lanewise(VectorOperators.LSHL, 4).or(oV);
+ oV = iV.and(MASK_30).lanewise(VectorOperators.LSHR, 24);
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 168);
- oV = iV.and(MASK_21).lanewise(VectorOperators.LSHL, 25).or(oV);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 208);
+ oV = iV.and(MASK_30).lanewise(VectorOperators.LSHL, 6).or(oV);
oV.intoArray(out, outpos);
- outpos += VLEN_256;
+ outpos += VLEN_512;
- oV = iV.and(MASK_21).lanewise(VectorOperators.LSHR, 7);
+ oV = iV.and(MASK_30).lanewise(VectorOperators.LSHR, 26);
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 176);
- oV = iV.and(MASK_21).lanewise(VectorOperators.LSHL, 14).or(oV);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 224);
+ oV = iV.and(MASK_30).lanewise(VectorOperators.LSHL, 4).or(oV);
oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.and(MASK_21).lanewise(VectorOperators.LSHR, 18);
+ outpos += VLEN_512;
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 184);
- oV = iV.and(MASK_21).lanewise(VectorOperators.LSHL, 3).or(oV);
+ oV = iV.and(MASK_30).lanewise(VectorOperators.LSHR, 28);
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 192);
- oV = iV.and(MASK_21).lanewise(VectorOperators.LSHL, 24).or(oV);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 240);
+ oV = iV.and(MASK_30).lanewise(VectorOperators.LSHL, 2).or(oV);
oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.and(MASK_21).lanewise(VectorOperators.LSHR, 8);
+ }
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 200);
- oV = iV.and(MASK_21).lanewise(VectorOperators.LSHL, 13).or(oV);
+ private static void fastpackNoMask2(final int[] in, int inpos,
+ final int[] out, int outpos) {
+ var iV = IntVector.fromArray(SPECIES_512, in, inpos);
+ var oV = iV;
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 16);
+ oV = iV.lanewise(VectorOperators.LSHL, 2).or(oV);
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 32);
+ oV = iV.lanewise(VectorOperators.LSHL, 4).or(oV);
- oV = iV.and(MASK_21).lanewise(VectorOperators.LSHR, 19);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 48);
+ oV = iV.lanewise(VectorOperators.LSHL, 6).or(oV);
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 208);
- oV = iV.and(MASK_21).lanewise(VectorOperators.LSHL, 2).or(oV);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 64);
+ oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 216);
- oV = iV.and(MASK_21).lanewise(VectorOperators.LSHL, 23).or(oV);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 80);
+ oV = iV.lanewise(VectorOperators.LSHL, 10).or(oV);
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 96);
+ oV = iV.lanewise(VectorOperators.LSHL, 12).or(oV);
- oV = iV.and(MASK_21).lanewise(VectorOperators.LSHR, 9);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 112);
+ oV = iV.lanewise(VectorOperators.LSHL, 14).or(oV);
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 224);
- oV = iV.and(MASK_21).lanewise(VectorOperators.LSHL, 12).or(oV);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 128);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 144);
+ oV = iV.lanewise(VectorOperators.LSHL, 18).or(oV);
- oV = iV.and(MASK_21).lanewise(VectorOperators.LSHR, 20);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 160);
+ oV = iV.lanewise(VectorOperators.LSHL, 20).or(oV);
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 232);
- oV = iV.and(MASK_21).lanewise(VectorOperators.LSHL, 1).or(oV);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 176);
+ oV = iV.lanewise(VectorOperators.LSHL, 22).or(oV);
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 240);
- oV = iV.and(MASK_21).lanewise(VectorOperators.LSHL, 22).or(oV);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 192);
+ oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 208);
+ oV = iV.lanewise(VectorOperators.LSHL, 26).or(oV);
- oV = iV.and(MASK_21).lanewise(VectorOperators.LSHR, 10);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 224);
+ oV = iV.lanewise(VectorOperators.LSHL, 28).or(oV);
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 248);
- oV = iV.and(MASK_21).lanewise(VectorOperators.LSHL, 11).or(oV);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 240);
+ oV = iV.lanewise(VectorOperators.LSHL, 30).or(oV);
oV.intoArray(out, outpos);
}
- private static void fastpack22(final int[] in, int inpos, final int[] out,
- int outpos) {
+ private static void fastpackNoMask4(final int[] in, int inpos,
+ final int[] out, int outpos) {
var iV = IntVector.fromArray(SPECIES_512, in, inpos);
- var oV = iV.and(MASK_22);
+ var oV = iV;
iV = IntVector.fromArray(SPECIES_512, in, inpos + 16);
- oV = iV.and(MASK_22).lanewise(VectorOperators.LSHL, 22).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_512;
- oV = iV.and(MASK_22).lanewise(VectorOperators.LSHR, 10);
+ oV = iV.lanewise(VectorOperators.LSHL, 4).or(oV);
iV = IntVector.fromArray(SPECIES_512, in, inpos + 32);
- oV = iV.and(MASK_22).lanewise(VectorOperators.LSHL, 12).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_512;
-
- oV = iV.and(MASK_22).lanewise(VectorOperators.LSHR, 20);
+ oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
iV = IntVector.fromArray(SPECIES_512, in, inpos + 48);
- oV = iV.and(MASK_22).lanewise(VectorOperators.LSHL, 2).or(oV);
+ oV = iV.lanewise(VectorOperators.LSHL, 12).or(oV);
iV = IntVector.fromArray(SPECIES_512, in, inpos + 64);
- oV = iV.and(MASK_22).lanewise(VectorOperators.LSHL, 24).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_512;
-
- oV = iV.and(MASK_22).lanewise(VectorOperators.LSHR, 8);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
iV = IntVector.fromArray(SPECIES_512, in, inpos + 80);
- oV = iV.and(MASK_22).lanewise(VectorOperators.LSHL, 14).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_512;
-
- oV = iV.and(MASK_22).lanewise(VectorOperators.LSHR, 18);
+ oV = iV.lanewise(VectorOperators.LSHL, 20).or(oV);
iV = IntVector.fromArray(SPECIES_512, in, inpos + 96);
- oV = iV.and(MASK_22).lanewise(VectorOperators.LSHL, 4).or(oV);
+ oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
iV = IntVector.fromArray(SPECIES_512, in, inpos + 112);
- oV = iV.and(MASK_22).lanewise(VectorOperators.LSHL, 26).or(oV);
+ oV = iV.lanewise(VectorOperators.LSHL, 28).or(oV);
oV.intoArray(out, outpos);
outpos += VLEN_512;
-
- oV = iV.and(MASK_22).lanewise(VectorOperators.LSHR, 6);
+ oV = oV.zero(SPECIES_512);
iV = IntVector.fromArray(SPECIES_512, in, inpos + 128);
- oV = iV.and(MASK_22).lanewise(VectorOperators.LSHL, 16).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_512;
-
- oV = iV.and(MASK_22).lanewise(VectorOperators.LSHR, 16);
+ oV = iV.or(oV);
iV = IntVector.fromArray(SPECIES_512, in, inpos + 144);
- oV = iV.and(MASK_22).lanewise(VectorOperators.LSHL, 6).or(oV);
+ oV = iV.lanewise(VectorOperators.LSHL, 4).or(oV);
iV = IntVector.fromArray(SPECIES_512, in, inpos + 160);
- oV = iV.and(MASK_22).lanewise(VectorOperators.LSHL, 28).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_512;
-
- oV = iV.and(MASK_22).lanewise(VectorOperators.LSHR, 4);
+ oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
iV = IntVector.fromArray(SPECIES_512, in, inpos + 176);
- oV = iV.and(MASK_22).lanewise(VectorOperators.LSHL, 18).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_512;
-
- oV = iV.and(MASK_22).lanewise(VectorOperators.LSHR, 14);
+ oV = iV.lanewise(VectorOperators.LSHL, 12).or(oV);
iV = IntVector.fromArray(SPECIES_512, in, inpos + 192);
- oV = iV.and(MASK_22).lanewise(VectorOperators.LSHL, 8).or(oV);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
iV = IntVector.fromArray(SPECIES_512, in, inpos + 208);
- oV = iV.and(MASK_22).lanewise(VectorOperators.LSHL, 30).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_512;
-
- oV = iV.and(MASK_22).lanewise(VectorOperators.LSHR, 2);
+ oV = iV.lanewise(VectorOperators.LSHL, 20).or(oV);
iV = IntVector.fromArray(SPECIES_512, in, inpos + 224);
- oV = iV.and(MASK_22).lanewise(VectorOperators.LSHL, 20).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_512;
-
- oV = iV.and(MASK_22).lanewise(VectorOperators.LSHR, 12);
+ oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
iV = IntVector.fromArray(SPECIES_512, in, inpos + 240);
- oV = iV.and(MASK_22).lanewise(VectorOperators.LSHL, 10).or(oV);
+ oV = iV.lanewise(VectorOperators.LSHL, 28).or(oV);
oV.intoArray(out, outpos);
}
- private static void fastpack23(final int[] in, int inpos, final int[] out,
- int outpos) {
- var iV = IntVector.fromArray(SPECIES_256, in, inpos);
- var oV = iV.and(MASK_23);
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 8);
- oV = iV.and(MASK_23).lanewise(VectorOperators.LSHL, 23).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.and(MASK_23).lanewise(VectorOperators.LSHR, 9);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 16);
- oV = iV.and(MASK_23).lanewise(VectorOperators.LSHL, 14).or(oV);
+ private static void fastpackNoMask6(final int[] in, int inpos,
+ final int[] out, int outpos) {
+ var iV = IntVector.fromArray(SPECIES_512, in, inpos);
+ var oV = iV;
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 16);
+ oV = iV.lanewise(VectorOperators.LSHL, 6).or(oV);
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 32);
+ oV = iV.lanewise(VectorOperators.LSHL, 12).or(oV);
- oV = iV.and(MASK_23).lanewise(VectorOperators.LSHR, 18);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 48);
+ oV = iV.lanewise(VectorOperators.LSHL, 18).or(oV);
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 24);
- oV = iV.and(MASK_23).lanewise(VectorOperators.LSHL, 5).or(oV);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 64);
+ oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 32);
- oV = iV.and(MASK_23).lanewise(VectorOperators.LSHL, 28).or(oV);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 80);
+ oV = iV.lanewise(VectorOperators.LSHL, 30).or(oV);
oV.intoArray(out, outpos);
- outpos += VLEN_256;
+ outpos += VLEN_512;
+ oV = iV.lanewise(VectorOperators.LSHR, 2);
- oV = iV.and(MASK_23).lanewise(VectorOperators.LSHR, 4);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 96);
+ oV = iV.lanewise(VectorOperators.LSHL, 4).or(oV);
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 40);
- oV = iV.and(MASK_23).lanewise(VectorOperators.LSHL, 19).or(oV);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 112);
+ oV = iV.lanewise(VectorOperators.LSHL, 10).or(oV);
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 128);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
- oV = iV.and(MASK_23).lanewise(VectorOperators.LSHR, 13);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 144);
+ oV = iV.lanewise(VectorOperators.LSHL, 22).or(oV);
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 48);
- oV = iV.and(MASK_23).lanewise(VectorOperators.LSHL, 10).or(oV);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 160);
+ oV = iV.lanewise(VectorOperators.LSHL, 28).or(oV);
oV.intoArray(out, outpos);
- outpos += VLEN_256;
+ outpos += VLEN_512;
- oV = iV.and(MASK_23).lanewise(VectorOperators.LSHR, 22);
+ oV = iV.lanewise(VectorOperators.LSHR, 4);
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 56);
- oV = iV.and(MASK_23).lanewise(VectorOperators.LSHL, 1).or(oV);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 176);
+ oV = iV.lanewise(VectorOperators.LSHL, 2).or(oV);
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 64);
- oV = iV.and(MASK_23).lanewise(VectorOperators.LSHL, 24).or(oV);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 192);
+ oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 208);
+ oV = iV.lanewise(VectorOperators.LSHL, 14).or(oV);
- oV = iV.and(MASK_23).lanewise(VectorOperators.LSHR, 8);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 224);
+ oV = iV.lanewise(VectorOperators.LSHL, 20).or(oV);
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 72);
- oV = iV.and(MASK_23).lanewise(VectorOperators.LSHL, 15).or(oV);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 240);
+ oV = iV.lanewise(VectorOperators.LSHL, 26).or(oV);
oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.and(MASK_23).lanewise(VectorOperators.LSHR, 17);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 80);
- oV = iV.and(MASK_23).lanewise(VectorOperators.LSHL, 6).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 88);
- oV = iV.and(MASK_23).lanewise(VectorOperators.LSHL, 29).or(oV);
+ }
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
+ private static void fastpackNoMask8(final int[] in, int inpos,
+ final int[] out, int outpos) {
+ var iV = IntVector.fromArray(SPECIES_512, in, inpos);
+ var oV = iV;
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 16);
+ oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
- oV = iV.and(MASK_23).lanewise(VectorOperators.LSHR, 3);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 32);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 96);
- oV = iV.and(MASK_23).lanewise(VectorOperators.LSHL, 20).or(oV);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 48);
+ oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.and(MASK_23).lanewise(VectorOperators.LSHR, 12);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 104);
- oV = iV.and(MASK_23).lanewise(VectorOperators.LSHL, 11).or(oV);
+ outpos += VLEN_512;
+ oV = oV.zero(SPECIES_512);
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 64);
+ oV = iV.or(oV);
- oV = iV.and(MASK_23).lanewise(VectorOperators.LSHR, 21);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 80);
+ oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 112);
- oV = iV.and(MASK_23).lanewise(VectorOperators.LSHL, 2).or(oV);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 96);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 120);
- oV = iV.and(MASK_23).lanewise(VectorOperators.LSHL, 25).or(oV);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 112);
+ oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.and(MASK_23).lanewise(VectorOperators.LSHR, 7);
+ outpos += VLEN_512;
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 128);
- oV = iV.and(MASK_23).lanewise(VectorOperators.LSHL, 16).or(oV);
+ oV = oV.zero(SPECIES_512);
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 128);
+ oV = iV.or(oV);
- oV = iV.and(MASK_23).lanewise(VectorOperators.LSHR, 16);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 144);
+ oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 136);
- oV = iV.and(MASK_23).lanewise(VectorOperators.LSHL, 7).or(oV);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 160);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 144);
- oV = iV.and(MASK_23).lanewise(VectorOperators.LSHL, 30).or(oV);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 176);
+ oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
oV.intoArray(out, outpos);
- outpos += VLEN_256;
+ outpos += VLEN_512;
- oV = iV.and(MASK_23).lanewise(VectorOperators.LSHR, 2);
+ oV = oV.zero(SPECIES_512);
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 152);
- oV = iV.and(MASK_23).lanewise(VectorOperators.LSHL, 21).or(oV);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 192);
+ oV = iV.or(oV);
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 208);
+ oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
- oV = iV.and(MASK_23).lanewise(VectorOperators.LSHR, 11);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 224);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 160);
- oV = iV.and(MASK_23).lanewise(VectorOperators.LSHL, 12).or(oV);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 240);
+ oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.and(MASK_23).lanewise(VectorOperators.LSHR, 20);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 168);
- oV = iV.and(MASK_23).lanewise(VectorOperators.LSHL, 3).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 176);
- oV = iV.and(MASK_23).lanewise(VectorOperators.LSHL, 26).or(oV);
+ }
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
+ private static void fastpackNoMask10(final int[] in, int inpos,
+ final int[] out, int outpos) {
+ var iV = IntVector.fromArray(SPECIES_512, in, inpos);
+ var oV = iV;
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 16);
+ oV = iV.lanewise(VectorOperators.LSHL, 10).or(oV);
- oV = iV.and(MASK_23).lanewise(VectorOperators.LSHR, 6);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 32);
+ oV = iV.lanewise(VectorOperators.LSHL, 20).or(oV);
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 184);
- oV = iV.and(MASK_23).lanewise(VectorOperators.LSHL, 17).or(oV);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 48);
+ oV = iV.lanewise(VectorOperators.LSHL, 30).or(oV);
oV.intoArray(out, outpos);
- outpos += VLEN_256;
+ outpos += VLEN_512;
+ oV = iV.lanewise(VectorOperators.LSHR, 2);
- oV = iV.and(MASK_23).lanewise(VectorOperators.LSHR, 15);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 64);
+ oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 192);
- oV = iV.and(MASK_23).lanewise(VectorOperators.LSHL, 8).or(oV);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 80);
+ oV = iV.lanewise(VectorOperators.LSHL, 18).or(oV);
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 200);
- oV = iV.and(MASK_23).lanewise(VectorOperators.LSHL, 31).or(oV);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 96);
+ oV = iV.lanewise(VectorOperators.LSHL, 28).or(oV);
oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.and(MASK_23).lanewise(VectorOperators.LSHR, 1);
+ outpos += VLEN_512;
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 208);
- oV = iV.and(MASK_23).lanewise(VectorOperators.LSHL, 22).or(oV);
+ oV = iV.lanewise(VectorOperators.LSHR, 4);
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 112);
+ oV = iV.lanewise(VectorOperators.LSHL, 6).or(oV);
- oV = iV.and(MASK_23).lanewise(VectorOperators.LSHR, 10);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 128);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 216);
- oV = iV.and(MASK_23).lanewise(VectorOperators.LSHL, 13).or(oV);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 144);
+ oV = iV.lanewise(VectorOperators.LSHL, 26).or(oV);
oV.intoArray(out, outpos);
- outpos += VLEN_256;
+ outpos += VLEN_512;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 6);
- oV = iV.and(MASK_23).lanewise(VectorOperators.LSHR, 19);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 160);
+ oV = iV.lanewise(VectorOperators.LSHL, 4).or(oV);
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 224);
- oV = iV.and(MASK_23).lanewise(VectorOperators.LSHL, 4).or(oV);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 176);
+ oV = iV.lanewise(VectorOperators.LSHL, 14).or(oV);
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 232);
- oV = iV.and(MASK_23).lanewise(VectorOperators.LSHL, 27).or(oV);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 192);
+ oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.and(MASK_23).lanewise(VectorOperators.LSHR, 5);
+ outpos += VLEN_512;
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 240);
- oV = iV.and(MASK_23).lanewise(VectorOperators.LSHL, 18).or(oV);
+ oV = iV.lanewise(VectorOperators.LSHR, 8);
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 208);
+ oV = iV.lanewise(VectorOperators.LSHL, 2).or(oV);
- oV = iV.and(MASK_23).lanewise(VectorOperators.LSHR, 14);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 224);
+ oV = iV.lanewise(VectorOperators.LSHL, 12).or(oV);
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 248);
- oV = iV.and(MASK_23).lanewise(VectorOperators.LSHL, 9).or(oV);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 240);
+ oV = iV.lanewise(VectorOperators.LSHL, 22).or(oV);
oV.intoArray(out, outpos);
}
- private static void fastpack24(final int[] in, int inpos, final int[] out,
- int outpos) {
+ private static void fastpackNoMask12(final int[] in, int inpos,
+ final int[] out, int outpos) {
var iV = IntVector.fromArray(SPECIES_512, in, inpos);
- var oV = iV.and(MASK_24);
+ var oV = iV;
iV = IntVector.fromArray(SPECIES_512, in, inpos + 16);
- oV = iV.and(MASK_24).lanewise(VectorOperators.LSHL, 24).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_512;
- oV = iV.and(MASK_24).lanewise(VectorOperators.LSHR, 8);
+ oV = iV.lanewise(VectorOperators.LSHL, 12).or(oV);
iV = IntVector.fromArray(SPECIES_512, in, inpos + 32);
- oV = iV.and(MASK_24).lanewise(VectorOperators.LSHL, 16).or(oV);
+ oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
oV.intoArray(out, outpos);
outpos += VLEN_512;
-
- oV = iV.and(MASK_24).lanewise(VectorOperators.LSHR, 16);
+ oV = iV.lanewise(VectorOperators.LSHR, 8);
iV = IntVector.fromArray(SPECIES_512, in, inpos + 48);
- oV = iV.and(MASK_24).lanewise(VectorOperators.LSHL, 8).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_512;
-
- oV = oV.zero(SPECIES_512);
+ oV = iV.lanewise(VectorOperators.LSHL, 4).or(oV);
iV = IntVector.fromArray(SPECIES_512, in, inpos + 64);
- oV = iV.and(MASK_24).or(oV);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
iV = IntVector.fromArray(SPECIES_512, in, inpos + 80);
- oV = iV.and(MASK_24).lanewise(VectorOperators.LSHL, 24).or(oV);
+ oV = iV.lanewise(VectorOperators.LSHL, 28).or(oV);
oV.intoArray(out, outpos);
outpos += VLEN_512;
- oV = iV.and(MASK_24).lanewise(VectorOperators.LSHR, 8);
+ oV = iV.lanewise(VectorOperators.LSHR, 4);
iV = IntVector.fromArray(SPECIES_512, in, inpos + 96);
- oV = iV.and(MASK_24).lanewise(VectorOperators.LSHL, 16).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_512;
-
- oV = iV.and(MASK_24).lanewise(VectorOperators.LSHR, 16);
+ oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
iV = IntVector.fromArray(SPECIES_512, in, inpos + 112);
- oV = iV.and(MASK_24).lanewise(VectorOperators.LSHL, 8).or(oV);
+ oV = iV.lanewise(VectorOperators.LSHL, 20).or(oV);
oV.intoArray(out, outpos);
outpos += VLEN_512;
@@ -3250,7535 +2033,1600 @@ private static void fastpack24(final int[] in, int inpos, final int[] out,
oV = oV.zero(SPECIES_512);
iV = IntVector.fromArray(SPECIES_512, in, inpos + 128);
- oV = iV.and(MASK_24).or(oV);
+ oV = iV.or(oV);
iV = IntVector.fromArray(SPECIES_512, in, inpos + 144);
- oV = iV.and(MASK_24).lanewise(VectorOperators.LSHL, 24).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_512;
-
- oV = iV.and(MASK_24).lanewise(VectorOperators.LSHR, 8);
+ oV = iV.lanewise(VectorOperators.LSHL, 12).or(oV);
iV = IntVector.fromArray(SPECIES_512, in, inpos + 160);
- oV = iV.and(MASK_24).lanewise(VectorOperators.LSHL, 16).or(oV);
+ oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
oV.intoArray(out, outpos);
outpos += VLEN_512;
- oV = iV.and(MASK_24).lanewise(VectorOperators.LSHR, 16);
+ oV = iV.lanewise(VectorOperators.LSHR, 8);
iV = IntVector.fromArray(SPECIES_512, in, inpos + 176);
- oV = iV.and(MASK_24).lanewise(VectorOperators.LSHL, 8).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_512;
-
- oV = oV.zero(SPECIES_512);
+ oV = iV.lanewise(VectorOperators.LSHL, 4).or(oV);
iV = IntVector.fromArray(SPECIES_512, in, inpos + 192);
- oV = iV.and(MASK_24).or(oV);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
iV = IntVector.fromArray(SPECIES_512, in, inpos + 208);
- oV = iV.and(MASK_24).lanewise(VectorOperators.LSHL, 24).or(oV);
+ oV = iV.lanewise(VectorOperators.LSHL, 28).or(oV);
oV.intoArray(out, outpos);
outpos += VLEN_512;
- oV = iV.and(MASK_24).lanewise(VectorOperators.LSHR, 8);
+ oV = iV.lanewise(VectorOperators.LSHR, 4);
iV = IntVector.fromArray(SPECIES_512, in, inpos + 224);
- oV = iV.and(MASK_24).lanewise(VectorOperators.LSHL, 16).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_512;
-
- oV = iV.and(MASK_24).lanewise(VectorOperators.LSHR, 16);
+ oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
iV = IntVector.fromArray(SPECIES_512, in, inpos + 240);
- oV = iV.and(MASK_24).lanewise(VectorOperators.LSHL, 8).or(oV);
+ oV = iV.lanewise(VectorOperators.LSHL, 20).or(oV);
oV.intoArray(out, outpos);
}
- private static void fastpack25(final int[] in, int inpos, final int[] out,
- int outpos) {
- var iV = IntVector.fromArray(SPECIES_256, in, inpos);
- var oV = iV.and(MASK_25);
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 8);
- oV = iV.and(MASK_25).lanewise(VectorOperators.LSHL, 25).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.and(MASK_25).lanewise(VectorOperators.LSHR, 7);
+ private static void fastpackNoMask14(final int[] in, int inpos,
+ final int[] out, int outpos) {
+ var iV = IntVector.fromArray(SPECIES_512, in, inpos);
+ var oV = iV;
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 16);
+ oV = iV.lanewise(VectorOperators.LSHL, 14).or(oV);
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 16);
- oV = iV.and(MASK_25).lanewise(VectorOperators.LSHL, 18).or(oV);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 32);
+ oV = iV.lanewise(VectorOperators.LSHL, 28).or(oV);
oV.intoArray(out, outpos);
- outpos += VLEN_256;
+ outpos += VLEN_512;
+ oV = iV.lanewise(VectorOperators.LSHR, 4);
- oV = iV.and(MASK_25).lanewise(VectorOperators.LSHR, 14);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 48);
+ oV = iV.lanewise(VectorOperators.LSHL, 10).or(oV);
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 24);
- oV = iV.and(MASK_25).lanewise(VectorOperators.LSHL, 11).or(oV);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 64);
+ oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
oV.intoArray(out, outpos);
- outpos += VLEN_256;
+ outpos += VLEN_512;
- oV = iV.and(MASK_25).lanewise(VectorOperators.LSHR, 21);
+ oV = iV.lanewise(VectorOperators.LSHR, 8);
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 32);
- oV = iV.and(MASK_25).lanewise(VectorOperators.LSHL, 4).or(oV);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 80);
+ oV = iV.lanewise(VectorOperators.LSHL, 6).or(oV);
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 40);
- oV = iV.and(MASK_25).lanewise(VectorOperators.LSHL, 29).or(oV);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 96);
+ oV = iV.lanewise(VectorOperators.LSHL, 20).or(oV);
oV.intoArray(out, outpos);
- outpos += VLEN_256;
+ outpos += VLEN_512;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 12);
- oV = iV.and(MASK_25).lanewise(VectorOperators.LSHR, 3);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 112);
+ oV = iV.lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 128);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 48);
- oV = iV.and(MASK_25).lanewise(VectorOperators.LSHL, 22).or(oV);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 144);
+ oV = iV.lanewise(VectorOperators.LSHL, 30).or(oV);
oV.intoArray(out, outpos);
- outpos += VLEN_256;
+ outpos += VLEN_512;
- oV = iV.and(MASK_25).lanewise(VectorOperators.LSHR, 10);
+ oV = iV.lanewise(VectorOperators.LSHR, 2);
+
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 160);
+ oV = iV.lanewise(VectorOperators.LSHL, 12).or(oV);
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 56);
- oV = iV.and(MASK_25).lanewise(VectorOperators.LSHL, 15).or(oV);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 176);
+ oV = iV.lanewise(VectorOperators.LSHL, 26).or(oV);
oV.intoArray(out, outpos);
- outpos += VLEN_256;
+ outpos += VLEN_512;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 6);
- oV = iV.and(MASK_25).lanewise(VectorOperators.LSHR, 17);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 192);
+ oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 64);
- oV = iV.and(MASK_25).lanewise(VectorOperators.LSHL, 8).or(oV);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 208);
+ oV = iV.lanewise(VectorOperators.LSHL, 22).or(oV);
oV.intoArray(out, outpos);
- outpos += VLEN_256;
+ outpos += VLEN_512;
- oV = iV.and(MASK_25).lanewise(VectorOperators.LSHR, 24);
+ oV = iV.lanewise(VectorOperators.LSHR, 10);
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 72);
- oV = iV.and(MASK_25).lanewise(VectorOperators.LSHL, 1).or(oV);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 224);
+ oV = iV.lanewise(VectorOperators.LSHL, 4).or(oV);
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 80);
- oV = iV.and(MASK_25).lanewise(VectorOperators.LSHL, 26).or(oV);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 240);
+ oV = iV.lanewise(VectorOperators.LSHL, 18).or(oV);
oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.and(MASK_25).lanewise(VectorOperators.LSHR, 6);
+ }
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 88);
- oV = iV.and(MASK_25).lanewise(VectorOperators.LSHL, 19).or(oV);
+ private static void fastpackNoMask16(final int[] in, int inpos,
+ final int[] out, int outpos) {
+ var iV = IntVector.fromArray(SPECIES_512, in, inpos);
+ var oV = iV;
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 16);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
oV.intoArray(out, outpos);
- outpos += VLEN_256;
+ outpos += VLEN_512;
+ oV = oV.zero(SPECIES_512);
- oV = iV.and(MASK_25).lanewise(VectorOperators.LSHR, 13);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 32);
+ oV = iV.or(oV);
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 96);
- oV = iV.and(MASK_25).lanewise(VectorOperators.LSHL, 12).or(oV);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 48);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
oV.intoArray(out, outpos);
- outpos += VLEN_256;
+ outpos += VLEN_512;
- oV = iV.and(MASK_25).lanewise(VectorOperators.LSHR, 20);
+ oV = oV.zero(SPECIES_512);
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 104);
- oV = iV.and(MASK_25).lanewise(VectorOperators.LSHL, 5).or(oV);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 64);
+ oV = iV.or(oV);
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 112);
- oV = iV.and(MASK_25).lanewise(VectorOperators.LSHL, 30).or(oV);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 80);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
oV.intoArray(out, outpos);
- outpos += VLEN_256;
+ outpos += VLEN_512;
- oV = iV.and(MASK_25).lanewise(VectorOperators.LSHR, 2);
+ oV = oV.zero(SPECIES_512);
+
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 96);
+ oV = iV.or(oV);
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 120);
- oV = iV.and(MASK_25).lanewise(VectorOperators.LSHL, 23).or(oV);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 112);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
oV.intoArray(out, outpos);
- outpos += VLEN_256;
+ outpos += VLEN_512;
+
+ oV = oV.zero(SPECIES_512);
- oV = iV.and(MASK_25).lanewise(VectorOperators.LSHR, 9);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 128);
+ oV = iV.or(oV);
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 128);
- oV = iV.and(MASK_25).lanewise(VectorOperators.LSHL, 16).or(oV);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 144);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
oV.intoArray(out, outpos);
- outpos += VLEN_256;
+ outpos += VLEN_512;
+
+ oV = oV.zero(SPECIES_512);
- oV = iV.and(MASK_25).lanewise(VectorOperators.LSHR, 16);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 160);
+ oV = iV.or(oV);
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 136);
- oV = iV.and(MASK_25).lanewise(VectorOperators.LSHL, 9).or(oV);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 176);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
oV.intoArray(out, outpos);
- outpos += VLEN_256;
+ outpos += VLEN_512;
- oV = iV.and(MASK_25).lanewise(VectorOperators.LSHR, 23);
+ oV = oV.zero(SPECIES_512);
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 144);
- oV = iV.and(MASK_25).lanewise(VectorOperators.LSHL, 2).or(oV);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 192);
+ oV = iV.or(oV);
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 152);
- oV = iV.and(MASK_25).lanewise(VectorOperators.LSHL, 27).or(oV);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 208);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
oV.intoArray(out, outpos);
- outpos += VLEN_256;
+ outpos += VLEN_512;
+
+ oV = oV.zero(SPECIES_512);
- oV = iV.and(MASK_25).lanewise(VectorOperators.LSHR, 5);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 224);
+ oV = iV.or(oV);
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 160);
- oV = iV.and(MASK_25).lanewise(VectorOperators.LSHL, 20).or(oV);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 240);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.and(MASK_25).lanewise(VectorOperators.LSHR, 12);
+ }
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 168);
- oV = iV.and(MASK_25).lanewise(VectorOperators.LSHL, 13).or(oV);
+ private static void fastpackNoMask18(final int[] in, int inpos,
+ final int[] out, int outpos) {
+ var iV = IntVector.fromArray(SPECIES_512, in, inpos);
+ var oV = iV;
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 16);
+ oV = iV.lanewise(VectorOperators.LSHL, 18).or(oV);
oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.and(MASK_25).lanewise(VectorOperators.LSHR, 19);
+ outpos += VLEN_512;
+ oV = iV.lanewise(VectorOperators.LSHR, 14);
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 176);
- oV = iV.and(MASK_25).lanewise(VectorOperators.LSHL, 6).or(oV);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 32);
+ oV = iV.lanewise(VectorOperators.LSHL, 4).or(oV);
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 184);
- oV = iV.and(MASK_25).lanewise(VectorOperators.LSHL, 31).or(oV);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 48);
+ oV = iV.lanewise(VectorOperators.LSHL, 22).or(oV);
oV.intoArray(out, outpos);
- outpos += VLEN_256;
+ outpos += VLEN_512;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 10);
- oV = iV.and(MASK_25).lanewise(VectorOperators.LSHR, 1);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 64);
+ oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 192);
- oV = iV.and(MASK_25).lanewise(VectorOperators.LSHL, 24).or(oV);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 80);
+ oV = iV.lanewise(VectorOperators.LSHL, 26).or(oV);
oV.intoArray(out, outpos);
- outpos += VLEN_256;
+ outpos += VLEN_512;
- oV = iV.and(MASK_25).lanewise(VectorOperators.LSHR, 8);
+ oV = iV.lanewise(VectorOperators.LSHR, 6);
+
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 96);
+ oV = iV.lanewise(VectorOperators.LSHL, 12).or(oV);
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 200);
- oV = iV.and(MASK_25).lanewise(VectorOperators.LSHL, 17).or(oV);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 112);
+ oV = iV.lanewise(VectorOperators.LSHL, 30).or(oV);
oV.intoArray(out, outpos);
- outpos += VLEN_256;
+ outpos += VLEN_512;
- oV = iV.and(MASK_25).lanewise(VectorOperators.LSHR, 15);
+ oV = iV.lanewise(VectorOperators.LSHR, 2);
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 208);
- oV = iV.and(MASK_25).lanewise(VectorOperators.LSHL, 10).or(oV);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 128);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
oV.intoArray(out, outpos);
- outpos += VLEN_256;
+ outpos += VLEN_512;
- oV = iV.and(MASK_25).lanewise(VectorOperators.LSHR, 22);
+ oV = iV.lanewise(VectorOperators.LSHR, 16);
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 216);
- oV = iV.and(MASK_25).lanewise(VectorOperators.LSHL, 3).or(oV);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 144);
+ oV = iV.lanewise(VectorOperators.LSHL, 2).or(oV);
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 224);
- oV = iV.and(MASK_25).lanewise(VectorOperators.LSHL, 28).or(oV);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 160);
+ oV = iV.lanewise(VectorOperators.LSHL, 20).or(oV);
oV.intoArray(out, outpos);
- outpos += VLEN_256;
+ outpos += VLEN_512;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 12);
- oV = iV.and(MASK_25).lanewise(VectorOperators.LSHR, 4);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 176);
+ oV = iV.lanewise(VectorOperators.LSHL, 6).or(oV);
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 232);
- oV = iV.and(MASK_25).lanewise(VectorOperators.LSHL, 21).or(oV);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 192);
+ oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
oV.intoArray(out, outpos);
- outpos += VLEN_256;
+ outpos += VLEN_512;
- oV = iV.and(MASK_25).lanewise(VectorOperators.LSHR, 11);
+ oV = iV.lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 208);
+ oV = iV.lanewise(VectorOperators.LSHL, 10).or(oV);
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 240);
- oV = iV.and(MASK_25).lanewise(VectorOperators.LSHL, 14).or(oV);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 224);
+ oV = iV.lanewise(VectorOperators.LSHL, 28).or(oV);
oV.intoArray(out, outpos);
- outpos += VLEN_256;
+ outpos += VLEN_512;
- oV = iV.and(MASK_25).lanewise(VectorOperators.LSHR, 18);
+ oV = iV.lanewise(VectorOperators.LSHR, 4);
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 248);
- oV = iV.and(MASK_25).lanewise(VectorOperators.LSHL, 7).or(oV);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 240);
+ oV = iV.lanewise(VectorOperators.LSHL, 14).or(oV);
oV.intoArray(out, outpos);
}
- private static void fastpack26(final int[] in, int inpos, final int[] out,
- int outpos) {
+ private static void fastpackNoMask20(final int[] in, int inpos,
+ final int[] out, int outpos) { // packs in[inpos..inpos+255] at 20 bits per value, lane by lane, into 10 vectors (160 ints) starting at out[outpos]
var iV = IntVector.fromArray(SPECIES_512, in, inpos);
- var oV = iV.and(MASK_26);
+ var oV = iV; // unmasked: any bit above bit 19 would be OR-ed into the neighbouring value -- NOTE(review): presumably callers guarantee inputs fit in 20 bits; confirm
iV = IntVector.fromArray(SPECIES_512, in, inpos + 16);
- oV = iV.and(MASK_26).lanewise(VectorOperators.LSHL, 26).or(oV);
+ oV = iV.lanewise(VectorOperators.LSHL, 20).or(oV);
oV.intoArray(out, outpos);
outpos += VLEN_512;
- oV = iV.and(MASK_26).lanewise(VectorOperators.LSHR, 6);
+ oV = iV.lanewise(VectorOperators.LSHR, 12); // 12 low bits already written; carry the other 8
iV = IntVector.fromArray(SPECIES_512, in, inpos + 32);
- oV = iV.and(MASK_26).lanewise(VectorOperators.LSHL, 20).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_512;
-
- oV = iV.and(MASK_26).lanewise(VectorOperators.LSHR, 12);
+ oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
iV = IntVector.fromArray(SPECIES_512, in, inpos + 48);
- oV = iV.and(MASK_26).lanewise(VectorOperators.LSHL, 14).or(oV);
+ oV = iV.lanewise(VectorOperators.LSHL, 28).or(oV);
oV.intoArray(out, outpos);
outpos += VLEN_512;
- oV = iV.and(MASK_26).lanewise(VectorOperators.LSHR, 18);
+ oV = iV.lanewise(VectorOperators.LSHR, 4); // 4 low bits already written; carry the other 16
iV = IntVector.fromArray(SPECIES_512, in, inpos + 64);
- oV = iV.and(MASK_26).lanewise(VectorOperators.LSHL, 8).or(oV);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
oV.intoArray(out, outpos);
outpos += VLEN_512;
- oV = iV.and(MASK_26).lanewise(VectorOperators.LSHR, 24);
+ oV = iV.lanewise(VectorOperators.LSHR, 16); // 16 low bits already written; carry the other 4
iV = IntVector.fromArray(SPECIES_512, in, inpos + 80);
- oV = iV.and(MASK_26).lanewise(VectorOperators.LSHL, 2).or(oV);
+ oV = iV.lanewise(VectorOperators.LSHL, 4).or(oV);
iV = IntVector.fromArray(SPECIES_512, in, inpos + 96);
- oV = iV.and(MASK_26).lanewise(VectorOperators.LSHL, 28).or(oV);
+ oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
oV.intoArray(out, outpos);
outpos += VLEN_512;
- oV = iV.and(MASK_26).lanewise(VectorOperators.LSHR, 4);
+ oV = iV.lanewise(VectorOperators.LSHR, 8); // 8 low bits already written; carry the other 12
iV = IntVector.fromArray(SPECIES_512, in, inpos + 112);
- oV = iV.and(MASK_26).lanewise(VectorOperators.LSHL, 22).or(oV);
+ oV = iV.lanewise(VectorOperators.LSHL, 12).or(oV);
oV.intoArray(out, outpos);
outpos += VLEN_512;
- oV = iV.and(MASK_26).lanewise(VectorOperators.LSHR, 10);
+ oV = oV.zero(SPECIES_512); // previous word ended exactly on a value boundary: nothing to carry
iV = IntVector.fromArray(SPECIES_512, in, inpos + 128);
- oV = iV.and(MASK_26).lanewise(VectorOperators.LSHL, 16).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_512;
-
- oV = iV.and(MASK_26).lanewise(VectorOperators.LSHR, 16);
+ oV = iV.or(oV); // OR with the all-zero vector: this value starts the next word at bit 0
iV = IntVector.fromArray(SPECIES_512, in, inpos + 144);
- oV = iV.and(MASK_26).lanewise(VectorOperators.LSHL, 10).or(oV);
+ oV = iV.lanewise(VectorOperators.LSHL, 20).or(oV);
oV.intoArray(out, outpos);
outpos += VLEN_512;
- oV = iV.and(MASK_26).lanewise(VectorOperators.LSHR, 22);
+ oV = iV.lanewise(VectorOperators.LSHR, 12); // 12 low bits already written; carry the other 8
iV = IntVector.fromArray(SPECIES_512, in, inpos + 160);
- oV = iV.and(MASK_26).lanewise(VectorOperators.LSHL, 4).or(oV);
+ oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
iV = IntVector.fromArray(SPECIES_512, in, inpos + 176);
- oV = iV.and(MASK_26).lanewise(VectorOperators.LSHL, 30).or(oV);
+ oV = iV.lanewise(VectorOperators.LSHL, 28).or(oV);
oV.intoArray(out, outpos);
outpos += VLEN_512;
- oV = iV.and(MASK_26).lanewise(VectorOperators.LSHR, 2);
+ oV = iV.lanewise(VectorOperators.LSHR, 4); // 4 low bits already written; carry the other 16
iV = IntVector.fromArray(SPECIES_512, in, inpos + 192);
- oV = iV.and(MASK_26).lanewise(VectorOperators.LSHL, 24).or(oV);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
oV.intoArray(out, outpos);
outpos += VLEN_512;
- oV = iV.and(MASK_26).lanewise(VectorOperators.LSHR, 8);
+ oV = iV.lanewise(VectorOperators.LSHR, 16); // 16 low bits already written; carry the other 4
iV = IntVector.fromArray(SPECIES_512, in, inpos + 208);
- oV = iV.and(MASK_26).lanewise(VectorOperators.LSHL, 18).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_512;
-
- oV = iV.and(MASK_26).lanewise(VectorOperators.LSHR, 14);
+ oV = iV.lanewise(VectorOperators.LSHL, 4).or(oV);
iV = IntVector.fromArray(SPECIES_512, in, inpos + 224);
- oV = iV.and(MASK_26).lanewise(VectorOperators.LSHL, 12).or(oV);
+ oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
oV.intoArray(out, outpos);
outpos += VLEN_512;
- oV = iV.and(MASK_26).lanewise(VectorOperators.LSHR, 20);
+ oV = iV.lanewise(VectorOperators.LSHR, 8); // 8 low bits already written; carry the other 12
iV = IntVector.fromArray(SPECIES_512, in, inpos + 240);
- oV = iV.and(MASK_26).lanewise(VectorOperators.LSHL, 6).or(oV);
+ oV = iV.lanewise(VectorOperators.LSHL, 12).or(oV);
oV.intoArray(out, outpos);
}
- private static void fastpack27(final int[] in, int inpos, final int[] out,
- int outpos) {
- var iV = IntVector.fromArray(SPECIES_256, in, inpos);
- var oV = iV.and(MASK_27);
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 8);
- oV = iV.and(MASK_27).lanewise(VectorOperators.LSHL, 27).or(oV);
+ private static void fastpackNoMask22(final int[] in, int inpos,
+ final int[] out, int outpos) { // packs in[inpos..inpos+255] at 22 bits per value, lane by lane, into 11 vectors (176 ints) starting at out[outpos]
+ var iV = IntVector.fromArray(SPECIES_512, in, inpos);
+ var oV = iV; // unmasked: any bit above bit 21 would be OR-ed into the neighbouring value -- NOTE(review): presumably callers guarantee inputs fit in 22 bits; confirm
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 16);
+ oV = iV.lanewise(VectorOperators.LSHL, 22).or(oV);
oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.and(MASK_27).lanewise(VectorOperators.LSHR, 5);
+ outpos += VLEN_512;
+ oV = iV.lanewise(VectorOperators.LSHR, 10); // 10 low bits already written; carry the other 12
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 16);
- oV = iV.and(MASK_27).lanewise(VectorOperators.LSHL, 22).or(oV);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 32);
+ oV = iV.lanewise(VectorOperators.LSHL, 12).or(oV);
oV.intoArray(out, outpos);
- outpos += VLEN_256;
+ outpos += VLEN_512;
- oV = iV.and(MASK_27).lanewise(VectorOperators.LSHR, 10);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 24);
- oV = iV.and(MASK_27).lanewise(VectorOperators.LSHL, 17).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
+ oV = iV.lanewise(VectorOperators.LSHR, 20); // 20 low bits already written; carry the other 2
- oV = iV.and(MASK_27).lanewise(VectorOperators.LSHR, 15);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 48);
+ oV = iV.lanewise(VectorOperators.LSHL, 2).or(oV);
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 32);
- oV = iV.and(MASK_27).lanewise(VectorOperators.LSHL, 12).or(oV);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 64);
+ oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
oV.intoArray(out, outpos);
- outpos += VLEN_256;
+ outpos += VLEN_512;
- oV = iV.and(MASK_27).lanewise(VectorOperators.LSHR, 20);
+ oV = iV.lanewise(VectorOperators.LSHR, 8); // 8 low bits already written; carry the other 14
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 40);
- oV = iV.and(MASK_27).lanewise(VectorOperators.LSHL, 7).or(oV);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 80);
+ oV = iV.lanewise(VectorOperators.LSHL, 14).or(oV);
oV.intoArray(out, outpos);
- outpos += VLEN_256;
+ outpos += VLEN_512;
- oV = iV.and(MASK_27).lanewise(VectorOperators.LSHR, 25);
+ oV = iV.lanewise(VectorOperators.LSHR, 18); // 18 low bits already written; carry the other 4
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 48);
- oV = iV.and(MASK_27).lanewise(VectorOperators.LSHL, 2).or(oV);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 96);
+ oV = iV.lanewise(VectorOperators.LSHL, 4).or(oV);
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 56);
- oV = iV.and(MASK_27).lanewise(VectorOperators.LSHL, 29).or(oV);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 112);
+ oV = iV.lanewise(VectorOperators.LSHL, 26).or(oV);
oV.intoArray(out, outpos);
- outpos += VLEN_256;
+ outpos += VLEN_512;
- oV = iV.and(MASK_27).lanewise(VectorOperators.LSHR, 3);
+ oV = iV.lanewise(VectorOperators.LSHR, 6); // 6 low bits already written; carry the other 16
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 64);
- oV = iV.and(MASK_27).lanewise(VectorOperators.LSHL, 24).or(oV);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 128);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.and(MASK_27).lanewise(VectorOperators.LSHR, 8);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 72);
- oV = iV.and(MASK_27).lanewise(VectorOperators.LSHL, 19).or(oV);
+ outpos += VLEN_512;
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
+ oV = iV.lanewise(VectorOperators.LSHR, 16); // 16 low bits already written; carry the other 6
- oV = iV.and(MASK_27).lanewise(VectorOperators.LSHR, 13);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 144);
+ oV = iV.lanewise(VectorOperators.LSHL, 6).or(oV);
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 80);
- oV = iV.and(MASK_27).lanewise(VectorOperators.LSHL, 14).or(oV);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 160);
+ oV = iV.lanewise(VectorOperators.LSHL, 28).or(oV);
oV.intoArray(out, outpos);
- outpos += VLEN_256;
+ outpos += VLEN_512;
- oV = iV.and(MASK_27).lanewise(VectorOperators.LSHR, 18);
+ oV = iV.lanewise(VectorOperators.LSHR, 4); // 4 low bits already written; carry the other 18
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 88);
- oV = iV.and(MASK_27).lanewise(VectorOperators.LSHL, 9).or(oV);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 176);
+ oV = iV.lanewise(VectorOperators.LSHL, 18).or(oV);
oV.intoArray(out, outpos);
- outpos += VLEN_256;
+ outpos += VLEN_512;
- oV = iV.and(MASK_27).lanewise(VectorOperators.LSHR, 23);
+ oV = iV.lanewise(VectorOperators.LSHR, 14); // 14 low bits already written; carry the other 8
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 96);
- oV = iV.and(MASK_27).lanewise(VectorOperators.LSHL, 4).or(oV);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 192);
+ oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 104);
- oV = iV.and(MASK_27).lanewise(VectorOperators.LSHL, 31).or(oV);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 208);
+ oV = iV.lanewise(VectorOperators.LSHL, 30).or(oV);
oV.intoArray(out, outpos);
- outpos += VLEN_256;
+ outpos += VLEN_512;
- oV = iV.and(MASK_27).lanewise(VectorOperators.LSHR, 1);
+ oV = iV.lanewise(VectorOperators.LSHR, 2); // 2 low bits already written; carry the other 20
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 112);
- oV = iV.and(MASK_27).lanewise(VectorOperators.LSHL, 26).or(oV);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 224);
+ oV = iV.lanewise(VectorOperators.LSHL, 20).or(oV);
oV.intoArray(out, outpos);
- outpos += VLEN_256;
+ outpos += VLEN_512;
- oV = iV.and(MASK_27).lanewise(VectorOperators.LSHR, 6);
+ oV = iV.lanewise(VectorOperators.LSHR, 12); // 12 low bits already written; carry the other 10
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 120);
- oV = iV.and(MASK_27).lanewise(VectorOperators.LSHL, 21).or(oV);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 240);
+ oV = iV.lanewise(VectorOperators.LSHL, 10).or(oV);
oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.and(MASK_27).lanewise(VectorOperators.LSHR, 11);
+ }
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 128);
- oV = iV.and(MASK_27).lanewise(VectorOperators.LSHL, 16).or(oV);
+ private static void fastpackNoMask24(final int[] in, int inpos,
+ final int[] out, int outpos) { // packs in[inpos..inpos+255] at 24 bits per value, lane by lane, into 12 vectors (192 ints) starting at out[outpos]
+ var iV = IntVector.fromArray(SPECIES_512, in, inpos);
+ var oV = iV; // unmasked: any bit above bit 23 would be OR-ed into the neighbouring value -- NOTE(review): presumably callers guarantee inputs fit in 24 bits; confirm
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 16);
+ oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.and(MASK_27).lanewise(VectorOperators.LSHR, 16);
+ outpos += VLEN_512;
+ oV = iV.lanewise(VectorOperators.LSHR, 8); // 8 low bits already written; carry the other 16
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 136);
- oV = iV.and(MASK_27).lanewise(VectorOperators.LSHL, 11).or(oV);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 32);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
oV.intoArray(out, outpos);
- outpos += VLEN_256;
+ outpos += VLEN_512;
- oV = iV.and(MASK_27).lanewise(VectorOperators.LSHR, 21);
+ oV = iV.lanewise(VectorOperators.LSHR, 16); // 16 low bits already written; carry the other 8
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 144);
- oV = iV.and(MASK_27).lanewise(VectorOperators.LSHL, 6).or(oV);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 48);
+ oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
oV.intoArray(out, outpos);
- outpos += VLEN_256;
+ outpos += VLEN_512;
- oV = iV.and(MASK_27).lanewise(VectorOperators.LSHR, 26);
+ oV = oV.zero(SPECIES_512); // previous word ended exactly on a value boundary: nothing to carry
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 152);
- oV = iV.and(MASK_27).lanewise(VectorOperators.LSHL, 1).or(oV);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 64);
+ oV = iV.or(oV); // OR with the all-zero vector: this value starts the next word at bit 0
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 160);
- oV = iV.and(MASK_27).lanewise(VectorOperators.LSHL, 28).or(oV);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 80);
+ oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
oV.intoArray(out, outpos);
- outpos += VLEN_256;
+ outpos += VLEN_512;
- oV = iV.and(MASK_27).lanewise(VectorOperators.LSHR, 4);
+ oV = iV.lanewise(VectorOperators.LSHR, 8); // 8 low bits already written; carry the other 16
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 168);
- oV = iV.and(MASK_27).lanewise(VectorOperators.LSHL, 23).or(oV);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 96);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
oV.intoArray(out, outpos);
- outpos += VLEN_256;
+ outpos += VLEN_512;
- oV = iV.and(MASK_27).lanewise(VectorOperators.LSHR, 9);
+ oV = iV.lanewise(VectorOperators.LSHR, 16); // 16 low bits already written; carry the other 8
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 176);
- oV = iV.and(MASK_27).lanewise(VectorOperators.LSHL, 18).or(oV);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 112);
+ oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.and(MASK_27).lanewise(VectorOperators.LSHR, 14);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 184);
- oV = iV.and(MASK_27).lanewise(VectorOperators.LSHL, 13).or(oV);
+ outpos += VLEN_512;
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
+ oV = oV.zero(SPECIES_512); // previous word ended exactly on a value boundary: nothing to carry
- oV = iV.and(MASK_27).lanewise(VectorOperators.LSHR, 19);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 128);
+ oV = iV.or(oV); // OR with the all-zero vector: this value starts the next word at bit 0
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 192);
- oV = iV.and(MASK_27).lanewise(VectorOperators.LSHL, 8).or(oV);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 144);
+ oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.and(MASK_27).lanewise(VectorOperators.LSHR, 24);
+ outpos += VLEN_512;
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 200);
- oV = iV.and(MASK_27).lanewise(VectorOperators.LSHL, 3).or(oV);
+ oV = iV.lanewise(VectorOperators.LSHR, 8); // 8 low bits already written; carry the other 16
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 208);
- oV = iV.and(MASK_27).lanewise(VectorOperators.LSHL, 30).or(oV);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 160);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
oV.intoArray(out, outpos);
- outpos += VLEN_256;
+ outpos += VLEN_512;
- oV = iV.and(MASK_27).lanewise(VectorOperators.LSHR, 2);
+ oV = iV.lanewise(VectorOperators.LSHR, 16); // 16 low bits already written; carry the other 8
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 216);
- oV = iV.and(MASK_27).lanewise(VectorOperators.LSHL, 25).or(oV);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 176);
+ oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.and(MASK_27).lanewise(VectorOperators.LSHR, 7);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 224);
- oV = iV.and(MASK_27).lanewise(VectorOperators.LSHL, 20).or(oV);
+ outpos += VLEN_512;
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
+ oV = oV.zero(SPECIES_512); // previous word ended exactly on a value boundary: nothing to carry
- oV = iV.and(MASK_27).lanewise(VectorOperators.LSHR, 12);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 192);
+ oV = iV.or(oV); // OR with the all-zero vector: this value starts the next word at bit 0
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 232);
- oV = iV.and(MASK_27).lanewise(VectorOperators.LSHL, 15).or(oV);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 208);
+ oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
oV.intoArray(out, outpos);
- outpos += VLEN_256;
+ outpos += VLEN_512;
- oV = iV.and(MASK_27).lanewise(VectorOperators.LSHR, 17);
+ oV = iV.lanewise(VectorOperators.LSHR, 8); // 8 low bits already written; carry the other 16
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 240);
- oV = iV.and(MASK_27).lanewise(VectorOperators.LSHL, 10).or(oV);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 224);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
oV.intoArray(out, outpos);
- outpos += VLEN_256;
+ outpos += VLEN_512;
- oV = iV.and(MASK_27).lanewise(VectorOperators.LSHR, 22);
+ oV = iV.lanewise(VectorOperators.LSHR, 16); // 16 low bits already written; carry the other 8
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 248);
- oV = iV.and(MASK_27).lanewise(VectorOperators.LSHL, 5).or(oV);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 240);
+ oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
oV.intoArray(out, outpos);
}
- private static void fastpack28(final int[] in, int inpos, final int[] out,
- int outpos) {
+ private static void fastpackNoMask26(final int[] in, int inpos,
+ final int[] out, int outpos) { // packs in[inpos..inpos+255] at 26 bits per value, lane by lane, into 13 vectors (208 ints) starting at out[outpos]
var iV = IntVector.fromArray(SPECIES_512, in, inpos);
- var oV = iV.and(MASK_28);
+ var oV = iV; // unmasked: any bit above bit 25 would be OR-ed into the neighbouring value -- NOTE(review): presumably callers guarantee inputs fit in 26 bits; confirm
iV = IntVector.fromArray(SPECIES_512, in, inpos + 16);
- oV = iV.and(MASK_28).lanewise(VectorOperators.LSHL, 28).or(oV);
+ oV = iV.lanewise(VectorOperators.LSHL, 26).or(oV);
oV.intoArray(out, outpos);
outpos += VLEN_512;
- oV = iV.and(MASK_28).lanewise(VectorOperators.LSHR, 4);
+ oV = iV.lanewise(VectorOperators.LSHR, 6); // 6 low bits already written; carry the other 20
iV = IntVector.fromArray(SPECIES_512, in, inpos + 32);
- oV = iV.and(MASK_28).lanewise(VectorOperators.LSHL, 24).or(oV);
+ oV = iV.lanewise(VectorOperators.LSHL, 20).or(oV);
oV.intoArray(out, outpos);
outpos += VLEN_512;
- oV = iV.and(MASK_28).lanewise(VectorOperators.LSHR, 8);
+ oV = iV.lanewise(VectorOperators.LSHR, 12); // 12 low bits already written; carry the other 14
iV = IntVector.fromArray(SPECIES_512, in, inpos + 48);
- oV = iV.and(MASK_28).lanewise(VectorOperators.LSHL, 20).or(oV);
+ oV = iV.lanewise(VectorOperators.LSHL, 14).or(oV);
oV.intoArray(out, outpos);
outpos += VLEN_512;
- oV = iV.and(MASK_28).lanewise(VectorOperators.LSHR, 12);
+ oV = iV.lanewise(VectorOperators.LSHR, 18); // 18 low bits already written; carry the other 8
iV = IntVector.fromArray(SPECIES_512, in, inpos + 64);
- oV = iV.and(MASK_28).lanewise(VectorOperators.LSHL, 16).or(oV);
+ oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
oV.intoArray(out, outpos);
outpos += VLEN_512;
- oV = iV.and(MASK_28).lanewise(VectorOperators.LSHR, 16);
+ oV = iV.lanewise(VectorOperators.LSHR, 24); // 24 low bits already written; carry the other 2
iV = IntVector.fromArray(SPECIES_512, in, inpos + 80);
- oV = iV.and(MASK_28).lanewise(VectorOperators.LSHL, 12).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_512;
-
- oV = iV.and(MASK_28).lanewise(VectorOperators.LSHR, 20);
+ oV = iV.lanewise(VectorOperators.LSHL, 2).or(oV);
iV = IntVector.fromArray(SPECIES_512, in, inpos + 96);
- oV = iV.and(MASK_28).lanewise(VectorOperators.LSHL, 8).or(oV);
+ oV = iV.lanewise(VectorOperators.LSHL, 28).or(oV);
oV.intoArray(out, outpos);
outpos += VLEN_512;
- oV = iV.and(MASK_28).lanewise(VectorOperators.LSHR, 24);
+ oV = iV.lanewise(VectorOperators.LSHR, 4); // 4 low bits already written; carry the other 22
iV = IntVector.fromArray(SPECIES_512, in, inpos + 112);
- oV = iV.and(MASK_28).lanewise(VectorOperators.LSHL, 4).or(oV);
+ oV = iV.lanewise(VectorOperators.LSHL, 22).or(oV);
oV.intoArray(out, outpos);
outpos += VLEN_512;
- oV = oV.zero(SPECIES_512);
+ oV = iV.lanewise(VectorOperators.LSHR, 10); // 10 low bits already written; carry the other 16
iV = IntVector.fromArray(SPECIES_512, in, inpos + 128);
- oV = iV.and(MASK_28).or(oV);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 144);
- oV = iV.and(MASK_28).lanewise(VectorOperators.LSHL, 28).or(oV);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
oV.intoArray(out, outpos);
outpos += VLEN_512;
- oV = iV.and(MASK_28).lanewise(VectorOperators.LSHR, 4);
+ oV = iV.lanewise(VectorOperators.LSHR, 16); // 16 low bits already written; carry the other 10
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 160);
- oV = iV.and(MASK_28).lanewise(VectorOperators.LSHL, 24).or(oV);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 144);
+ oV = iV.lanewise(VectorOperators.LSHL, 10).or(oV);
oV.intoArray(out, outpos);
outpos += VLEN_512;
- oV = iV.and(MASK_28).lanewise(VectorOperators.LSHR, 8);
+ oV = iV.lanewise(VectorOperators.LSHR, 22); // 22 low bits already written; carry the other 4
+
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 160);
+ oV = iV.lanewise(VectorOperators.LSHL, 4).or(oV);
iV = IntVector.fromArray(SPECIES_512, in, inpos + 176);
- oV = iV.and(MASK_28).lanewise(VectorOperators.LSHL, 20).or(oV);
+ oV = iV.lanewise(VectorOperators.LSHL, 30).or(oV);
oV.intoArray(out, outpos);
outpos += VLEN_512;
- oV = iV.and(MASK_28).lanewise(VectorOperators.LSHR, 12);
+ oV = iV.lanewise(VectorOperators.LSHR, 2); // 2 low bits already written; carry the other 24
iV = IntVector.fromArray(SPECIES_512, in, inpos + 192);
- oV = iV.and(MASK_28).lanewise(VectorOperators.LSHL, 16).or(oV);
+ oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
oV.intoArray(out, outpos);
outpos += VLEN_512;
- oV = iV.and(MASK_28).lanewise(VectorOperators.LSHR, 16);
+ oV = iV.lanewise(VectorOperators.LSHR, 8); // 8 low bits already written; carry the other 18
iV = IntVector.fromArray(SPECIES_512, in, inpos + 208);
- oV = iV.and(MASK_28).lanewise(VectorOperators.LSHL, 12).or(oV);
+ oV = iV.lanewise(VectorOperators.LSHL, 18).or(oV);
oV.intoArray(out, outpos);
outpos += VLEN_512;
- oV = iV.and(MASK_28).lanewise(VectorOperators.LSHR, 20);
+ oV = iV.lanewise(VectorOperators.LSHR, 14); // 14 low bits already written; carry the other 12
iV = IntVector.fromArray(SPECIES_512, in, inpos + 224);
- oV = iV.and(MASK_28).lanewise(VectorOperators.LSHL, 8).or(oV);
+ oV = iV.lanewise(VectorOperators.LSHL, 12).or(oV);
oV.intoArray(out, outpos);
outpos += VLEN_512;
- oV = iV.and(MASK_28).lanewise(VectorOperators.LSHR, 24);
+ oV = iV.lanewise(VectorOperators.LSHR, 20); // 20 low bits already written; carry the other 6
iV = IntVector.fromArray(SPECIES_512, in, inpos + 240);
- oV = iV.and(MASK_28).lanewise(VectorOperators.LSHL, 4).or(oV);
+ oV = iV.lanewise(VectorOperators.LSHL, 6).or(oV);
oV.intoArray(out, outpos);
}
- private static void fastpack29(final int[] in, int inpos, final int[] out,
- int outpos) {
- var iV = IntVector.fromArray(SPECIES_256, in, inpos);
- var oV = iV.and(MASK_29);
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 8);
- oV = iV.and(MASK_29).lanewise(VectorOperators.LSHL, 29).or(oV);
+ private static void fastpackNoMask28(final int[] in, int inpos,
+ final int[] out, int outpos) { // packs in[inpos..inpos+255] at 28 bits per value, lane by lane, into 14 vectors (224 ints) starting at out[outpos]
+ var iV = IntVector.fromArray(SPECIES_512, in, inpos);
+ var oV = iV; // unmasked: any bit above bit 27 would be OR-ed into the neighbouring value -- NOTE(review): presumably callers guarantee inputs fit in 28 bits; confirm
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 16);
+ oV = iV.lanewise(VectorOperators.LSHL, 28).or(oV);
oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.and(MASK_29).lanewise(VectorOperators.LSHR, 3);
+ outpos += VLEN_512;
+ oV = iV.lanewise(VectorOperators.LSHR, 4); // 4 low bits already written; carry the other 24
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 16);
- oV = iV.and(MASK_29).lanewise(VectorOperators.LSHL, 26).or(oV);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 32);
+ oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
oV.intoArray(out, outpos);
- outpos += VLEN_256;
+ outpos += VLEN_512;
- oV = iV.and(MASK_29).lanewise(VectorOperators.LSHR, 6);
+ oV = iV.lanewise(VectorOperators.LSHR, 8); // 8 low bits already written; carry the other 20
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 24);
- oV = iV.and(MASK_29).lanewise(VectorOperators.LSHL, 23).or(oV);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 48);
+ oV = iV.lanewise(VectorOperators.LSHL, 20).or(oV);
oV.intoArray(out, outpos);
- outpos += VLEN_256;
+ outpos += VLEN_512;
- oV = iV.and(MASK_29).lanewise(VectorOperators.LSHR, 9);
+ oV = iV.lanewise(VectorOperators.LSHR, 12); // 12 low bits already written; carry the other 16
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 32);
- oV = iV.and(MASK_29).lanewise(VectorOperators.LSHL, 20).or(oV);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 64);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
oV.intoArray(out, outpos);
- outpos += VLEN_256;
+ outpos += VLEN_512;
- oV = iV.and(MASK_29).lanewise(VectorOperators.LSHR, 12);
+ oV = iV.lanewise(VectorOperators.LSHR, 16); // 16 low bits already written; carry the other 12
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 40);
- oV = iV.and(MASK_29).lanewise(VectorOperators.LSHL, 17).or(oV);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 80);
+ oV = iV.lanewise(VectorOperators.LSHL, 12).or(oV);
oV.intoArray(out, outpos);
- outpos += VLEN_256;
+ outpos += VLEN_512;
- oV = iV.and(MASK_29).lanewise(VectorOperators.LSHR, 15);
+ oV = iV.lanewise(VectorOperators.LSHR, 20); // 20 low bits already written; carry the other 8
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 48);
- oV = iV.and(MASK_29).lanewise(VectorOperators.LSHL, 14).or(oV);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 96);
+ oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
oV.intoArray(out, outpos);
- outpos += VLEN_256;
+ outpos += VLEN_512;
- oV = iV.and(MASK_29).lanewise(VectorOperators.LSHR, 18);
+ oV = iV.lanewise(VectorOperators.LSHR, 24); // 24 low bits already written; carry the other 4
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 56);
- oV = iV.and(MASK_29).lanewise(VectorOperators.LSHL, 11).or(oV);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 112);
+ oV = iV.lanewise(VectorOperators.LSHL, 4).or(oV);
oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.and(MASK_29).lanewise(VectorOperators.LSHR, 21);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 64);
- oV = iV.and(MASK_29).lanewise(VectorOperators.LSHL, 8).or(oV);
+ outpos += VLEN_512;
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
+ oV = oV.zero(SPECIES_512); // previous word ended exactly on a value boundary: nothing to carry
- oV = iV.and(MASK_29).lanewise(VectorOperators.LSHR, 24);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 128);
+ oV = iV.or(oV); // OR with the all-zero vector: this value starts the next word at bit 0
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 72);
- oV = iV.and(MASK_29).lanewise(VectorOperators.LSHL, 5).or(oV);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 144);
+ oV = iV.lanewise(VectorOperators.LSHL, 28).or(oV);
oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.and(MASK_29).lanewise(VectorOperators.LSHR, 27);
+ outpos += VLEN_512;
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 80);
- oV = iV.and(MASK_29).lanewise(VectorOperators.LSHL, 2).or(oV);
+ oV = iV.lanewise(VectorOperators.LSHR, 4); // 4 low bits already written; carry the other 24
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 88);
- oV = iV.and(MASK_29).lanewise(VectorOperators.LSHL, 31).or(oV);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 160);
+ oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
oV.intoArray(out, outpos);
- outpos += VLEN_256;
+ outpos += VLEN_512;
- oV = iV.and(MASK_29).lanewise(VectorOperators.LSHR, 1);
+ oV = iV.lanewise(VectorOperators.LSHR, 8); // 8 low bits already written; carry the other 20
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 96);
- oV = iV.and(MASK_29).lanewise(VectorOperators.LSHL, 28).or(oV);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 176);
+ oV = iV.lanewise(VectorOperators.LSHL, 20).or(oV);
oV.intoArray(out, outpos);
- outpos += VLEN_256;
+ outpos += VLEN_512;
- oV = iV.and(MASK_29).lanewise(VectorOperators.LSHR, 4);
+ oV = iV.lanewise(VectorOperators.LSHR, 12); // 12 low bits already written; carry the other 16
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 104);
- oV = iV.and(MASK_29).lanewise(VectorOperators.LSHL, 25).or(oV);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 192);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
oV.intoArray(out, outpos);
- outpos += VLEN_256;
+ outpos += VLEN_512;
- oV = iV.and(MASK_29).lanewise(VectorOperators.LSHR, 7);
+ oV = iV.lanewise(VectorOperators.LSHR, 16); // 16 low bits already written; carry the other 12
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 112);
- oV = iV.and(MASK_29).lanewise(VectorOperators.LSHL, 22).or(oV);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 208);
+ oV = iV.lanewise(VectorOperators.LSHL, 12).or(oV);
oV.intoArray(out, outpos);
- outpos += VLEN_256;
+ outpos += VLEN_512;
- oV = iV.and(MASK_29).lanewise(VectorOperators.LSHR, 10);
+ oV = iV.lanewise(VectorOperators.LSHR, 20); // 20 low bits already written; carry the other 8
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 120);
- oV = iV.and(MASK_29).lanewise(VectorOperators.LSHL, 19).or(oV);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 224);
+ oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
oV.intoArray(out, outpos);
- outpos += VLEN_256;
+ outpos += VLEN_512;
- oV = iV.and(MASK_29).lanewise(VectorOperators.LSHR, 13);
+ oV = iV.lanewise(VectorOperators.LSHR, 24); // 24 low bits already written; carry the other 4
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 128);
- oV = iV.and(MASK_29).lanewise(VectorOperators.LSHL, 16).or(oV);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 240);
+ oV = iV.lanewise(VectorOperators.LSHL, 4).or(oV);
oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.and(MASK_29).lanewise(VectorOperators.LSHR, 16);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 136);
- oV = iV.and(MASK_29).lanewise(VectorOperators.LSHL, 13).or(oV);
+ }
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.and(MASK_29).lanewise(VectorOperators.LSHR, 19);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 144);
- oV = iV.and(MASK_29).lanewise(VectorOperators.LSHL, 10).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.and(MASK_29).lanewise(VectorOperators.LSHR, 22);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 152);
- oV = iV.and(MASK_29).lanewise(VectorOperators.LSHL, 7).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.and(MASK_29).lanewise(VectorOperators.LSHR, 25);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 160);
- oV = iV.and(MASK_29).lanewise(VectorOperators.LSHL, 4).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.and(MASK_29).lanewise(VectorOperators.LSHR, 28);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 168);
- oV = iV.and(MASK_29).lanewise(VectorOperators.LSHL, 1).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 176);
- oV = iV.and(MASK_29).lanewise(VectorOperators.LSHL, 30).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.and(MASK_29).lanewise(VectorOperators.LSHR, 2);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 184);
- oV = iV.and(MASK_29).lanewise(VectorOperators.LSHL, 27).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.and(MASK_29).lanewise(VectorOperators.LSHR, 5);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 192);
- oV = iV.and(MASK_29).lanewise(VectorOperators.LSHL, 24).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.and(MASK_29).lanewise(VectorOperators.LSHR, 8);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 200);
- oV = iV.and(MASK_29).lanewise(VectorOperators.LSHL, 21).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.and(MASK_29).lanewise(VectorOperators.LSHR, 11);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 208);
- oV = iV.and(MASK_29).lanewise(VectorOperators.LSHL, 18).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.and(MASK_29).lanewise(VectorOperators.LSHR, 14);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 216);
- oV = iV.and(MASK_29).lanewise(VectorOperators.LSHL, 15).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.and(MASK_29).lanewise(VectorOperators.LSHR, 17);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 224);
- oV = iV.and(MASK_29).lanewise(VectorOperators.LSHL, 12).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.and(MASK_29).lanewise(VectorOperators.LSHR, 20);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 232);
- oV = iV.and(MASK_29).lanewise(VectorOperators.LSHL, 9).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.and(MASK_29).lanewise(VectorOperators.LSHR, 23);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 240);
- oV = iV.and(MASK_29).lanewise(VectorOperators.LSHL, 6).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.and(MASK_29).lanewise(VectorOperators.LSHR, 26);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 248);
- oV = iV.and(MASK_29).lanewise(VectorOperators.LSHL, 3).or(oV);
-
- oV.intoArray(out, outpos);
- }
-
- private static void fastpack30(final int[] in, int inpos, final int[] out,
- int outpos) {
+ private static void fastpackNoMask30(final int[] in, int inpos,
+ final int[] out, int outpos) {
var iV = IntVector.fromArray(SPECIES_512, in, inpos);
- var oV = iV.and(MASK_30);
+ var oV = iV;
iV = IntVector.fromArray(SPECIES_512, in, inpos + 16);
- oV = iV.and(MASK_30).lanewise(VectorOperators.LSHL, 30).or(oV);
+ oV = iV.lanewise(VectorOperators.LSHL, 30).or(oV);
oV.intoArray(out, outpos);
outpos += VLEN_512;
- oV = iV.and(MASK_30).lanewise(VectorOperators.LSHR, 2);
+ oV = iV.lanewise(VectorOperators.LSHR, 2);
iV = IntVector.fromArray(SPECIES_512, in, inpos + 32);
- oV = iV.and(MASK_30).lanewise(VectorOperators.LSHL, 28).or(oV);
+ oV = iV.lanewise(VectorOperators.LSHL, 28).or(oV);
oV.intoArray(out, outpos);
outpos += VLEN_512;
- oV = iV.and(MASK_30).lanewise(VectorOperators.LSHR, 4);
+ oV = iV.lanewise(VectorOperators.LSHR, 4);
iV = IntVector.fromArray(SPECIES_512, in, inpos + 48);
- oV = iV.and(MASK_30).lanewise(VectorOperators.LSHL, 26).or(oV);
+ oV = iV.lanewise(VectorOperators.LSHL, 26).or(oV);
oV.intoArray(out, outpos);
outpos += VLEN_512;
- oV = iV.and(MASK_30).lanewise(VectorOperators.LSHR, 6);
+ oV = iV.lanewise(VectorOperators.LSHR, 6);
iV = IntVector.fromArray(SPECIES_512, in, inpos + 64);
- oV = iV.and(MASK_30).lanewise(VectorOperators.LSHL, 24).or(oV);
+ oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
oV.intoArray(out, outpos);
outpos += VLEN_512;
- oV = iV.and(MASK_30).lanewise(VectorOperators.LSHR, 8);
+ oV = iV.lanewise(VectorOperators.LSHR, 8);
iV = IntVector.fromArray(SPECIES_512, in, inpos + 80);
- oV = iV.and(MASK_30).lanewise(VectorOperators.LSHL, 22).or(oV);
+ oV = iV.lanewise(VectorOperators.LSHL, 22).or(oV);
oV.intoArray(out, outpos);
outpos += VLEN_512;
- oV = iV.and(MASK_30).lanewise(VectorOperators.LSHR, 10);
+ oV = iV.lanewise(VectorOperators.LSHR, 10);
iV = IntVector.fromArray(SPECIES_512, in, inpos + 96);
- oV = iV.and(MASK_30).lanewise(VectorOperators.LSHL, 20).or(oV);
+ oV = iV.lanewise(VectorOperators.LSHL, 20).or(oV);
oV.intoArray(out, outpos);
outpos += VLEN_512;
- oV = iV.and(MASK_30).lanewise(VectorOperators.LSHR, 12);
+ oV = iV.lanewise(VectorOperators.LSHR, 12);
iV = IntVector.fromArray(SPECIES_512, in, inpos + 112);
- oV = iV.and(MASK_30).lanewise(VectorOperators.LSHL, 18).or(oV);
+ oV = iV.lanewise(VectorOperators.LSHL, 18).or(oV);
oV.intoArray(out, outpos);
outpos += VLEN_512;
- oV = iV.and(MASK_30).lanewise(VectorOperators.LSHR, 14);
+ oV = iV.lanewise(VectorOperators.LSHR, 14);
iV = IntVector.fromArray(SPECIES_512, in, inpos + 128);
- oV = iV.and(MASK_30).lanewise(VectorOperators.LSHL, 16).or(oV);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
oV.intoArray(out, outpos);
outpos += VLEN_512;
- oV = iV.and(MASK_30).lanewise(VectorOperators.LSHR, 16);
+ oV = iV.lanewise(VectorOperators.LSHR, 16);
iV = IntVector.fromArray(SPECIES_512, in, inpos + 144);
- oV = iV.and(MASK_30).lanewise(VectorOperators.LSHL, 14).or(oV);
+ oV = iV.lanewise(VectorOperators.LSHL, 14).or(oV);
oV.intoArray(out, outpos);
outpos += VLEN_512;
- oV = iV.and(MASK_30).lanewise(VectorOperators.LSHR, 18);
+ oV = iV.lanewise(VectorOperators.LSHR, 18);
iV = IntVector.fromArray(SPECIES_512, in, inpos + 160);
- oV = iV.and(MASK_30).lanewise(VectorOperators.LSHL, 12).or(oV);
+ oV = iV.lanewise(VectorOperators.LSHL, 12).or(oV);
oV.intoArray(out, outpos);
outpos += VLEN_512;
- oV = iV.and(MASK_30).lanewise(VectorOperators.LSHR, 20);
+ oV = iV.lanewise(VectorOperators.LSHR, 20);
iV = IntVector.fromArray(SPECIES_512, in, inpos + 176);
- oV = iV.and(MASK_30).lanewise(VectorOperators.LSHL, 10).or(oV);
+ oV = iV.lanewise(VectorOperators.LSHL, 10).or(oV);
oV.intoArray(out, outpos);
outpos += VLEN_512;
- oV = iV.and(MASK_30).lanewise(VectorOperators.LSHR, 22);
+ oV = iV.lanewise(VectorOperators.LSHR, 22);
iV = IntVector.fromArray(SPECIES_512, in, inpos + 192);
- oV = iV.and(MASK_30).lanewise(VectorOperators.LSHL, 8).or(oV);
+ oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
oV.intoArray(out, outpos);
outpos += VLEN_512;
- oV = iV.and(MASK_30).lanewise(VectorOperators.LSHR, 24);
+ oV = iV.lanewise(VectorOperators.LSHR, 24);
iV = IntVector.fromArray(SPECIES_512, in, inpos + 208);
- oV = iV.and(MASK_30).lanewise(VectorOperators.LSHL, 6).or(oV);
+ oV = iV.lanewise(VectorOperators.LSHL, 6).or(oV);
oV.intoArray(out, outpos);
outpos += VLEN_512;
- oV = iV.and(MASK_30).lanewise(VectorOperators.LSHR, 26);
+ oV = iV.lanewise(VectorOperators.LSHR, 26);
iV = IntVector.fromArray(SPECIES_512, in, inpos + 224);
- oV = iV.and(MASK_30).lanewise(VectorOperators.LSHL, 4).or(oV);
+ oV = iV.lanewise(VectorOperators.LSHL, 4).or(oV);
oV.intoArray(out, outpos);
outpos += VLEN_512;
- oV = iV.and(MASK_30).lanewise(VectorOperators.LSHR, 28);
+ oV = iV.lanewise(VectorOperators.LSHR, 28);
iV = IntVector.fromArray(SPECIES_512, in, inpos + 240);
- oV = iV.and(MASK_30).lanewise(VectorOperators.LSHL, 2).or(oV);
+ oV = iV.lanewise(VectorOperators.LSHL, 2).or(oV);
oV.intoArray(out, outpos);
}
- private static void fastpack31(final int[] in, int inpos, final int[] out,
- int outpos) {
- var iV = IntVector.fromArray(SPECIES_256, in, inpos);
- var oV = iV.and(MASK_31);
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 8);
- oV = iV.and(MASK_31).lanewise(VectorOperators.LSHL, 31).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.and(MASK_31).lanewise(VectorOperators.LSHR, 1);
+ private static void fastunpack2(final int[] in, int inpos, final int[] out,
+ int outpos) {
+ var iV = IntVector.fromArray(SPECIES_512, in, inpos);
+ iV.and(MASK_2).intoArray(out, outpos);
+ outpos += VLEN_512;
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 16);
- oV = iV.and(MASK_31).lanewise(VectorOperators.LSHL, 30).or(oV);
+ iV.lanewise(VectorOperators.LSHR, 2).and(MASK_2).intoArray(out, outpos);
+ outpos += VLEN_512;
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
+ iV.lanewise(VectorOperators.LSHR, 4).and(MASK_2).intoArray(out, outpos);
+ outpos += VLEN_512;
- oV = iV.and(MASK_31).lanewise(VectorOperators.LSHR, 2);
+ iV.lanewise(VectorOperators.LSHR, 6).and(MASK_2).intoArray(out, outpos);
+ outpos += VLEN_512;
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 24);
- oV = iV.and(MASK_31).lanewise(VectorOperators.LSHL, 29).or(oV);
+ iV.lanewise(VectorOperators.LSHR, 8).and(MASK_2).intoArray(out, outpos);
+ outpos += VLEN_512;
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
+ iV.lanewise(VectorOperators.LSHR, 10).and(MASK_2).intoArray(out, outpos);
+ outpos += VLEN_512;
- oV = iV.and(MASK_31).lanewise(VectorOperators.LSHR, 3);
+ iV.lanewise(VectorOperators.LSHR, 12).and(MASK_2).intoArray(out, outpos);
+ outpos += VLEN_512;
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 32);
- oV = iV.and(MASK_31).lanewise(VectorOperators.LSHL, 28).or(oV);
+ iV.lanewise(VectorOperators.LSHR, 14).and(MASK_2).intoArray(out, outpos);
+ outpos += VLEN_512;
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
+ iV.lanewise(VectorOperators.LSHR, 16).and(MASK_2).intoArray(out, outpos);
+ outpos += VLEN_512;
- oV = iV.and(MASK_31).lanewise(VectorOperators.LSHR, 4);
+ iV.lanewise(VectorOperators.LSHR, 18).and(MASK_2).intoArray(out, outpos);
+ outpos += VLEN_512;
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 40);
- oV = iV.and(MASK_31).lanewise(VectorOperators.LSHL, 27).or(oV);
+ iV.lanewise(VectorOperators.LSHR, 20).and(MASK_2).intoArray(out, outpos);
+ outpos += VLEN_512;
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
+ iV.lanewise(VectorOperators.LSHR, 22).and(MASK_2).intoArray(out, outpos);
+ outpos += VLEN_512;
- oV = iV.and(MASK_31).lanewise(VectorOperators.LSHR, 5);
+ iV.lanewise(VectorOperators.LSHR, 24).and(MASK_2).intoArray(out, outpos);
+ outpos += VLEN_512;
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 48);
- oV = iV.and(MASK_31).lanewise(VectorOperators.LSHL, 26).or(oV);
+ iV.lanewise(VectorOperators.LSHR, 26).and(MASK_2).intoArray(out, outpos);
+ outpos += VLEN_512;
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
+ iV.lanewise(VectorOperators.LSHR, 28).and(MASK_2).intoArray(out, outpos);
+ outpos += VLEN_512;
- oV = iV.and(MASK_31).lanewise(VectorOperators.LSHR, 6);
+ iV.lanewise(VectorOperators.LSHR, 30).and(MASK_2).intoArray(out, outpos);
+ outpos += VLEN_512;
+ }
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 56);
- oV = iV.and(MASK_31).lanewise(VectorOperators.LSHL, 25).or(oV);
+ private static void fastunpack4(final int[] in, int inpos, final int[] out,
+ int outpos) {
+ var iV = IntVector.fromArray(SPECIES_512, in, inpos);
+ iV.and(MASK_4).intoArray(out, outpos);
+ outpos += VLEN_512;
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
+ iV.lanewise(VectorOperators.LSHR, 4).and(MASK_4).intoArray(out, outpos);
+ outpos += VLEN_512;
- oV = iV.and(MASK_31).lanewise(VectorOperators.LSHR, 7);
+ iV.lanewise(VectorOperators.LSHR, 8).and(MASK_4).intoArray(out, outpos);
+ outpos += VLEN_512;
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 64);
- oV = iV.and(MASK_31).lanewise(VectorOperators.LSHL, 24).or(oV);
+ iV.lanewise(VectorOperators.LSHR, 12).and(MASK_4).intoArray(out, outpos);
+ outpos += VLEN_512;
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
+ iV.lanewise(VectorOperators.LSHR, 16).and(MASK_4).intoArray(out, outpos);
+ outpos += VLEN_512;
- oV = iV.and(MASK_31).lanewise(VectorOperators.LSHR, 8);
+ iV.lanewise(VectorOperators.LSHR, 20).and(MASK_4).intoArray(out, outpos);
+ outpos += VLEN_512;
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 72);
- oV = iV.and(MASK_31).lanewise(VectorOperators.LSHL, 23).or(oV);
+ iV.lanewise(VectorOperators.LSHR, 24).and(MASK_4).intoArray(out, outpos);
+ outpos += VLEN_512;
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
+ iV.lanewise(VectorOperators.LSHR, 28).and(MASK_4).intoArray(out, outpos);
+ outpos += VLEN_512;
- oV = iV.and(MASK_31).lanewise(VectorOperators.LSHR, 9);
+ var oV = iV.and(MASK_4);
+ oV = oV.zero(SPECIES_512);
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 80);
- oV = iV.and(MASK_31).lanewise(VectorOperators.LSHL, 22).or(oV);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 16);
+ oV = iV.and(0xf).or(oV);
oV.intoArray(out, outpos);
- outpos += VLEN_256;
+ outpos += VLEN_512;
- oV = iV.and(MASK_31).lanewise(VectorOperators.LSHR, 10);
+ iV.lanewise(VectorOperators.LSHR, 4).and(MASK_4).intoArray(out, outpos);
+ outpos += VLEN_512;
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 88);
- oV = iV.and(MASK_31).lanewise(VectorOperators.LSHL, 21).or(oV);
+ iV.lanewise(VectorOperators.LSHR, 8).and(MASK_4).intoArray(out, outpos);
+ outpos += VLEN_512;
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
+ iV.lanewise(VectorOperators.LSHR, 12).and(MASK_4).intoArray(out, outpos);
+ outpos += VLEN_512;
- oV = iV.and(MASK_31).lanewise(VectorOperators.LSHR, 11);
+ iV.lanewise(VectorOperators.LSHR, 16).and(MASK_4).intoArray(out, outpos);
+ outpos += VLEN_512;
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 96);
- oV = iV.and(MASK_31).lanewise(VectorOperators.LSHL, 20).or(oV);
+ iV.lanewise(VectorOperators.LSHR, 20).and(MASK_4).intoArray(out, outpos);
+ outpos += VLEN_512;
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
+ iV.lanewise(VectorOperators.LSHR, 24).and(MASK_4).intoArray(out, outpos);
+ outpos += VLEN_512;
- oV = iV.and(MASK_31).lanewise(VectorOperators.LSHR, 12);
+ iV.lanewise(VectorOperators.LSHR, 28).and(MASK_4).intoArray(out, outpos);
+ outpos += VLEN_512;
+ }
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 104);
- oV = iV.and(MASK_31).lanewise(VectorOperators.LSHL, 19).or(oV);
+ private static void fastunpack6(final int[] in, int inpos, final int[] out,
+ int outpos) {
+ var iV = IntVector.fromArray(SPECIES_512, in, inpos);
+ iV.and(MASK_6).intoArray(out, outpos);
+ outpos += VLEN_512;
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
+ iV.lanewise(VectorOperators.LSHR, 6).and(MASK_6).intoArray(out, outpos);
+ outpos += VLEN_512;
- oV = iV.and(MASK_31).lanewise(VectorOperators.LSHR, 13);
+ iV.lanewise(VectorOperators.LSHR, 12).and(MASK_6).intoArray(out, outpos);
+ outpos += VLEN_512;
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 112);
- oV = iV.and(MASK_31).lanewise(VectorOperators.LSHL, 18).or(oV);
+ iV.lanewise(VectorOperators.LSHR, 18).and(MASK_6).intoArray(out, outpos);
+ outpos += VLEN_512;
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
+ iV.lanewise(VectorOperators.LSHR, 24).and(MASK_6).intoArray(out, outpos);
+ outpos += VLEN_512;
- oV = iV.and(MASK_31).lanewise(VectorOperators.LSHR, 14);
+ var oV = iV.lanewise(VectorOperators.LSHR, 30).and(MASK_6);
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 120);
- oV = iV.and(MASK_31).lanewise(VectorOperators.LSHL, 17).or(oV);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 16);
+ oV = iV.and(0xf).lanewise(VectorOperators.LSHL, 2).or(oV);
oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.and(MASK_31).lanewise(VectorOperators.LSHR, 15);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 128);
- oV = iV.and(MASK_31).lanewise(VectorOperators.LSHL, 16).or(oV);
+ outpos += VLEN_512;
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
+ iV.lanewise(VectorOperators.LSHR, 4).and(MASK_6).intoArray(out, outpos);
+ outpos += VLEN_512;
- oV = iV.and(MASK_31).lanewise(VectorOperators.LSHR, 16);
+ iV.lanewise(VectorOperators.LSHR, 10).and(MASK_6).intoArray(out, outpos);
+ outpos += VLEN_512;
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 136);
- oV = iV.and(MASK_31).lanewise(VectorOperators.LSHL, 15).or(oV);
+ iV.lanewise(VectorOperators.LSHR, 16).and(MASK_6).intoArray(out, outpos);
+ outpos += VLEN_512;
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
+ iV.lanewise(VectorOperators.LSHR, 22).and(MASK_6).intoArray(out, outpos);
+ outpos += VLEN_512;
- oV = iV.and(MASK_31).lanewise(VectorOperators.LSHR, 17);
+ oV = iV.lanewise(VectorOperators.LSHR, 28).and(MASK_6);
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 144);
- oV = iV.and(MASK_31).lanewise(VectorOperators.LSHL, 14).or(oV);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 32);
+ oV = iV.and(3).lanewise(VectorOperators.LSHL, 4).or(oV);
oV.intoArray(out, outpos);
- outpos += VLEN_256;
+ outpos += VLEN_512;
- oV = iV.and(MASK_31).lanewise(VectorOperators.LSHR, 18);
+ iV.lanewise(VectorOperators.LSHR, 2).and(MASK_6).intoArray(out, outpos);
+ outpos += VLEN_512;
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 152);
- oV = iV.and(MASK_31).lanewise(VectorOperators.LSHL, 13).or(oV);
+ iV.lanewise(VectorOperators.LSHR, 8).and(MASK_6).intoArray(out, outpos);
+ outpos += VLEN_512;
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
+ iV.lanewise(VectorOperators.LSHR, 14).and(MASK_6).intoArray(out, outpos);
+ outpos += VLEN_512;
- oV = iV.and(MASK_31).lanewise(VectorOperators.LSHR, 19);
+ iV.lanewise(VectorOperators.LSHR, 20).and(MASK_6).intoArray(out, outpos);
+ outpos += VLEN_512;
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 160);
- oV = iV.and(MASK_31).lanewise(VectorOperators.LSHL, 12).or(oV);
+ iV.lanewise(VectorOperators.LSHR, 26).and(MASK_6).intoArray(out, outpos);
+ outpos += VLEN_512;
+ }
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.and(MASK_31).lanewise(VectorOperators.LSHR, 20);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 168);
- oV = iV.and(MASK_31).lanewise(VectorOperators.LSHL, 11).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.and(MASK_31).lanewise(VectorOperators.LSHR, 21);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 176);
- oV = iV.and(MASK_31).lanewise(VectorOperators.LSHL, 10).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.and(MASK_31).lanewise(VectorOperators.LSHR, 22);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 184);
- oV = iV.and(MASK_31).lanewise(VectorOperators.LSHL, 9).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.and(MASK_31).lanewise(VectorOperators.LSHR, 23);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 192);
- oV = iV.and(MASK_31).lanewise(VectorOperators.LSHL, 8).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.and(MASK_31).lanewise(VectorOperators.LSHR, 24);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 200);
- oV = iV.and(MASK_31).lanewise(VectorOperators.LSHL, 7).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.and(MASK_31).lanewise(VectorOperators.LSHR, 25);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 208);
- oV = iV.and(MASK_31).lanewise(VectorOperators.LSHL, 6).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.and(MASK_31).lanewise(VectorOperators.LSHR, 26);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 216);
- oV = iV.and(MASK_31).lanewise(VectorOperators.LSHL, 5).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.and(MASK_31).lanewise(VectorOperators.LSHR, 27);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 224);
- oV = iV.and(MASK_31).lanewise(VectorOperators.LSHL, 4).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.and(MASK_31).lanewise(VectorOperators.LSHR, 28);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 232);
- oV = iV.and(MASK_31).lanewise(VectorOperators.LSHL, 3).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.and(MASK_31).lanewise(VectorOperators.LSHR, 29);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 240);
- oV = iV.and(MASK_31).lanewise(VectorOperators.LSHL, 2).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.and(MASK_31).lanewise(VectorOperators.LSHR, 30);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 248);
- oV = iV.and(MASK_31).lanewise(VectorOperators.LSHL, 1).or(oV);
-
- oV.intoArray(out, outpos);
- }
-
- private static void fastpackNoMask1(final int[] in, int inpos,
- final int[] out, int outpos) {
- var iV = IntVector.fromArray(SPECIES_256, in, inpos);
- var oV = iV;
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 8);
- oV = iV.lanewise(VectorOperators.LSHL, 1).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 16);
- oV = iV.lanewise(VectorOperators.LSHL, 2).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 24);
- oV = iV.lanewise(VectorOperators.LSHL, 3).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 32);
- oV = iV.lanewise(VectorOperators.LSHL, 4).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 40);
- oV = iV.lanewise(VectorOperators.LSHL, 5).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 48);
- oV = iV.lanewise(VectorOperators.LSHL, 6).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 56);
- oV = iV.lanewise(VectorOperators.LSHL, 7).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 64);
- oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 72);
- oV = iV.lanewise(VectorOperators.LSHL, 9).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 80);
- oV = iV.lanewise(VectorOperators.LSHL, 10).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 88);
- oV = iV.lanewise(VectorOperators.LSHL, 11).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 96);
- oV = iV.lanewise(VectorOperators.LSHL, 12).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 104);
- oV = iV.lanewise(VectorOperators.LSHL, 13).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 112);
- oV = iV.lanewise(VectorOperators.LSHL, 14).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 120);
- oV = iV.lanewise(VectorOperators.LSHL, 15).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 128);
- oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 136);
- oV = iV.lanewise(VectorOperators.LSHL, 17).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 144);
- oV = iV.lanewise(VectorOperators.LSHL, 18).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 152);
- oV = iV.lanewise(VectorOperators.LSHL, 19).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 160);
- oV = iV.lanewise(VectorOperators.LSHL, 20).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 168);
- oV = iV.lanewise(VectorOperators.LSHL, 21).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 176);
- oV = iV.lanewise(VectorOperators.LSHL, 22).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 184);
- oV = iV.lanewise(VectorOperators.LSHL, 23).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 192);
- oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 200);
- oV = iV.lanewise(VectorOperators.LSHL, 25).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 208);
- oV = iV.lanewise(VectorOperators.LSHL, 26).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 216);
- oV = iV.lanewise(VectorOperators.LSHL, 27).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 224);
- oV = iV.lanewise(VectorOperators.LSHL, 28).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 232);
- oV = iV.lanewise(VectorOperators.LSHL, 29).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 240);
- oV = iV.lanewise(VectorOperators.LSHL, 30).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 248);
- oV = iV.lanewise(VectorOperators.LSHL, 31).or(oV);
-
- oV.intoArray(out, outpos);
- }
-
- private static void fastpackNoMask2(final int[] in, int inpos,
- final int[] out, int outpos) {
- var iV = IntVector.fromArray(SPECIES_512, in, inpos);
- var oV = iV;
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 16);
- oV = iV.lanewise(VectorOperators.LSHL, 2).or(oV);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 32);
- oV = iV.lanewise(VectorOperators.LSHL, 4).or(oV);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 48);
- oV = iV.lanewise(VectorOperators.LSHL, 6).or(oV);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 64);
- oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 80);
- oV = iV.lanewise(VectorOperators.LSHL, 10).or(oV);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 96);
- oV = iV.lanewise(VectorOperators.LSHL, 12).or(oV);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 112);
- oV = iV.lanewise(VectorOperators.LSHL, 14).or(oV);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 128);
- oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 144);
- oV = iV.lanewise(VectorOperators.LSHL, 18).or(oV);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 160);
- oV = iV.lanewise(VectorOperators.LSHL, 20).or(oV);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 176);
- oV = iV.lanewise(VectorOperators.LSHL, 22).or(oV);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 192);
- oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 208);
- oV = iV.lanewise(VectorOperators.LSHL, 26).or(oV);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 224);
- oV = iV.lanewise(VectorOperators.LSHL, 28).or(oV);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 240);
- oV = iV.lanewise(VectorOperators.LSHL, 30).or(oV);
-
- oV.intoArray(out, outpos);
- }
-
- private static void fastpackNoMask3(final int[] in, int inpos,
- final int[] out, int outpos) {
- var iV = IntVector.fromArray(SPECIES_256, in, inpos);
- var oV = iV;
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 8);
- oV = iV.lanewise(VectorOperators.LSHL, 3).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 16);
- oV = iV.lanewise(VectorOperators.LSHL, 6).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 24);
- oV = iV.lanewise(VectorOperators.LSHL, 9).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 32);
- oV = iV.lanewise(VectorOperators.LSHL, 12).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 40);
- oV = iV.lanewise(VectorOperators.LSHL, 15).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 48);
- oV = iV.lanewise(VectorOperators.LSHL, 18).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 56);
- oV = iV.lanewise(VectorOperators.LSHL, 21).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 64);
- oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 72);
- oV = iV.lanewise(VectorOperators.LSHL, 27).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 80);
- oV = iV.lanewise(VectorOperators.LSHL, 30).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 2);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 88);
- oV = iV.lanewise(VectorOperators.LSHL, 1).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 96);
- oV = iV.lanewise(VectorOperators.LSHL, 4).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 104);
- oV = iV.lanewise(VectorOperators.LSHL, 7).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 112);
- oV = iV.lanewise(VectorOperators.LSHL, 10).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 120);
- oV = iV.lanewise(VectorOperators.LSHL, 13).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 128);
- oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 136);
- oV = iV.lanewise(VectorOperators.LSHL, 19).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 144);
- oV = iV.lanewise(VectorOperators.LSHL, 22).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 152);
- oV = iV.lanewise(VectorOperators.LSHL, 25).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 160);
- oV = iV.lanewise(VectorOperators.LSHL, 28).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 168);
- oV = iV.lanewise(VectorOperators.LSHL, 31).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 1);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 176);
- oV = iV.lanewise(VectorOperators.LSHL, 2).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 184);
- oV = iV.lanewise(VectorOperators.LSHL, 5).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 192);
- oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 200);
- oV = iV.lanewise(VectorOperators.LSHL, 11).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 208);
- oV = iV.lanewise(VectorOperators.LSHL, 14).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 216);
- oV = iV.lanewise(VectorOperators.LSHL, 17).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 224);
- oV = iV.lanewise(VectorOperators.LSHL, 20).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 232);
- oV = iV.lanewise(VectorOperators.LSHL, 23).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 240);
- oV = iV.lanewise(VectorOperators.LSHL, 26).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 248);
- oV = iV.lanewise(VectorOperators.LSHL, 29).or(oV);
-
- oV.intoArray(out, outpos);
- }
-
- private static void fastpackNoMask4(final int[] in, int inpos,
- final int[] out, int outpos) {
- var iV = IntVector.fromArray(SPECIES_512, in, inpos);
- var oV = iV;
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 16);
- oV = iV.lanewise(VectorOperators.LSHL, 4).or(oV);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 32);
- oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 48);
- oV = iV.lanewise(VectorOperators.LSHL, 12).or(oV);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 64);
- oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 80);
- oV = iV.lanewise(VectorOperators.LSHL, 20).or(oV);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 96);
- oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 112);
- oV = iV.lanewise(VectorOperators.LSHL, 28).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_512;
- oV = oV.zero(SPECIES_512);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 128);
- oV = iV.or(oV);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 144);
- oV = iV.lanewise(VectorOperators.LSHL, 4).or(oV);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 160);
- oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 176);
- oV = iV.lanewise(VectorOperators.LSHL, 12).or(oV);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 192);
- oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 208);
- oV = iV.lanewise(VectorOperators.LSHL, 20).or(oV);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 224);
- oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 240);
- oV = iV.lanewise(VectorOperators.LSHL, 28).or(oV);
-
- oV.intoArray(out, outpos);
- }
-
- private static void fastpackNoMask5(final int[] in, int inpos,
- final int[] out, int outpos) {
- var iV = IntVector.fromArray(SPECIES_256, in, inpos);
- var oV = iV;
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 8);
- oV = iV.lanewise(VectorOperators.LSHL, 5).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 16);
- oV = iV.lanewise(VectorOperators.LSHL, 10).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 24);
- oV = iV.lanewise(VectorOperators.LSHL, 15).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 32);
- oV = iV.lanewise(VectorOperators.LSHL, 20).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 40);
- oV = iV.lanewise(VectorOperators.LSHL, 25).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 48);
- oV = iV.lanewise(VectorOperators.LSHL, 30).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 2);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 56);
- oV = iV.lanewise(VectorOperators.LSHL, 3).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 64);
- oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 72);
- oV = iV.lanewise(VectorOperators.LSHL, 13).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 80);
- oV = iV.lanewise(VectorOperators.LSHL, 18).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 88);
- oV = iV.lanewise(VectorOperators.LSHL, 23).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 96);
- oV = iV.lanewise(VectorOperators.LSHL, 28).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 4);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 104);
- oV = iV.lanewise(VectorOperators.LSHL, 1).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 112);
- oV = iV.lanewise(VectorOperators.LSHL, 6).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 120);
- oV = iV.lanewise(VectorOperators.LSHL, 11).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 128);
- oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 136);
- oV = iV.lanewise(VectorOperators.LSHL, 21).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 144);
- oV = iV.lanewise(VectorOperators.LSHL, 26).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 152);
- oV = iV.lanewise(VectorOperators.LSHL, 31).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 1);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 160);
- oV = iV.lanewise(VectorOperators.LSHL, 4).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 168);
- oV = iV.lanewise(VectorOperators.LSHL, 9).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 176);
- oV = iV.lanewise(VectorOperators.LSHL, 14).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 184);
- oV = iV.lanewise(VectorOperators.LSHL, 19).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 192);
- oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 200);
- oV = iV.lanewise(VectorOperators.LSHL, 29).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 3);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 208);
- oV = iV.lanewise(VectorOperators.LSHL, 2).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 216);
- oV = iV.lanewise(VectorOperators.LSHL, 7).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 224);
- oV = iV.lanewise(VectorOperators.LSHL, 12).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 232);
- oV = iV.lanewise(VectorOperators.LSHL, 17).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 240);
- oV = iV.lanewise(VectorOperators.LSHL, 22).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 248);
- oV = iV.lanewise(VectorOperators.LSHL, 27).or(oV);
-
- oV.intoArray(out, outpos);
- }
-
- private static void fastpackNoMask6(final int[] in, int inpos,
- final int[] out, int outpos) {
- var iV = IntVector.fromArray(SPECIES_512, in, inpos);
- var oV = iV;
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 16);
- oV = iV.lanewise(VectorOperators.LSHL, 6).or(oV);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 32);
- oV = iV.lanewise(VectorOperators.LSHL, 12).or(oV);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 48);
- oV = iV.lanewise(VectorOperators.LSHL, 18).or(oV);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 64);
- oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 80);
- oV = iV.lanewise(VectorOperators.LSHL, 30).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_512;
- oV = iV.lanewise(VectorOperators.LSHR, 2);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 96);
- oV = iV.lanewise(VectorOperators.LSHL, 4).or(oV);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 112);
- oV = iV.lanewise(VectorOperators.LSHL, 10).or(oV);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 128);
- oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 144);
- oV = iV.lanewise(VectorOperators.LSHL, 22).or(oV);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 160);
- oV = iV.lanewise(VectorOperators.LSHL, 28).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_512;
-
- oV = iV.lanewise(VectorOperators.LSHR, 4);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 176);
- oV = iV.lanewise(VectorOperators.LSHL, 2).or(oV);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 192);
- oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 208);
- oV = iV.lanewise(VectorOperators.LSHL, 14).or(oV);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 224);
- oV = iV.lanewise(VectorOperators.LSHL, 20).or(oV);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 240);
- oV = iV.lanewise(VectorOperators.LSHL, 26).or(oV);
-
- oV.intoArray(out, outpos);
- }
-
- private static void fastpackNoMask7(final int[] in, int inpos,
- final int[] out, int outpos) {
- var iV = IntVector.fromArray(SPECIES_256, in, inpos);
- var oV = iV;
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 8);
- oV = iV.lanewise(VectorOperators.LSHL, 7).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 16);
- oV = iV.lanewise(VectorOperators.LSHL, 14).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 24);
- oV = iV.lanewise(VectorOperators.LSHL, 21).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 32);
- oV = iV.lanewise(VectorOperators.LSHL, 28).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 4);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 40);
- oV = iV.lanewise(VectorOperators.LSHL, 3).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 48);
- oV = iV.lanewise(VectorOperators.LSHL, 10).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 56);
- oV = iV.lanewise(VectorOperators.LSHL, 17).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 64);
- oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 72);
- oV = iV.lanewise(VectorOperators.LSHL, 31).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 1);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 80);
- oV = iV.lanewise(VectorOperators.LSHL, 6).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 88);
- oV = iV.lanewise(VectorOperators.LSHL, 13).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 96);
- oV = iV.lanewise(VectorOperators.LSHL, 20).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 104);
- oV = iV.lanewise(VectorOperators.LSHL, 27).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 5);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 112);
- oV = iV.lanewise(VectorOperators.LSHL, 2).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 120);
- oV = iV.lanewise(VectorOperators.LSHL, 9).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 128);
- oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 136);
- oV = iV.lanewise(VectorOperators.LSHL, 23).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 144);
- oV = iV.lanewise(VectorOperators.LSHL, 30).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 2);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 152);
- oV = iV.lanewise(VectorOperators.LSHL, 5).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 160);
- oV = iV.lanewise(VectorOperators.LSHL, 12).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 168);
- oV = iV.lanewise(VectorOperators.LSHL, 19).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 176);
- oV = iV.lanewise(VectorOperators.LSHL, 26).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 6);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 184);
- oV = iV.lanewise(VectorOperators.LSHL, 1).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 192);
- oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 200);
- oV = iV.lanewise(VectorOperators.LSHL, 15).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 208);
- oV = iV.lanewise(VectorOperators.LSHL, 22).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 216);
- oV = iV.lanewise(VectorOperators.LSHL, 29).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 3);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 224);
- oV = iV.lanewise(VectorOperators.LSHL, 4).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 232);
- oV = iV.lanewise(VectorOperators.LSHL, 11).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 240);
- oV = iV.lanewise(VectorOperators.LSHL, 18).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 248);
- oV = iV.lanewise(VectorOperators.LSHL, 25).or(oV);
-
- oV.intoArray(out, outpos);
- }
-
- private static void fastpackNoMask8(final int[] in, int inpos,
- final int[] out, int outpos) {
- var iV = IntVector.fromArray(SPECIES_512, in, inpos);
- var oV = iV;
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 16);
- oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 32);
- oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 48);
- oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_512;
- oV = oV.zero(SPECIES_512);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 64);
- oV = iV.or(oV);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 80);
- oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 96);
- oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 112);
- oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_512;
-
- oV = oV.zero(SPECIES_512);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 128);
- oV = iV.or(oV);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 144);
- oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 160);
- oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 176);
- oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_512;
-
- oV = oV.zero(SPECIES_512);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 192);
- oV = iV.or(oV);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 208);
- oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 224);
- oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 240);
- oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
-
- oV.intoArray(out, outpos);
- }
-
- private static void fastpackNoMask9(final int[] in, int inpos,
- final int[] out, int outpos) {
- var iV = IntVector.fromArray(SPECIES_256, in, inpos);
- var oV = iV;
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 8);
- oV = iV.lanewise(VectorOperators.LSHL, 9).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 16);
- oV = iV.lanewise(VectorOperators.LSHL, 18).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 24);
- oV = iV.lanewise(VectorOperators.LSHL, 27).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 5);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 32);
- oV = iV.lanewise(VectorOperators.LSHL, 4).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 40);
- oV = iV.lanewise(VectorOperators.LSHL, 13).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 48);
- oV = iV.lanewise(VectorOperators.LSHL, 22).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 56);
- oV = iV.lanewise(VectorOperators.LSHL, 31).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 1);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 64);
- oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 72);
- oV = iV.lanewise(VectorOperators.LSHL, 17).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 80);
- oV = iV.lanewise(VectorOperators.LSHL, 26).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 6);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 88);
- oV = iV.lanewise(VectorOperators.LSHL, 3).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 96);
- oV = iV.lanewise(VectorOperators.LSHL, 12).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 104);
- oV = iV.lanewise(VectorOperators.LSHL, 21).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 112);
- oV = iV.lanewise(VectorOperators.LSHL, 30).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 2);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 120);
- oV = iV.lanewise(VectorOperators.LSHL, 7).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 128);
- oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 136);
- oV = iV.lanewise(VectorOperators.LSHL, 25).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 7);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 144);
- oV = iV.lanewise(VectorOperators.LSHL, 2).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 152);
- oV = iV.lanewise(VectorOperators.LSHL, 11).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 160);
- oV = iV.lanewise(VectorOperators.LSHL, 20).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 168);
- oV = iV.lanewise(VectorOperators.LSHL, 29).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 3);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 176);
- oV = iV.lanewise(VectorOperators.LSHL, 6).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 184);
- oV = iV.lanewise(VectorOperators.LSHL, 15).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 192);
- oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 8);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 200);
- oV = iV.lanewise(VectorOperators.LSHL, 1).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 208);
- oV = iV.lanewise(VectorOperators.LSHL, 10).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 216);
- oV = iV.lanewise(VectorOperators.LSHL, 19).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 224);
- oV = iV.lanewise(VectorOperators.LSHL, 28).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 4);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 232);
- oV = iV.lanewise(VectorOperators.LSHL, 5).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 240);
- oV = iV.lanewise(VectorOperators.LSHL, 14).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 248);
- oV = iV.lanewise(VectorOperators.LSHL, 23).or(oV);
-
- oV.intoArray(out, outpos);
- }
-
- private static void fastpackNoMask10(final int[] in, int inpos,
- final int[] out, int outpos) {
- var iV = IntVector.fromArray(SPECIES_512, in, inpos);
- var oV = iV;
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 16);
- oV = iV.lanewise(VectorOperators.LSHL, 10).or(oV);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 32);
- oV = iV.lanewise(VectorOperators.LSHL, 20).or(oV);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 48);
- oV = iV.lanewise(VectorOperators.LSHL, 30).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_512;
- oV = iV.lanewise(VectorOperators.LSHR, 2);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 64);
- oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 80);
- oV = iV.lanewise(VectorOperators.LSHL, 18).or(oV);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 96);
- oV = iV.lanewise(VectorOperators.LSHL, 28).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_512;
-
- oV = iV.lanewise(VectorOperators.LSHR, 4);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 112);
- oV = iV.lanewise(VectorOperators.LSHL, 6).or(oV);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 128);
- oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 144);
- oV = iV.lanewise(VectorOperators.LSHL, 26).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_512;
-
- oV = iV.lanewise(VectorOperators.LSHR, 6);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 160);
- oV = iV.lanewise(VectorOperators.LSHL, 4).or(oV);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 176);
- oV = iV.lanewise(VectorOperators.LSHL, 14).or(oV);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 192);
- oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_512;
-
- oV = iV.lanewise(VectorOperators.LSHR, 8);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 208);
- oV = iV.lanewise(VectorOperators.LSHL, 2).or(oV);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 224);
- oV = iV.lanewise(VectorOperators.LSHL, 12).or(oV);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 240);
- oV = iV.lanewise(VectorOperators.LSHL, 22).or(oV);
-
- oV.intoArray(out, outpos);
- }
-
- private static void fastpackNoMask11(final int[] in, int inpos,
- final int[] out, int outpos) {
- var iV = IntVector.fromArray(SPECIES_256, in, inpos);
- var oV = iV;
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 8);
- oV = iV.lanewise(VectorOperators.LSHL, 11).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 16);
- oV = iV.lanewise(VectorOperators.LSHL, 22).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 10);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 24);
- oV = iV.lanewise(VectorOperators.LSHL, 1).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 32);
- oV = iV.lanewise(VectorOperators.LSHL, 12).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 40);
- oV = iV.lanewise(VectorOperators.LSHL, 23).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 9);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 48);
- oV = iV.lanewise(VectorOperators.LSHL, 2).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 56);
- oV = iV.lanewise(VectorOperators.LSHL, 13).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 64);
- oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 8);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 72);
- oV = iV.lanewise(VectorOperators.LSHL, 3).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 80);
- oV = iV.lanewise(VectorOperators.LSHL, 14).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 88);
- oV = iV.lanewise(VectorOperators.LSHL, 25).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 7);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 96);
- oV = iV.lanewise(VectorOperators.LSHL, 4).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 104);
- oV = iV.lanewise(VectorOperators.LSHL, 15).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 112);
- oV = iV.lanewise(VectorOperators.LSHL, 26).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 6);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 120);
- oV = iV.lanewise(VectorOperators.LSHL, 5).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 128);
- oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 136);
- oV = iV.lanewise(VectorOperators.LSHL, 27).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 5);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 144);
- oV = iV.lanewise(VectorOperators.LSHL, 6).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 152);
- oV = iV.lanewise(VectorOperators.LSHL, 17).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 160);
- oV = iV.lanewise(VectorOperators.LSHL, 28).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 4);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 168);
- oV = iV.lanewise(VectorOperators.LSHL, 7).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 176);
- oV = iV.lanewise(VectorOperators.LSHL, 18).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 184);
- oV = iV.lanewise(VectorOperators.LSHL, 29).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 3);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 192);
- oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 200);
- oV = iV.lanewise(VectorOperators.LSHL, 19).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 208);
- oV = iV.lanewise(VectorOperators.LSHL, 30).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 2);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 216);
- oV = iV.lanewise(VectorOperators.LSHL, 9).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 224);
- oV = iV.lanewise(VectorOperators.LSHL, 20).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 232);
- oV = iV.lanewise(VectorOperators.LSHL, 31).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 1);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 240);
- oV = iV.lanewise(VectorOperators.LSHL, 10).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 248);
- oV = iV.lanewise(VectorOperators.LSHL, 21).or(oV);
-
- oV.intoArray(out, outpos);
- }
-
- private static void fastpackNoMask12(final int[] in, int inpos,
- final int[] out, int outpos) {
- var iV = IntVector.fromArray(SPECIES_512, in, inpos);
- var oV = iV;
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 16);
- oV = iV.lanewise(VectorOperators.LSHL, 12).or(oV);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 32);
- oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_512;
- oV = iV.lanewise(VectorOperators.LSHR, 8);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 48);
- oV = iV.lanewise(VectorOperators.LSHL, 4).or(oV);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 64);
- oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 80);
- oV = iV.lanewise(VectorOperators.LSHL, 28).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_512;
-
- oV = iV.lanewise(VectorOperators.LSHR, 4);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 96);
- oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 112);
- oV = iV.lanewise(VectorOperators.LSHL, 20).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_512;
-
- oV = oV.zero(SPECIES_512);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 128);
- oV = iV.or(oV);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 144);
- oV = iV.lanewise(VectorOperators.LSHL, 12).or(oV);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 160);
- oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_512;
-
- oV = iV.lanewise(VectorOperators.LSHR, 8);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 176);
- oV = iV.lanewise(VectorOperators.LSHL, 4).or(oV);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 192);
- oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 208);
- oV = iV.lanewise(VectorOperators.LSHL, 28).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_512;
-
- oV = iV.lanewise(VectorOperators.LSHR, 4);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 224);
- oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 240);
- oV = iV.lanewise(VectorOperators.LSHL, 20).or(oV);
-
- oV.intoArray(out, outpos);
- }
-
- private static void fastpackNoMask13(final int[] in, int inpos,
- final int[] out, int outpos) {
- var iV = IntVector.fromArray(SPECIES_256, in, inpos);
- var oV = iV;
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 8);
- oV = iV.lanewise(VectorOperators.LSHL, 13).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 16);
- oV = iV.lanewise(VectorOperators.LSHL, 26).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 6);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 24);
- oV = iV.lanewise(VectorOperators.LSHL, 7).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 32);
- oV = iV.lanewise(VectorOperators.LSHL, 20).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 12);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 40);
- oV = iV.lanewise(VectorOperators.LSHL, 1).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 48);
- oV = iV.lanewise(VectorOperators.LSHL, 14).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 56);
- oV = iV.lanewise(VectorOperators.LSHL, 27).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 5);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 64);
- oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 72);
- oV = iV.lanewise(VectorOperators.LSHL, 21).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 11);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 80);
- oV = iV.lanewise(VectorOperators.LSHL, 2).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 88);
- oV = iV.lanewise(VectorOperators.LSHL, 15).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 96);
- oV = iV.lanewise(VectorOperators.LSHL, 28).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 4);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 104);
- oV = iV.lanewise(VectorOperators.LSHL, 9).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 112);
- oV = iV.lanewise(VectorOperators.LSHL, 22).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 10);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 120);
- oV = iV.lanewise(VectorOperators.LSHL, 3).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 128);
- oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 136);
- oV = iV.lanewise(VectorOperators.LSHL, 29).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 3);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 144);
- oV = iV.lanewise(VectorOperators.LSHL, 10).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 152);
- oV = iV.lanewise(VectorOperators.LSHL, 23).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 9);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 160);
- oV = iV.lanewise(VectorOperators.LSHL, 4).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 168);
- oV = iV.lanewise(VectorOperators.LSHL, 17).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 176);
- oV = iV.lanewise(VectorOperators.LSHL, 30).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 2);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 184);
- oV = iV.lanewise(VectorOperators.LSHL, 11).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 192);
- oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 8);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 200);
- oV = iV.lanewise(VectorOperators.LSHL, 5).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 208);
- oV = iV.lanewise(VectorOperators.LSHL, 18).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 216);
- oV = iV.lanewise(VectorOperators.LSHL, 31).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 1);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 224);
- oV = iV.lanewise(VectorOperators.LSHL, 12).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 232);
- oV = iV.lanewise(VectorOperators.LSHL, 25).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 7);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 240);
- oV = iV.lanewise(VectorOperators.LSHL, 6).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 248);
- oV = iV.lanewise(VectorOperators.LSHL, 19).or(oV);
-
- oV.intoArray(out, outpos);
- }
-
- private static void fastpackNoMask14(final int[] in, int inpos,
- final int[] out, int outpos) {
- var iV = IntVector.fromArray(SPECIES_512, in, inpos);
- var oV = iV;
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 16);
- oV = iV.lanewise(VectorOperators.LSHL, 14).or(oV);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 32);
- oV = iV.lanewise(VectorOperators.LSHL, 28).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_512;
- oV = iV.lanewise(VectorOperators.LSHR, 4);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 48);
- oV = iV.lanewise(VectorOperators.LSHL, 10).or(oV);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 64);
- oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_512;
-
- oV = iV.lanewise(VectorOperators.LSHR, 8);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 80);
- oV = iV.lanewise(VectorOperators.LSHL, 6).or(oV);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 96);
- oV = iV.lanewise(VectorOperators.LSHL, 20).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_512;
-
- oV = iV.lanewise(VectorOperators.LSHR, 12);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 112);
- oV = iV.lanewise(VectorOperators.LSHL, 2).or(oV);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 128);
- oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 144);
- oV = iV.lanewise(VectorOperators.LSHL, 30).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_512;
-
- oV = iV.lanewise(VectorOperators.LSHR, 2);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 160);
- oV = iV.lanewise(VectorOperators.LSHL, 12).or(oV);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 176);
- oV = iV.lanewise(VectorOperators.LSHL, 26).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_512;
-
- oV = iV.lanewise(VectorOperators.LSHR, 6);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 192);
- oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 208);
- oV = iV.lanewise(VectorOperators.LSHL, 22).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_512;
-
- oV = iV.lanewise(VectorOperators.LSHR, 10);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 224);
- oV = iV.lanewise(VectorOperators.LSHL, 4).or(oV);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 240);
- oV = iV.lanewise(VectorOperators.LSHL, 18).or(oV);
-
- oV.intoArray(out, outpos);
- }
-
- private static void fastpackNoMask15(final int[] in, int inpos,
- final int[] out, int outpos) {
- var iV = IntVector.fromArray(SPECIES_256, in, inpos);
- var oV = iV;
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 8);
- oV = iV.lanewise(VectorOperators.LSHL, 15).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 16);
- oV = iV.lanewise(VectorOperators.LSHL, 30).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 2);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 24);
- oV = iV.lanewise(VectorOperators.LSHL, 13).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 32);
- oV = iV.lanewise(VectorOperators.LSHL, 28).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 4);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 40);
- oV = iV.lanewise(VectorOperators.LSHL, 11).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 48);
- oV = iV.lanewise(VectorOperators.LSHL, 26).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 6);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 56);
- oV = iV.lanewise(VectorOperators.LSHL, 9).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 64);
- oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 8);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 72);
- oV = iV.lanewise(VectorOperators.LSHL, 7).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 80);
- oV = iV.lanewise(VectorOperators.LSHL, 22).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 10);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 88);
- oV = iV.lanewise(VectorOperators.LSHL, 5).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 96);
- oV = iV.lanewise(VectorOperators.LSHL, 20).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 12);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 104);
- oV = iV.lanewise(VectorOperators.LSHL, 3).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 112);
- oV = iV.lanewise(VectorOperators.LSHL, 18).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 14);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 120);
- oV = iV.lanewise(VectorOperators.LSHL, 1).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 128);
- oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 136);
- oV = iV.lanewise(VectorOperators.LSHL, 31).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 1);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 144);
- oV = iV.lanewise(VectorOperators.LSHL, 14).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 152);
- oV = iV.lanewise(VectorOperators.LSHL, 29).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 3);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 160);
- oV = iV.lanewise(VectorOperators.LSHL, 12).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 168);
- oV = iV.lanewise(VectorOperators.LSHL, 27).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 5);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 176);
- oV = iV.lanewise(VectorOperators.LSHL, 10).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 184);
- oV = iV.lanewise(VectorOperators.LSHL, 25).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 7);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 192);
- oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 200);
- oV = iV.lanewise(VectorOperators.LSHL, 23).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 9);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 208);
- oV = iV.lanewise(VectorOperators.LSHL, 6).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 216);
- oV = iV.lanewise(VectorOperators.LSHL, 21).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 11);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 224);
- oV = iV.lanewise(VectorOperators.LSHL, 4).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 232);
- oV = iV.lanewise(VectorOperators.LSHL, 19).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 13);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 240);
- oV = iV.lanewise(VectorOperators.LSHL, 2).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 248);
- oV = iV.lanewise(VectorOperators.LSHL, 17).or(oV);
-
- oV.intoArray(out, outpos);
- }
-
- private static void fastpackNoMask16(final int[] in, int inpos,
- final int[] out, int outpos) {
- var iV = IntVector.fromArray(SPECIES_512, in, inpos);
- var oV = iV;
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 16);
- oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_512;
- oV = oV.zero(SPECIES_512);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 32);
- oV = iV.or(oV);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 48);
- oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_512;
-
- oV = oV.zero(SPECIES_512);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 64);
- oV = iV.or(oV);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 80);
- oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_512;
-
- oV = oV.zero(SPECIES_512);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 96);
- oV = iV.or(oV);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 112);
- oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_512;
-
- oV = oV.zero(SPECIES_512);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 128);
- oV = iV.or(oV);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 144);
- oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_512;
-
- oV = oV.zero(SPECIES_512);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 160);
- oV = iV.or(oV);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 176);
- oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_512;
-
- oV = oV.zero(SPECIES_512);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 192);
- oV = iV.or(oV);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 208);
- oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_512;
-
- oV = oV.zero(SPECIES_512);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 224);
- oV = iV.or(oV);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 240);
- oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
-
- oV.intoArray(out, outpos);
- }
-
- private static void fastpackNoMask17(final int[] in, int inpos,
- final int[] out, int outpos) {
- var iV = IntVector.fromArray(SPECIES_256, in, inpos);
- var oV = iV;
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 8);
- oV = iV.lanewise(VectorOperators.LSHL, 17).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 15);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 16);
- oV = iV.lanewise(VectorOperators.LSHL, 2).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 24);
- oV = iV.lanewise(VectorOperators.LSHL, 19).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 13);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 32);
- oV = iV.lanewise(VectorOperators.LSHL, 4).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 40);
- oV = iV.lanewise(VectorOperators.LSHL, 21).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 11);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 48);
- oV = iV.lanewise(VectorOperators.LSHL, 6).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 56);
- oV = iV.lanewise(VectorOperators.LSHL, 23).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 9);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 64);
- oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 72);
- oV = iV.lanewise(VectorOperators.LSHL, 25).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 7);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 80);
- oV = iV.lanewise(VectorOperators.LSHL, 10).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 88);
- oV = iV.lanewise(VectorOperators.LSHL, 27).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 5);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 96);
- oV = iV.lanewise(VectorOperators.LSHL, 12).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 104);
- oV = iV.lanewise(VectorOperators.LSHL, 29).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 3);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 112);
- oV = iV.lanewise(VectorOperators.LSHL, 14).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 120);
- oV = iV.lanewise(VectorOperators.LSHL, 31).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 1);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 128);
- oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 16);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 136);
- oV = iV.lanewise(VectorOperators.LSHL, 1).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 144);
- oV = iV.lanewise(VectorOperators.LSHL, 18).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 14);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 152);
- oV = iV.lanewise(VectorOperators.LSHL, 3).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 160);
- oV = iV.lanewise(VectorOperators.LSHL, 20).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 12);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 168);
- oV = iV.lanewise(VectorOperators.LSHL, 5).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 176);
- oV = iV.lanewise(VectorOperators.LSHL, 22).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 10);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 184);
- oV = iV.lanewise(VectorOperators.LSHL, 7).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 192);
- oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 8);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 200);
- oV = iV.lanewise(VectorOperators.LSHL, 9).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 208);
- oV = iV.lanewise(VectorOperators.LSHL, 26).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 6);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 216);
- oV = iV.lanewise(VectorOperators.LSHL, 11).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 224);
- oV = iV.lanewise(VectorOperators.LSHL, 28).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 4);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 232);
- oV = iV.lanewise(VectorOperators.LSHL, 13).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 240);
- oV = iV.lanewise(VectorOperators.LSHL, 30).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 2);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 248);
- oV = iV.lanewise(VectorOperators.LSHL, 15).or(oV);
-
- oV.intoArray(out, outpos);
- }
-
- private static void fastpackNoMask18(final int[] in, int inpos,
- final int[] out, int outpos) {
- var iV = IntVector.fromArray(SPECIES_512, in, inpos);
- var oV = iV;
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 16);
- oV = iV.lanewise(VectorOperators.LSHL, 18).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_512;
- oV = iV.lanewise(VectorOperators.LSHR, 14);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 32);
- oV = iV.lanewise(VectorOperators.LSHL, 4).or(oV);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 48);
- oV = iV.lanewise(VectorOperators.LSHL, 22).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_512;
-
- oV = iV.lanewise(VectorOperators.LSHR, 10);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 64);
- oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 80);
- oV = iV.lanewise(VectorOperators.LSHL, 26).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_512;
-
- oV = iV.lanewise(VectorOperators.LSHR, 6);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 96);
- oV = iV.lanewise(VectorOperators.LSHL, 12).or(oV);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 112);
- oV = iV.lanewise(VectorOperators.LSHL, 30).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_512;
-
- oV = iV.lanewise(VectorOperators.LSHR, 2);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 128);
- oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_512;
-
- oV = iV.lanewise(VectorOperators.LSHR, 16);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 144);
- oV = iV.lanewise(VectorOperators.LSHL, 2).or(oV);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 160);
- oV = iV.lanewise(VectorOperators.LSHL, 20).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_512;
-
- oV = iV.lanewise(VectorOperators.LSHR, 12);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 176);
- oV = iV.lanewise(VectorOperators.LSHL, 6).or(oV);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 192);
- oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_512;
-
- oV = iV.lanewise(VectorOperators.LSHR, 8);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 208);
- oV = iV.lanewise(VectorOperators.LSHL, 10).or(oV);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 224);
- oV = iV.lanewise(VectorOperators.LSHL, 28).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_512;
-
- oV = iV.lanewise(VectorOperators.LSHR, 4);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 240);
- oV = iV.lanewise(VectorOperators.LSHL, 14).or(oV);
-
- oV.intoArray(out, outpos);
- }
-
- private static void fastpackNoMask19(final int[] in, int inpos,
- final int[] out, int outpos) {
- var iV = IntVector.fromArray(SPECIES_256, in, inpos);
- var oV = iV;
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 8);
- oV = iV.lanewise(VectorOperators.LSHL, 19).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 13);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 16);
- oV = iV.lanewise(VectorOperators.LSHL, 6).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 24);
- oV = iV.lanewise(VectorOperators.LSHL, 25).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 7);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 32);
- oV = iV.lanewise(VectorOperators.LSHL, 12).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 40);
- oV = iV.lanewise(VectorOperators.LSHL, 31).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 1);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 48);
- oV = iV.lanewise(VectorOperators.LSHL, 18).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 14);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 56);
- oV = iV.lanewise(VectorOperators.LSHL, 5).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 64);
- oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 8);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 72);
- oV = iV.lanewise(VectorOperators.LSHL, 11).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 80);
- oV = iV.lanewise(VectorOperators.LSHL, 30).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 2);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 88);
- oV = iV.lanewise(VectorOperators.LSHL, 17).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 15);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 96);
- oV = iV.lanewise(VectorOperators.LSHL, 4).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 104);
- oV = iV.lanewise(VectorOperators.LSHL, 23).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 9);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 112);
- oV = iV.lanewise(VectorOperators.LSHL, 10).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 120);
- oV = iV.lanewise(VectorOperators.LSHL, 29).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 3);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 128);
- oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 16);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 136);
- oV = iV.lanewise(VectorOperators.LSHL, 3).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 144);
- oV = iV.lanewise(VectorOperators.LSHL, 22).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 10);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 152);
- oV = iV.lanewise(VectorOperators.LSHL, 9).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 160);
- oV = iV.lanewise(VectorOperators.LSHL, 28).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 4);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 168);
- oV = iV.lanewise(VectorOperators.LSHL, 15).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 17);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 176);
- oV = iV.lanewise(VectorOperators.LSHL, 2).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 184);
- oV = iV.lanewise(VectorOperators.LSHL, 21).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 11);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 192);
- oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 200);
- oV = iV.lanewise(VectorOperators.LSHL, 27).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 5);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 208);
- oV = iV.lanewise(VectorOperators.LSHL, 14).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 18);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 216);
- oV = iV.lanewise(VectorOperators.LSHL, 1).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 224);
- oV = iV.lanewise(VectorOperators.LSHL, 20).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 12);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 232);
- oV = iV.lanewise(VectorOperators.LSHL, 7).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 240);
- oV = iV.lanewise(VectorOperators.LSHL, 26).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 6);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 248);
- oV = iV.lanewise(VectorOperators.LSHL, 13).or(oV);
-
- oV.intoArray(out, outpos);
- }
-
- private static void fastpackNoMask20(final int[] in, int inpos,
- final int[] out, int outpos) {
- var iV = IntVector.fromArray(SPECIES_512, in, inpos);
- var oV = iV;
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 16);
- oV = iV.lanewise(VectorOperators.LSHL, 20).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_512;
- oV = iV.lanewise(VectorOperators.LSHR, 12);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 32);
- oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 48);
- oV = iV.lanewise(VectorOperators.LSHL, 28).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_512;
-
- oV = iV.lanewise(VectorOperators.LSHR, 4);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 64);
- oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_512;
-
- oV = iV.lanewise(VectorOperators.LSHR, 16);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 80);
- oV = iV.lanewise(VectorOperators.LSHL, 4).or(oV);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 96);
- oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_512;
-
- oV = iV.lanewise(VectorOperators.LSHR, 8);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 112);
- oV = iV.lanewise(VectorOperators.LSHL, 12).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_512;
-
- oV = oV.zero(SPECIES_512);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 128);
- oV = iV.or(oV);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 144);
- oV = iV.lanewise(VectorOperators.LSHL, 20).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_512;
-
- oV = iV.lanewise(VectorOperators.LSHR, 12);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 160);
- oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 176);
- oV = iV.lanewise(VectorOperators.LSHL, 28).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_512;
-
- oV = iV.lanewise(VectorOperators.LSHR, 4);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 192);
- oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_512;
-
- oV = iV.lanewise(VectorOperators.LSHR, 16);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 208);
- oV = iV.lanewise(VectorOperators.LSHL, 4).or(oV);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 224);
- oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_512;
-
- oV = iV.lanewise(VectorOperators.LSHR, 8);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 240);
- oV = iV.lanewise(VectorOperators.LSHL, 12).or(oV);
-
- oV.intoArray(out, outpos);
- }
-
- private static void fastpackNoMask21(final int[] in, int inpos,
- final int[] out, int outpos) {
- var iV = IntVector.fromArray(SPECIES_256, in, inpos);
- var oV = iV;
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 8);
- oV = iV.lanewise(VectorOperators.LSHL, 21).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 11);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 16);
- oV = iV.lanewise(VectorOperators.LSHL, 10).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 24);
- oV = iV.lanewise(VectorOperators.LSHL, 31).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 1);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 32);
- oV = iV.lanewise(VectorOperators.LSHL, 20).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 12);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 40);
- oV = iV.lanewise(VectorOperators.LSHL, 9).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 48);
- oV = iV.lanewise(VectorOperators.LSHL, 30).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 2);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 56);
- oV = iV.lanewise(VectorOperators.LSHL, 19).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 13);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 64);
- oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 72);
- oV = iV.lanewise(VectorOperators.LSHL, 29).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 3);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 80);
- oV = iV.lanewise(VectorOperators.LSHL, 18).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 14);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 88);
- oV = iV.lanewise(VectorOperators.LSHL, 7).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 96);
- oV = iV.lanewise(VectorOperators.LSHL, 28).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 4);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 104);
- oV = iV.lanewise(VectorOperators.LSHL, 17).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 15);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 112);
- oV = iV.lanewise(VectorOperators.LSHL, 6).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 120);
- oV = iV.lanewise(VectorOperators.LSHL, 27).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 5);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 128);
- oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 16);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 136);
- oV = iV.lanewise(VectorOperators.LSHL, 5).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 144);
- oV = iV.lanewise(VectorOperators.LSHL, 26).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 6);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 152);
- oV = iV.lanewise(VectorOperators.LSHL, 15).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 17);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 160);
- oV = iV.lanewise(VectorOperators.LSHL, 4).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 168);
- oV = iV.lanewise(VectorOperators.LSHL, 25).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 7);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 176);
- oV = iV.lanewise(VectorOperators.LSHL, 14).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 18);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 184);
- oV = iV.lanewise(VectorOperators.LSHL, 3).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 192);
- oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 8);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 200);
- oV = iV.lanewise(VectorOperators.LSHL, 13).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 19);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 208);
- oV = iV.lanewise(VectorOperators.LSHL, 2).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 216);
- oV = iV.lanewise(VectorOperators.LSHL, 23).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 9);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 224);
- oV = iV.lanewise(VectorOperators.LSHL, 12).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 20);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 232);
- oV = iV.lanewise(VectorOperators.LSHL, 1).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 240);
- oV = iV.lanewise(VectorOperators.LSHL, 22).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 10);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 248);
- oV = iV.lanewise(VectorOperators.LSHL, 11).or(oV);
-
- oV.intoArray(out, outpos);
- }
-
- private static void fastpackNoMask22(final int[] in, int inpos,
- final int[] out, int outpos) {
- var iV = IntVector.fromArray(SPECIES_512, in, inpos);
- var oV = iV;
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 16);
- oV = iV.lanewise(VectorOperators.LSHL, 22).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_512;
- oV = iV.lanewise(VectorOperators.LSHR, 10);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 32);
- oV = iV.lanewise(VectorOperators.LSHL, 12).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_512;
-
- oV = iV.lanewise(VectorOperators.LSHR, 20);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 48);
- oV = iV.lanewise(VectorOperators.LSHL, 2).or(oV);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 64);
- oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_512;
-
- oV = iV.lanewise(VectorOperators.LSHR, 8);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 80);
- oV = iV.lanewise(VectorOperators.LSHL, 14).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_512;
-
- oV = iV.lanewise(VectorOperators.LSHR, 18);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 96);
- oV = iV.lanewise(VectorOperators.LSHL, 4).or(oV);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 112);
- oV = iV.lanewise(VectorOperators.LSHL, 26).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_512;
-
- oV = iV.lanewise(VectorOperators.LSHR, 6);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 128);
- oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_512;
-
- oV = iV.lanewise(VectorOperators.LSHR, 16);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 144);
- oV = iV.lanewise(VectorOperators.LSHL, 6).or(oV);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 160);
- oV = iV.lanewise(VectorOperators.LSHL, 28).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_512;
-
- oV = iV.lanewise(VectorOperators.LSHR, 4);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 176);
- oV = iV.lanewise(VectorOperators.LSHL, 18).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_512;
-
- oV = iV.lanewise(VectorOperators.LSHR, 14);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 192);
- oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 208);
- oV = iV.lanewise(VectorOperators.LSHL, 30).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_512;
-
- oV = iV.lanewise(VectorOperators.LSHR, 2);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 224);
- oV = iV.lanewise(VectorOperators.LSHL, 20).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_512;
-
- oV = iV.lanewise(VectorOperators.LSHR, 12);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 240);
- oV = iV.lanewise(VectorOperators.LSHL, 10).or(oV);
-
- oV.intoArray(out, outpos);
- }
-
- private static void fastpackNoMask23(final int[] in, int inpos,
- final int[] out, int outpos) {
- var iV = IntVector.fromArray(SPECIES_256, in, inpos);
- var oV = iV;
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 8);
- oV = iV.lanewise(VectorOperators.LSHL, 23).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 9);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 16);
- oV = iV.lanewise(VectorOperators.LSHL, 14).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 18);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 24);
- oV = iV.lanewise(VectorOperators.LSHL, 5).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 32);
- oV = iV.lanewise(VectorOperators.LSHL, 28).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 4);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 40);
- oV = iV.lanewise(VectorOperators.LSHL, 19).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 13);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 48);
- oV = iV.lanewise(VectorOperators.LSHL, 10).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 22);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 56);
- oV = iV.lanewise(VectorOperators.LSHL, 1).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 64);
- oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 8);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 72);
- oV = iV.lanewise(VectorOperators.LSHL, 15).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 17);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 80);
- oV = iV.lanewise(VectorOperators.LSHL, 6).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 88);
- oV = iV.lanewise(VectorOperators.LSHL, 29).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 3);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 96);
- oV = iV.lanewise(VectorOperators.LSHL, 20).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 12);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 104);
- oV = iV.lanewise(VectorOperators.LSHL, 11).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 21);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 112);
- oV = iV.lanewise(VectorOperators.LSHL, 2).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 120);
- oV = iV.lanewise(VectorOperators.LSHL, 25).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 7);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 128);
- oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 16);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 136);
- oV = iV.lanewise(VectorOperators.LSHL, 7).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 144);
- oV = iV.lanewise(VectorOperators.LSHL, 30).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 2);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 152);
- oV = iV.lanewise(VectorOperators.LSHL, 21).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 11);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 160);
- oV = iV.lanewise(VectorOperators.LSHL, 12).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 20);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 168);
- oV = iV.lanewise(VectorOperators.LSHL, 3).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 176);
- oV = iV.lanewise(VectorOperators.LSHL, 26).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 6);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 184);
- oV = iV.lanewise(VectorOperators.LSHL, 17).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 15);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 192);
- oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 200);
- oV = iV.lanewise(VectorOperators.LSHL, 31).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 1);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 208);
- oV = iV.lanewise(VectorOperators.LSHL, 22).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 10);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 216);
- oV = iV.lanewise(VectorOperators.LSHL, 13).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 19);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 224);
- oV = iV.lanewise(VectorOperators.LSHL, 4).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 232);
- oV = iV.lanewise(VectorOperators.LSHL, 27).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 5);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 240);
- oV = iV.lanewise(VectorOperators.LSHL, 18).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 14);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 248);
- oV = iV.lanewise(VectorOperators.LSHL, 9).or(oV);
-
- oV.intoArray(out, outpos);
- }
-
- private static void fastpackNoMask24(final int[] in, int inpos,
- final int[] out, int outpos) {
- var iV = IntVector.fromArray(SPECIES_512, in, inpos);
- var oV = iV;
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 16);
- oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_512;
- oV = iV.lanewise(VectorOperators.LSHR, 8);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 32);
- oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_512;
-
- oV = iV.lanewise(VectorOperators.LSHR, 16);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 48);
- oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_512;
-
- oV = oV.zero(SPECIES_512);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 64);
- oV = iV.or(oV);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 80);
- oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_512;
-
- oV = iV.lanewise(VectorOperators.LSHR, 8);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 96);
- oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_512;
-
- oV = iV.lanewise(VectorOperators.LSHR, 16);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 112);
- oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_512;
-
- oV = oV.zero(SPECIES_512);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 128);
- oV = iV.or(oV);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 144);
- oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_512;
-
- oV = iV.lanewise(VectorOperators.LSHR, 8);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 160);
- oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_512;
-
- oV = iV.lanewise(VectorOperators.LSHR, 16);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 176);
- oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_512;
-
- oV = oV.zero(SPECIES_512);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 192);
- oV = iV.or(oV);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 208);
- oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_512;
-
- oV = iV.lanewise(VectorOperators.LSHR, 8);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 224);
- oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_512;
-
- oV = iV.lanewise(VectorOperators.LSHR, 16);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 240);
- oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
-
- oV.intoArray(out, outpos);
- }
-
- private static void fastpackNoMask25(final int[] in, int inpos,
- final int[] out, int outpos) {
- var iV = IntVector.fromArray(SPECIES_256, in, inpos);
- var oV = iV;
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 8);
- oV = iV.lanewise(VectorOperators.LSHL, 25).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 7);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 16);
- oV = iV.lanewise(VectorOperators.LSHL, 18).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 14);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 24);
- oV = iV.lanewise(VectorOperators.LSHL, 11).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 21);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 32);
- oV = iV.lanewise(VectorOperators.LSHL, 4).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 40);
- oV = iV.lanewise(VectorOperators.LSHL, 29).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 3);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 48);
- oV = iV.lanewise(VectorOperators.LSHL, 22).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 10);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 56);
- oV = iV.lanewise(VectorOperators.LSHL, 15).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 17);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 64);
- oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 24);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 72);
- oV = iV.lanewise(VectorOperators.LSHL, 1).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 80);
- oV = iV.lanewise(VectorOperators.LSHL, 26).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 6);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 88);
- oV = iV.lanewise(VectorOperators.LSHL, 19).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 13);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 96);
- oV = iV.lanewise(VectorOperators.LSHL, 12).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 20);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 104);
- oV = iV.lanewise(VectorOperators.LSHL, 5).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 112);
- oV = iV.lanewise(VectorOperators.LSHL, 30).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 2);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 120);
- oV = iV.lanewise(VectorOperators.LSHL, 23).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 9);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 128);
- oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 16);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 136);
- oV = iV.lanewise(VectorOperators.LSHL, 9).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 23);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 144);
- oV = iV.lanewise(VectorOperators.LSHL, 2).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 152);
- oV = iV.lanewise(VectorOperators.LSHL, 27).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 5);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 160);
- oV = iV.lanewise(VectorOperators.LSHL, 20).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 12);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 168);
- oV = iV.lanewise(VectorOperators.LSHL, 13).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 19);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 176);
- oV = iV.lanewise(VectorOperators.LSHL, 6).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 184);
- oV = iV.lanewise(VectorOperators.LSHL, 31).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 1);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 192);
- oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 8);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 200);
- oV = iV.lanewise(VectorOperators.LSHL, 17).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 15);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 208);
- oV = iV.lanewise(VectorOperators.LSHL, 10).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 22);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 216);
- oV = iV.lanewise(VectorOperators.LSHL, 3).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 224);
- oV = iV.lanewise(VectorOperators.LSHL, 28).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 4);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 232);
- oV = iV.lanewise(VectorOperators.LSHL, 21).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 11);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 240);
- oV = iV.lanewise(VectorOperators.LSHL, 14).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 18);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 248);
- oV = iV.lanewise(VectorOperators.LSHL, 7).or(oV);
-
- oV.intoArray(out, outpos);
- }
-
- private static void fastpackNoMask26(final int[] in, int inpos,
- final int[] out, int outpos) {
- var iV = IntVector.fromArray(SPECIES_512, in, inpos);
- var oV = iV;
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 16);
- oV = iV.lanewise(VectorOperators.LSHL, 26).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_512;
- oV = iV.lanewise(VectorOperators.LSHR, 6);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 32);
- oV = iV.lanewise(VectorOperators.LSHL, 20).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_512;
-
- oV = iV.lanewise(VectorOperators.LSHR, 12);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 48);
- oV = iV.lanewise(VectorOperators.LSHL, 14).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_512;
-
- oV = iV.lanewise(VectorOperators.LSHR, 18);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 64);
- oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_512;
-
- oV = iV.lanewise(VectorOperators.LSHR, 24);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 80);
- oV = iV.lanewise(VectorOperators.LSHL, 2).or(oV);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 96);
- oV = iV.lanewise(VectorOperators.LSHL, 28).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_512;
-
- oV = iV.lanewise(VectorOperators.LSHR, 4);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 112);
- oV = iV.lanewise(VectorOperators.LSHL, 22).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_512;
-
- oV = iV.lanewise(VectorOperators.LSHR, 10);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 128);
- oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_512;
-
- oV = iV.lanewise(VectorOperators.LSHR, 16);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 144);
- oV = iV.lanewise(VectorOperators.LSHL, 10).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_512;
-
- oV = iV.lanewise(VectorOperators.LSHR, 22);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 160);
- oV = iV.lanewise(VectorOperators.LSHL, 4).or(oV);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 176);
- oV = iV.lanewise(VectorOperators.LSHL, 30).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_512;
-
- oV = iV.lanewise(VectorOperators.LSHR, 2);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 192);
- oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_512;
-
- oV = iV.lanewise(VectorOperators.LSHR, 8);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 208);
- oV = iV.lanewise(VectorOperators.LSHL, 18).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_512;
-
- oV = iV.lanewise(VectorOperators.LSHR, 14);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 224);
- oV = iV.lanewise(VectorOperators.LSHL, 12).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_512;
-
- oV = iV.lanewise(VectorOperators.LSHR, 20);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 240);
- oV = iV.lanewise(VectorOperators.LSHL, 6).or(oV);
-
- oV.intoArray(out, outpos);
- }
-
- private static void fastpackNoMask27(final int[] in, int inpos,
- final int[] out, int outpos) {
- var iV = IntVector.fromArray(SPECIES_256, in, inpos);
- var oV = iV;
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 8);
- oV = iV.lanewise(VectorOperators.LSHL, 27).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 5);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 16);
- oV = iV.lanewise(VectorOperators.LSHL, 22).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 10);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 24);
- oV = iV.lanewise(VectorOperators.LSHL, 17).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 15);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 32);
- oV = iV.lanewise(VectorOperators.LSHL, 12).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 20);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 40);
- oV = iV.lanewise(VectorOperators.LSHL, 7).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 25);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 48);
- oV = iV.lanewise(VectorOperators.LSHL, 2).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 56);
- oV = iV.lanewise(VectorOperators.LSHL, 29).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 3);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 64);
- oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 8);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 72);
- oV = iV.lanewise(VectorOperators.LSHL, 19).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 13);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 80);
- oV = iV.lanewise(VectorOperators.LSHL, 14).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 18);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 88);
- oV = iV.lanewise(VectorOperators.LSHL, 9).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 23);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 96);
- oV = iV.lanewise(VectorOperators.LSHL, 4).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 104);
- oV = iV.lanewise(VectorOperators.LSHL, 31).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 1);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 112);
- oV = iV.lanewise(VectorOperators.LSHL, 26).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 6);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 120);
- oV = iV.lanewise(VectorOperators.LSHL, 21).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 11);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 128);
- oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 16);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 136);
- oV = iV.lanewise(VectorOperators.LSHL, 11).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 21);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 144);
- oV = iV.lanewise(VectorOperators.LSHL, 6).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 26);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 152);
- oV = iV.lanewise(VectorOperators.LSHL, 1).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 160);
- oV = iV.lanewise(VectorOperators.LSHL, 28).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 4);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 168);
- oV = iV.lanewise(VectorOperators.LSHL, 23).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 9);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 176);
- oV = iV.lanewise(VectorOperators.LSHL, 18).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 14);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 184);
- oV = iV.lanewise(VectorOperators.LSHL, 13).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 19);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 192);
- oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 24);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 200);
- oV = iV.lanewise(VectorOperators.LSHL, 3).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 208);
- oV = iV.lanewise(VectorOperators.LSHL, 30).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 2);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 216);
- oV = iV.lanewise(VectorOperators.LSHL, 25).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 7);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 224);
- oV = iV.lanewise(VectorOperators.LSHL, 20).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 12);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 232);
- oV = iV.lanewise(VectorOperators.LSHL, 15).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 17);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 240);
- oV = iV.lanewise(VectorOperators.LSHL, 10).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 22);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 248);
- oV = iV.lanewise(VectorOperators.LSHL, 5).or(oV);
-
- oV.intoArray(out, outpos);
- }
-
- private static void fastpackNoMask28(final int[] in, int inpos,
- final int[] out, int outpos) {
- var iV = IntVector.fromArray(SPECIES_512, in, inpos);
- var oV = iV;
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 16);
- oV = iV.lanewise(VectorOperators.LSHL, 28).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_512;
- oV = iV.lanewise(VectorOperators.LSHR, 4);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 32);
- oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_512;
-
- oV = iV.lanewise(VectorOperators.LSHR, 8);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 48);
- oV = iV.lanewise(VectorOperators.LSHL, 20).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_512;
-
- oV = iV.lanewise(VectorOperators.LSHR, 12);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 64);
- oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_512;
-
- oV = iV.lanewise(VectorOperators.LSHR, 16);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 80);
- oV = iV.lanewise(VectorOperators.LSHL, 12).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_512;
-
- oV = iV.lanewise(VectorOperators.LSHR, 20);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 96);
- oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_512;
-
- oV = iV.lanewise(VectorOperators.LSHR, 24);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 112);
- oV = iV.lanewise(VectorOperators.LSHL, 4).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_512;
-
- oV = oV.zero(SPECIES_512);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 128);
- oV = iV.or(oV);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 144);
- oV = iV.lanewise(VectorOperators.LSHL, 28).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_512;
-
- oV = iV.lanewise(VectorOperators.LSHR, 4);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 160);
- oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_512;
-
- oV = iV.lanewise(VectorOperators.LSHR, 8);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 176);
- oV = iV.lanewise(VectorOperators.LSHL, 20).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_512;
-
- oV = iV.lanewise(VectorOperators.LSHR, 12);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 192);
- oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_512;
-
- oV = iV.lanewise(VectorOperators.LSHR, 16);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 208);
- oV = iV.lanewise(VectorOperators.LSHL, 12).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_512;
-
- oV = iV.lanewise(VectorOperators.LSHR, 20);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 224);
- oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_512;
-
- oV = iV.lanewise(VectorOperators.LSHR, 24);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 240);
- oV = iV.lanewise(VectorOperators.LSHL, 4).or(oV);
-
- oV.intoArray(out, outpos);
- }
-
- private static void fastpackNoMask29(final int[] in, int inpos,
- final int[] out, int outpos) {
- var iV = IntVector.fromArray(SPECIES_256, in, inpos);
- var oV = iV;
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 8);
- oV = iV.lanewise(VectorOperators.LSHL, 29).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 3);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 16);
- oV = iV.lanewise(VectorOperators.LSHL, 26).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 6);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 24);
- oV = iV.lanewise(VectorOperators.LSHL, 23).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 9);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 32);
- oV = iV.lanewise(VectorOperators.LSHL, 20).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 12);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 40);
- oV = iV.lanewise(VectorOperators.LSHL, 17).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 15);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 48);
- oV = iV.lanewise(VectorOperators.LSHL, 14).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 18);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 56);
- oV = iV.lanewise(VectorOperators.LSHL, 11).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 21);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 64);
- oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 24);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 72);
- oV = iV.lanewise(VectorOperators.LSHL, 5).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 27);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 80);
- oV = iV.lanewise(VectorOperators.LSHL, 2).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 88);
- oV = iV.lanewise(VectorOperators.LSHL, 31).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 1);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 96);
- oV = iV.lanewise(VectorOperators.LSHL, 28).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 4);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 104);
- oV = iV.lanewise(VectorOperators.LSHL, 25).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 7);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 112);
- oV = iV.lanewise(VectorOperators.LSHL, 22).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 10);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 120);
- oV = iV.lanewise(VectorOperators.LSHL, 19).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 13);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 128);
- oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 16);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 136);
- oV = iV.lanewise(VectorOperators.LSHL, 13).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 19);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 144);
- oV = iV.lanewise(VectorOperators.LSHL, 10).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 22);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 152);
- oV = iV.lanewise(VectorOperators.LSHL, 7).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 25);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 160);
- oV = iV.lanewise(VectorOperators.LSHL, 4).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 28);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 168);
- oV = iV.lanewise(VectorOperators.LSHL, 1).or(oV);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 176);
- oV = iV.lanewise(VectorOperators.LSHL, 30).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 2);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 184);
- oV = iV.lanewise(VectorOperators.LSHL, 27).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 5);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 192);
- oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 8);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 200);
- oV = iV.lanewise(VectorOperators.LSHL, 21).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 11);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 208);
- oV = iV.lanewise(VectorOperators.LSHL, 18).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 14);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 216);
- oV = iV.lanewise(VectorOperators.LSHL, 15).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 17);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 224);
- oV = iV.lanewise(VectorOperators.LSHL, 12).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 20);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 232);
- oV = iV.lanewise(VectorOperators.LSHL, 9).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 23);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 240);
- oV = iV.lanewise(VectorOperators.LSHL, 6).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 26);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 248);
- oV = iV.lanewise(VectorOperators.LSHL, 3).or(oV);
-
- oV.intoArray(out, outpos);
- }
-
- private static void fastpackNoMask30(final int[] in, int inpos,
- final int[] out, int outpos) {
- var iV = IntVector.fromArray(SPECIES_512, in, inpos);
- var oV = iV;
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 16);
- oV = iV.lanewise(VectorOperators.LSHL, 30).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_512;
- oV = iV.lanewise(VectorOperators.LSHR, 2);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 32);
- oV = iV.lanewise(VectorOperators.LSHL, 28).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_512;
-
- oV = iV.lanewise(VectorOperators.LSHR, 4);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 48);
- oV = iV.lanewise(VectorOperators.LSHL, 26).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_512;
-
- oV = iV.lanewise(VectorOperators.LSHR, 6);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 64);
- oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_512;
-
- oV = iV.lanewise(VectorOperators.LSHR, 8);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 80);
- oV = iV.lanewise(VectorOperators.LSHL, 22).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_512;
-
- oV = iV.lanewise(VectorOperators.LSHR, 10);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 96);
- oV = iV.lanewise(VectorOperators.LSHL, 20).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_512;
-
- oV = iV.lanewise(VectorOperators.LSHR, 12);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 112);
- oV = iV.lanewise(VectorOperators.LSHL, 18).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_512;
-
- oV = iV.lanewise(VectorOperators.LSHR, 14);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 128);
- oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_512;
-
- oV = iV.lanewise(VectorOperators.LSHR, 16);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 144);
- oV = iV.lanewise(VectorOperators.LSHL, 14).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_512;
-
- oV = iV.lanewise(VectorOperators.LSHR, 18);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 160);
- oV = iV.lanewise(VectorOperators.LSHL, 12).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_512;
-
- oV = iV.lanewise(VectorOperators.LSHR, 20);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 176);
- oV = iV.lanewise(VectorOperators.LSHL, 10).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_512;
-
- oV = iV.lanewise(VectorOperators.LSHR, 22);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 192);
- oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_512;
-
- oV = iV.lanewise(VectorOperators.LSHR, 24);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 208);
- oV = iV.lanewise(VectorOperators.LSHL, 6).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_512;
-
- oV = iV.lanewise(VectorOperators.LSHR, 26);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 224);
- oV = iV.lanewise(VectorOperators.LSHL, 4).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_512;
-
- oV = iV.lanewise(VectorOperators.LSHR, 28);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 240);
- oV = iV.lanewise(VectorOperators.LSHL, 2).or(oV);
-
- oV.intoArray(out, outpos);
- }
-
- private static void fastpackNoMask31(final int[] in, int inpos,
- final int[] out, int outpos) {
- var iV = IntVector.fromArray(SPECIES_256, in, inpos);
- var oV = iV;
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 8);
- oV = iV.lanewise(VectorOperators.LSHL, 31).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 1);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 16);
- oV = iV.lanewise(VectorOperators.LSHL, 30).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 2);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 24);
- oV = iV.lanewise(VectorOperators.LSHL, 29).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 3);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 32);
- oV = iV.lanewise(VectorOperators.LSHL, 28).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 4);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 40);
- oV = iV.lanewise(VectorOperators.LSHL, 27).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 5);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 48);
- oV = iV.lanewise(VectorOperators.LSHL, 26).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 6);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 56);
- oV = iV.lanewise(VectorOperators.LSHL, 25).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 7);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 64);
- oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 8);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 72);
- oV = iV.lanewise(VectorOperators.LSHL, 23).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 9);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 80);
- oV = iV.lanewise(VectorOperators.LSHL, 22).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 10);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 88);
- oV = iV.lanewise(VectorOperators.LSHL, 21).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 11);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 96);
- oV = iV.lanewise(VectorOperators.LSHL, 20).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 12);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 104);
- oV = iV.lanewise(VectorOperators.LSHL, 19).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 13);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 112);
- oV = iV.lanewise(VectorOperators.LSHL, 18).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 14);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 120);
- oV = iV.lanewise(VectorOperators.LSHL, 17).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 15);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 128);
- oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 16);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 136);
- oV = iV.lanewise(VectorOperators.LSHL, 15).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 17);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 144);
- oV = iV.lanewise(VectorOperators.LSHL, 14).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 18);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 152);
- oV = iV.lanewise(VectorOperators.LSHL, 13).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 19);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 160);
- oV = iV.lanewise(VectorOperators.LSHL, 12).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 20);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 168);
- oV = iV.lanewise(VectorOperators.LSHL, 11).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 21);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 176);
- oV = iV.lanewise(VectorOperators.LSHL, 10).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 22);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 184);
- oV = iV.lanewise(VectorOperators.LSHL, 9).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 23);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 192);
- oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 24);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 200);
- oV = iV.lanewise(VectorOperators.LSHL, 7).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 25);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 208);
- oV = iV.lanewise(VectorOperators.LSHL, 6).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 26);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 216);
- oV = iV.lanewise(VectorOperators.LSHL, 5).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 27);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 224);
- oV = iV.lanewise(VectorOperators.LSHL, 4).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 28);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 232);
- oV = iV.lanewise(VectorOperators.LSHL, 3).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 29);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 240);
- oV = iV.lanewise(VectorOperators.LSHL, 2).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 30);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 248);
- oV = iV.lanewise(VectorOperators.LSHL, 1).or(oV);
-
- oV.intoArray(out, outpos);
- }
-
- private static void fastunpack1(final int[] in, int inpos, final int[] out,
- int outpos) {
- var iV = IntVector.fromArray(SPECIES_256, in, inpos);
- iV.and(MASK_1).intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 1).and(MASK_1).intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 2).and(MASK_1).intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 3).and(MASK_1).intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 4).and(MASK_1).intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 5).and(MASK_1).intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 6).and(MASK_1).intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 7).and(MASK_1).intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 8).and(MASK_1).intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 9).and(MASK_1).intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 10).and(MASK_1).intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 11).and(MASK_1).intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 12).and(MASK_1).intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 13).and(MASK_1).intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 14).and(MASK_1).intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 15).and(MASK_1).intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 16).and(MASK_1).intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 17).and(MASK_1).intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 18).and(MASK_1).intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 19).and(MASK_1).intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 20).and(MASK_1).intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 21).and(MASK_1).intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 22).and(MASK_1).intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 23).and(MASK_1).intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 24).and(MASK_1).intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 25).and(MASK_1).intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 26).and(MASK_1).intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 27).and(MASK_1).intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 28).and(MASK_1).intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 29).and(MASK_1).intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 30).and(MASK_1).intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 31).and(MASK_1).intoArray(out, outpos);
- outpos += VLEN_256;
- }
-
- private static void fastunpack2(final int[] in, int inpos, final int[] out,
- int outpos) {
- var iV = IntVector.fromArray(SPECIES_512, in, inpos);
- iV.and(MASK_2).intoArray(out, outpos);
- outpos += VLEN_512;
-
- iV.lanewise(VectorOperators.LSHR, 2).and(MASK_2).intoArray(out, outpos);
- outpos += VLEN_512;
-
- iV.lanewise(VectorOperators.LSHR, 4).and(MASK_2).intoArray(out, outpos);
- outpos += VLEN_512;
-
- iV.lanewise(VectorOperators.LSHR, 6).and(MASK_2).intoArray(out, outpos);
- outpos += VLEN_512;
-
- iV.lanewise(VectorOperators.LSHR, 8).and(MASK_2).intoArray(out, outpos);
- outpos += VLEN_512;
-
- iV.lanewise(VectorOperators.LSHR, 10).and(MASK_2).intoArray(out, outpos);
- outpos += VLEN_512;
-
- iV.lanewise(VectorOperators.LSHR, 12).and(MASK_2).intoArray(out, outpos);
- outpos += VLEN_512;
-
- iV.lanewise(VectorOperators.LSHR, 14).and(MASK_2).intoArray(out, outpos);
- outpos += VLEN_512;
-
- iV.lanewise(VectorOperators.LSHR, 16).and(MASK_2).intoArray(out, outpos);
- outpos += VLEN_512;
-
- iV.lanewise(VectorOperators.LSHR, 18).and(MASK_2).intoArray(out, outpos);
- outpos += VLEN_512;
-
- iV.lanewise(VectorOperators.LSHR, 20).and(MASK_2).intoArray(out, outpos);
- outpos += VLEN_512;
-
- iV.lanewise(VectorOperators.LSHR, 22).and(MASK_2).intoArray(out, outpos);
- outpos += VLEN_512;
-
- iV.lanewise(VectorOperators.LSHR, 24).and(MASK_2).intoArray(out, outpos);
- outpos += VLEN_512;
-
- iV.lanewise(VectorOperators.LSHR, 26).and(MASK_2).intoArray(out, outpos);
- outpos += VLEN_512;
-
- iV.lanewise(VectorOperators.LSHR, 28).and(MASK_2).intoArray(out, outpos);
- outpos += VLEN_512;
-
- iV.lanewise(VectorOperators.LSHR, 30).and(MASK_2).intoArray(out, outpos);
- outpos += VLEN_512;
- }
-
- private static void fastunpack3(final int[] in, int inpos, final int[] out,
- int outpos) {
- var iV = IntVector.fromArray(SPECIES_256, in, inpos);
- iV.and(MASK_3).intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 3).and(MASK_3).intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 6).and(MASK_3).intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 9).and(MASK_3).intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 12).and(MASK_3).intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 15).and(MASK_3).intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 18).and(MASK_3).intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 21).and(MASK_3).intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 24).and(MASK_3).intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 27).and(MASK_3).intoArray(out, outpos);
- outpos += VLEN_256;
-
- var oV = iV.lanewise(VectorOperators.LSHR, 30).and(MASK_3);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 8);
- oV = iV.and(1).lanewise(VectorOperators.LSHL, 2).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 1).and(MASK_3).intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 4).and(MASK_3).intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 7).and(MASK_3).intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 10).and(MASK_3).intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 13).and(MASK_3).intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 16).and(MASK_3).intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 19).and(MASK_3).intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 22).and(MASK_3).intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 25).and(MASK_3).intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 28).and(MASK_3).intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 31).and(MASK_3);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 16);
- oV = iV.and(3).lanewise(VectorOperators.LSHL, 1).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 2).and(MASK_3).intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 5).and(MASK_3).intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 8).and(MASK_3).intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 11).and(MASK_3).intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 14).and(MASK_3).intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 17).and(MASK_3).intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 20).and(MASK_3).intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 23).and(MASK_3).intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 26).and(MASK_3).intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 29).and(MASK_3).intoArray(out, outpos);
- outpos += VLEN_256;
- }
-
- private static void fastunpack4(final int[] in, int inpos, final int[] out,
- int outpos) {
- var iV = IntVector.fromArray(SPECIES_512, in, inpos);
- iV.and(MASK_4).intoArray(out, outpos);
- outpos += VLEN_512;
-
- iV.lanewise(VectorOperators.LSHR, 4).and(MASK_4).intoArray(out, outpos);
- outpos += VLEN_512;
-
- iV.lanewise(VectorOperators.LSHR, 8).and(MASK_4).intoArray(out, outpos);
- outpos += VLEN_512;
-
- iV.lanewise(VectorOperators.LSHR, 12).and(MASK_4).intoArray(out, outpos);
- outpos += VLEN_512;
-
- iV.lanewise(VectorOperators.LSHR, 16).and(MASK_4).intoArray(out, outpos);
- outpos += VLEN_512;
-
- iV.lanewise(VectorOperators.LSHR, 20).and(MASK_4).intoArray(out, outpos);
- outpos += VLEN_512;
-
- iV.lanewise(VectorOperators.LSHR, 24).and(MASK_4).intoArray(out, outpos);
- outpos += VLEN_512;
-
- iV.lanewise(VectorOperators.LSHR, 28).and(MASK_4).intoArray(out, outpos);
- outpos += VLEN_512;
-
- var oV = iV.and(MASK_4);
- oV = oV.zero(SPECIES_512);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 16);
- oV = iV.and(0xf).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_512;
-
- iV.lanewise(VectorOperators.LSHR, 4).and(MASK_4).intoArray(out, outpos);
- outpos += VLEN_512;
-
- iV.lanewise(VectorOperators.LSHR, 8).and(MASK_4).intoArray(out, outpos);
- outpos += VLEN_512;
-
- iV.lanewise(VectorOperators.LSHR, 12).and(MASK_4).intoArray(out, outpos);
- outpos += VLEN_512;
-
- iV.lanewise(VectorOperators.LSHR, 16).and(MASK_4).intoArray(out, outpos);
- outpos += VLEN_512;
-
- iV.lanewise(VectorOperators.LSHR, 20).and(MASK_4).intoArray(out, outpos);
- outpos += VLEN_512;
-
- iV.lanewise(VectorOperators.LSHR, 24).and(MASK_4).intoArray(out, outpos);
- outpos += VLEN_512;
-
- iV.lanewise(VectorOperators.LSHR, 28).and(MASK_4).intoArray(out, outpos);
- outpos += VLEN_512;
- }
-
- private static void fastunpack5(final int[] in, int inpos, final int[] out,
- int outpos) {
- var iV = IntVector.fromArray(SPECIES_256, in, inpos);
- iV.and(MASK_5).intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 5).and(MASK_5).intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 10).and(MASK_5).intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 15).and(MASK_5).intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 20).and(MASK_5).intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 25).and(MASK_5).intoArray(out, outpos);
- outpos += VLEN_256;
-
- var oV = iV.lanewise(VectorOperators.LSHR, 30).and(MASK_5);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 8);
- oV = iV.and(7).lanewise(VectorOperators.LSHL, 2).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 3).and(MASK_5).intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 8).and(MASK_5).intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 13).and(MASK_5).intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 18).and(MASK_5).intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 23).and(MASK_5).intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 28).and(MASK_5);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 16);
- oV = iV.and(1).lanewise(VectorOperators.LSHL, 4).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 1).and(MASK_5).intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 6).and(MASK_5).intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 11).and(MASK_5).intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 16).and(MASK_5).intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 21).and(MASK_5).intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 26).and(MASK_5).intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 31).and(MASK_5);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 24);
- oV = iV.and(0xf).lanewise(VectorOperators.LSHL, 1).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 4).and(MASK_5).intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 9).and(MASK_5).intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 14).and(MASK_5).intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 19).and(MASK_5).intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 24).and(MASK_5).intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 29).and(MASK_5);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 32);
- oV = iV.and(3).lanewise(VectorOperators.LSHL, 3).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 2).and(MASK_5).intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 7).and(MASK_5).intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 12).and(MASK_5).intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 17).and(MASK_5).intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 22).and(MASK_5).intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 27).and(MASK_5).intoArray(out, outpos);
- outpos += VLEN_256;
- }
-
- private static void fastunpack6(final int[] in, int inpos, final int[] out,
- int outpos) {
- var iV = IntVector.fromArray(SPECIES_512, in, inpos);
- iV.and(MASK_6).intoArray(out, outpos);
- outpos += VLEN_512;
-
- iV.lanewise(VectorOperators.LSHR, 6).and(MASK_6).intoArray(out, outpos);
- outpos += VLEN_512;
-
- iV.lanewise(VectorOperators.LSHR, 12).and(MASK_6).intoArray(out, outpos);
- outpos += VLEN_512;
-
- iV.lanewise(VectorOperators.LSHR, 18).and(MASK_6).intoArray(out, outpos);
- outpos += VLEN_512;
-
- iV.lanewise(VectorOperators.LSHR, 24).and(MASK_6).intoArray(out, outpos);
- outpos += VLEN_512;
-
- var oV = iV.lanewise(VectorOperators.LSHR, 30).and(MASK_6);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 16);
- oV = iV.and(0xf).lanewise(VectorOperators.LSHL, 2).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_512;
-
- iV.lanewise(VectorOperators.LSHR, 4).and(MASK_6).intoArray(out, outpos);
- outpos += VLEN_512;
-
- iV.lanewise(VectorOperators.LSHR, 10).and(MASK_6).intoArray(out, outpos);
- outpos += VLEN_512;
-
- iV.lanewise(VectorOperators.LSHR, 16).and(MASK_6).intoArray(out, outpos);
- outpos += VLEN_512;
-
- iV.lanewise(VectorOperators.LSHR, 22).and(MASK_6).intoArray(out, outpos);
- outpos += VLEN_512;
-
- oV = iV.lanewise(VectorOperators.LSHR, 28).and(MASK_6);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 32);
- oV = iV.and(3).lanewise(VectorOperators.LSHL, 4).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_512;
-
- iV.lanewise(VectorOperators.LSHR, 2).and(MASK_6).intoArray(out, outpos);
- outpos += VLEN_512;
-
- iV.lanewise(VectorOperators.LSHR, 8).and(MASK_6).intoArray(out, outpos);
- outpos += VLEN_512;
-
- iV.lanewise(VectorOperators.LSHR, 14).and(MASK_6).intoArray(out, outpos);
- outpos += VLEN_512;
-
- iV.lanewise(VectorOperators.LSHR, 20).and(MASK_6).intoArray(out, outpos);
- outpos += VLEN_512;
-
- iV.lanewise(VectorOperators.LSHR, 26).and(MASK_6).intoArray(out, outpos);
- outpos += VLEN_512;
- }
-
- private static void fastunpack7(final int[] in, int inpos, final int[] out,
- int outpos) {
- var iV = IntVector.fromArray(SPECIES_256, in, inpos);
- iV.and(MASK_7).intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 7).and(MASK_7).intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 14).and(MASK_7).intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 21).and(MASK_7).intoArray(out, outpos);
- outpos += VLEN_256;
-
- var oV = iV.lanewise(VectorOperators.LSHR, 28).and(MASK_7);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 8);
- oV = iV.and(7).lanewise(VectorOperators.LSHL, 4).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 3).and(MASK_7).intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 10).and(MASK_7).intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 17).and(MASK_7).intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 24).and(MASK_7).intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 31).and(MASK_7);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 16);
- oV = iV.and(0x3f).lanewise(VectorOperators.LSHL, 1).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 6).and(MASK_7).intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 13).and(MASK_7).intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 20).and(MASK_7).intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 27).and(MASK_7);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 24);
- oV = iV.and(3).lanewise(VectorOperators.LSHL, 5).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 2).and(MASK_7).intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 9).and(MASK_7).intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 16).and(MASK_7).intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 23).and(MASK_7).intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 30).and(MASK_7);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 32);
- oV = iV.and(0x1f).lanewise(VectorOperators.LSHL, 2).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 5).and(MASK_7).intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 12).and(MASK_7).intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 19).and(MASK_7).intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 26).and(MASK_7);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 40);
- oV = iV.and(1).lanewise(VectorOperators.LSHL, 6).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 1).and(MASK_7).intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 8).and(MASK_7).intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 15).and(MASK_7).intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 22).and(MASK_7).intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 29).and(MASK_7);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 48);
- oV = iV.and(0xf).lanewise(VectorOperators.LSHL, 3).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 4).and(MASK_7).intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 11).and(MASK_7).intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 18).and(MASK_7).intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 25).and(MASK_7).intoArray(out, outpos);
- outpos += VLEN_256;
- }
-
- private static void fastunpack8(final int[] in, int inpos, final int[] out,
- int outpos) {
- var iV = IntVector.fromArray(SPECIES_512, in, inpos);
- iV.and(MASK_8).intoArray(out, outpos);
- outpos += VLEN_512;
-
- iV.lanewise(VectorOperators.LSHR, 8).and(MASK_8).intoArray(out, outpos);
- outpos += VLEN_512;
-
- iV.lanewise(VectorOperators.LSHR, 16).and(MASK_8).intoArray(out, outpos);
- outpos += VLEN_512;
-
- iV.lanewise(VectorOperators.LSHR, 24).and(MASK_8).intoArray(out, outpos);
- outpos += VLEN_512;
-
- var oV = iV.and(MASK_8);
- oV = oV.zero(SPECIES_512);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 16);
- oV = iV.and(0xff).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_512;
-
- iV.lanewise(VectorOperators.LSHR, 8).and(MASK_8).intoArray(out, outpos);
- outpos += VLEN_512;
-
- iV.lanewise(VectorOperators.LSHR, 16).and(MASK_8).intoArray(out, outpos);
- outpos += VLEN_512;
-
- iV.lanewise(VectorOperators.LSHR, 24).and(MASK_8).intoArray(out, outpos);
- outpos += VLEN_512;
-
- oV = oV.zero(SPECIES_512);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 32);
- oV = iV.and(0xff).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_512;
-
- iV.lanewise(VectorOperators.LSHR, 8).and(MASK_8).intoArray(out, outpos);
- outpos += VLEN_512;
-
- iV.lanewise(VectorOperators.LSHR, 16).and(MASK_8).intoArray(out, outpos);
- outpos += VLEN_512;
-
- iV.lanewise(VectorOperators.LSHR, 24).and(MASK_8).intoArray(out, outpos);
- outpos += VLEN_512;
-
- oV = oV.zero(SPECIES_512);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 48);
- oV = iV.and(0xff).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_512;
-
- iV.lanewise(VectorOperators.LSHR, 8).and(MASK_8).intoArray(out, outpos);
- outpos += VLEN_512;
-
- iV.lanewise(VectorOperators.LSHR, 16).and(MASK_8).intoArray(out, outpos);
- outpos += VLEN_512;
-
- iV.lanewise(VectorOperators.LSHR, 24).and(MASK_8).intoArray(out, outpos);
- outpos += VLEN_512;
- }
-
- private static void fastunpack9(final int[] in, int inpos, final int[] out,
- int outpos) {
- var iV = IntVector.fromArray(SPECIES_256, in, inpos);
- iV.and(MASK_9).intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 9).and(MASK_9).intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 18).and(MASK_9).intoArray(out, outpos);
- outpos += VLEN_256;
-
- var oV = iV.lanewise(VectorOperators.LSHR, 27).and(MASK_9);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 8);
- oV = iV.and(0xf).lanewise(VectorOperators.LSHL, 5).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 4).and(MASK_9).intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 13).and(MASK_9).intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 22).and(MASK_9).intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 31).and(MASK_9);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 16);
- oV = iV.and(0xff).lanewise(VectorOperators.LSHL, 1).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 8).and(MASK_9).intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 17).and(MASK_9).intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 26).and(MASK_9);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 24);
- oV = iV.and(7).lanewise(VectorOperators.LSHL, 6).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 3).and(MASK_9).intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 12).and(MASK_9).intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 21).and(MASK_9).intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 30).and(MASK_9);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 32);
- oV = iV.and(0x7f).lanewise(VectorOperators.LSHL, 2).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 7).and(MASK_9).intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 16).and(MASK_9).intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 25).and(MASK_9);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 40);
- oV = iV.and(3).lanewise(VectorOperators.LSHL, 7).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 2).and(MASK_9).intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 11).and(MASK_9).intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 20).and(MASK_9).intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 29).and(MASK_9);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 48);
- oV = iV.and(0x3f).lanewise(VectorOperators.LSHL, 3).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 6).and(MASK_9).intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 15).and(MASK_9).intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 24).and(MASK_9);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 56);
- oV = iV.and(1).lanewise(VectorOperators.LSHL, 8).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 1).and(MASK_9).intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 10).and(MASK_9).intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 19).and(MASK_9).intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 28).and(MASK_9);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 64);
- oV = iV.and(0x1f).lanewise(VectorOperators.LSHL, 4).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 5).and(MASK_9).intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 14).and(MASK_9).intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 23).and(MASK_9).intoArray(out, outpos);
- outpos += VLEN_256;
- }
-
- private static void fastunpack10(final int[] in, int inpos, final int[] out,
- int outpos) {
- var iV = IntVector.fromArray(SPECIES_512, in, inpos);
- iV.and(MASK_10).intoArray(out, outpos);
- outpos += VLEN_512;
-
- iV.lanewise(VectorOperators.LSHR, 10).and(MASK_10).intoArray(out, outpos);
- outpos += VLEN_512;
-
- iV.lanewise(VectorOperators.LSHR, 20).and(MASK_10).intoArray(out, outpos);
- outpos += VLEN_512;
-
- var oV = iV.lanewise(VectorOperators.LSHR, 30).and(MASK_10);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 16);
- oV = iV.and(0xff).lanewise(VectorOperators.LSHL, 2).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_512;
-
- iV.lanewise(VectorOperators.LSHR, 8).and(MASK_10).intoArray(out, outpos);
- outpos += VLEN_512;
-
- iV.lanewise(VectorOperators.LSHR, 18).and(MASK_10).intoArray(out, outpos);
- outpos += VLEN_512;
-
- oV = iV.lanewise(VectorOperators.LSHR, 28).and(MASK_10);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 32);
- oV = iV.and(0x3f).lanewise(VectorOperators.LSHL, 4).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_512;
-
- iV.lanewise(VectorOperators.LSHR, 6).and(MASK_10).intoArray(out, outpos);
- outpos += VLEN_512;
-
- iV.lanewise(VectorOperators.LSHR, 16).and(MASK_10).intoArray(out, outpos);
- outpos += VLEN_512;
-
- oV = iV.lanewise(VectorOperators.LSHR, 26).and(MASK_10);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 48);
- oV = iV.and(0xf).lanewise(VectorOperators.LSHL, 6).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_512;
-
- iV.lanewise(VectorOperators.LSHR, 4).and(MASK_10).intoArray(out, outpos);
- outpos += VLEN_512;
-
- iV.lanewise(VectorOperators.LSHR, 14).and(MASK_10).intoArray(out, outpos);
- outpos += VLEN_512;
-
- oV = iV.lanewise(VectorOperators.LSHR, 24).and(MASK_10);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 64);
- oV = iV.and(3).lanewise(VectorOperators.LSHL, 8).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_512;
-
- iV.lanewise(VectorOperators.LSHR, 2).and(MASK_10).intoArray(out, outpos);
- outpos += VLEN_512;
-
- iV.lanewise(VectorOperators.LSHR, 12).and(MASK_10).intoArray(out, outpos);
- outpos += VLEN_512;
-
- iV.lanewise(VectorOperators.LSHR, 22).and(MASK_10).intoArray(out, outpos);
- outpos += VLEN_512;
- }
-
- private static void fastunpack11(final int[] in, int inpos, final int[] out,
- int outpos) {
- var iV = IntVector.fromArray(SPECIES_256, in, inpos);
- iV.and(MASK_11).intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 11).and(MASK_11).intoArray(out, outpos);
- outpos += VLEN_256;
-
- var oV = iV.lanewise(VectorOperators.LSHR, 22).and(MASK_11);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 8);
- oV = iV.and(1).lanewise(VectorOperators.LSHL, 10).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 1).and(MASK_11).intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 12).and(MASK_11).intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 23).and(MASK_11);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 16);
- oV = iV.and(3).lanewise(VectorOperators.LSHL, 9).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 2).and(MASK_11).intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 13).and(MASK_11).intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 24).and(MASK_11);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 24);
- oV = iV.and(7).lanewise(VectorOperators.LSHL, 8).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 3).and(MASK_11).intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 14).and(MASK_11).intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 25).and(MASK_11);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 32);
- oV = iV.and(0xf).lanewise(VectorOperators.LSHL, 7).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 4).and(MASK_11).intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 15).and(MASK_11).intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 26).and(MASK_11);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 40);
- oV = iV.and(0x1f).lanewise(VectorOperators.LSHL, 6).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 5).and(MASK_11).intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 16).and(MASK_11).intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 27).and(MASK_11);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 48);
- oV = iV.and(0x3f).lanewise(VectorOperators.LSHL, 5).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 6).and(MASK_11).intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 17).and(MASK_11).intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 28).and(MASK_11);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 56);
- oV = iV.and(0x7f).lanewise(VectorOperators.LSHL, 4).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 7).and(MASK_11).intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 18).and(MASK_11).intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 29).and(MASK_11);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 64);
- oV = iV.and(0xff).lanewise(VectorOperators.LSHL, 3).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 8).and(MASK_11).intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 19).and(MASK_11).intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 30).and(MASK_11);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 72);
- oV = iV.and(0x1ff).lanewise(VectorOperators.LSHL, 2).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 9).and(MASK_11).intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 20).and(MASK_11).intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 31).and(MASK_11);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 80);
- oV = iV.and(0x3ff).lanewise(VectorOperators.LSHL, 1).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 10).and(MASK_11).intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 21).and(MASK_11).intoArray(out, outpos);
- outpos += VLEN_256;
- }
-
- private static void fastunpack12(final int[] in, int inpos, final int[] out,
- int outpos) {
- var iV = IntVector.fromArray(SPECIES_512, in, inpos);
- iV.and(MASK_12).intoArray(out, outpos);
- outpos += VLEN_512;
-
- iV.lanewise(VectorOperators.LSHR, 12).and(MASK_12).intoArray(out, outpos);
- outpos += VLEN_512;
-
- var oV = iV.lanewise(VectorOperators.LSHR, 24).and(MASK_12);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 16);
- oV = iV.and(0xf).lanewise(VectorOperators.LSHL, 8).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_512;
-
- iV.lanewise(VectorOperators.LSHR, 4).and(MASK_12).intoArray(out, outpos);
- outpos += VLEN_512;
-
- iV.lanewise(VectorOperators.LSHR, 16).and(MASK_12).intoArray(out, outpos);
- outpos += VLEN_512;
-
- oV = iV.lanewise(VectorOperators.LSHR, 28).and(MASK_12);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 32);
- oV = iV.and(0xff).lanewise(VectorOperators.LSHL, 4).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_512;
-
- iV.lanewise(VectorOperators.LSHR, 8).and(MASK_12).intoArray(out, outpos);
- outpos += VLEN_512;
-
- iV.lanewise(VectorOperators.LSHR, 20).and(MASK_12).intoArray(out, outpos);
- outpos += VLEN_512;
-
- oV = oV.zero(SPECIES_512);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 48);
- oV = iV.and(0xfff).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_512;
-
- iV.lanewise(VectorOperators.LSHR, 12).and(MASK_12).intoArray(out, outpos);
- outpos += VLEN_512;
-
- oV = iV.lanewise(VectorOperators.LSHR, 24).and(MASK_12);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 64);
- oV = iV.and(0xf).lanewise(VectorOperators.LSHL, 8).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_512;
-
- iV.lanewise(VectorOperators.LSHR, 4).and(MASK_12).intoArray(out, outpos);
- outpos += VLEN_512;
-
- iV.lanewise(VectorOperators.LSHR, 16).and(MASK_12).intoArray(out, outpos);
- outpos += VLEN_512;
-
- oV = iV.lanewise(VectorOperators.LSHR, 28).and(MASK_12);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 80);
- oV = iV.and(0xff).lanewise(VectorOperators.LSHL, 4).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_512;
-
- iV.lanewise(VectorOperators.LSHR, 8).and(MASK_12).intoArray(out, outpos);
- outpos += VLEN_512;
-
- iV.lanewise(VectorOperators.LSHR, 20).and(MASK_12).intoArray(out, outpos);
- outpos += VLEN_512;
- }
-
- private static void fastunpack13(final int[] in, int inpos, final int[] out,
- int outpos) {
- var iV = IntVector.fromArray(SPECIES_256, in, inpos);
- iV.and(MASK_13).intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 13).and(MASK_13).intoArray(out, outpos);
- outpos += VLEN_256;
-
- var oV = iV.lanewise(VectorOperators.LSHR, 26).and(MASK_13);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 8);
- oV = iV.and(0x7f).lanewise(VectorOperators.LSHL, 6).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 7).and(MASK_13).intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 20).and(MASK_13);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 16);
- oV = iV.and(1).lanewise(VectorOperators.LSHL, 12).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 1).and(MASK_13).intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 14).and(MASK_13).intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 27).and(MASK_13);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 24);
- oV = iV.and(0xff).lanewise(VectorOperators.LSHL, 5).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 8).and(MASK_13).intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 21).and(MASK_13);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 32);
- oV = iV.and(3).lanewise(VectorOperators.LSHL, 11).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 2).and(MASK_13).intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 15).and(MASK_13).intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 28).and(MASK_13);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 40);
- oV = iV.and(0x1ff).lanewise(VectorOperators.LSHL, 4).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 9).and(MASK_13).intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 22).and(MASK_13);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 48);
- oV = iV.and(7).lanewise(VectorOperators.LSHL, 10).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 3).and(MASK_13).intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 16).and(MASK_13).intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 29).and(MASK_13);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 56);
- oV = iV.and(0x3ff).lanewise(VectorOperators.LSHL, 3).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 10).and(MASK_13).intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 23).and(MASK_13);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 64);
- oV = iV.and(0xf).lanewise(VectorOperators.LSHL, 9).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 4).and(MASK_13).intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 17).and(MASK_13).intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 30).and(MASK_13);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 72);
- oV = iV.and(0x7ff).lanewise(VectorOperators.LSHL, 2).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 11).and(MASK_13).intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 24).and(MASK_13);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 80);
- oV = iV.and(0x1f).lanewise(VectorOperators.LSHL, 8).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 5).and(MASK_13).intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 18).and(MASK_13).intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 31).and(MASK_13);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 88);
- oV = iV.and(0xfff).lanewise(VectorOperators.LSHL, 1).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 12).and(MASK_13).intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 25).and(MASK_13);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 96);
- oV = iV.and(0x3f).lanewise(VectorOperators.LSHL, 7).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 6).and(MASK_13).intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 19).and(MASK_13).intoArray(out, outpos);
- outpos += VLEN_256;
- }
-
- private static void fastunpack14(final int[] in, int inpos, final int[] out,
- int outpos) {
+ private static void fastunpack8(final int[] in, int inpos, final int[] out,
+ int outpos) {
var iV = IntVector.fromArray(SPECIES_512, in, inpos);
- iV.and(MASK_14).intoArray(out, outpos);
- outpos += VLEN_512;
-
- iV.lanewise(VectorOperators.LSHR, 14).and(MASK_14).intoArray(out, outpos);
- outpos += VLEN_512;
-
- var oV = iV.lanewise(VectorOperators.LSHR, 28).and(MASK_14);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 16);
- oV = iV.and(0x3ff).lanewise(VectorOperators.LSHL, 4).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_512;
-
- iV.lanewise(VectorOperators.LSHR, 10).and(MASK_14).intoArray(out, outpos);
- outpos += VLEN_512;
-
- oV = iV.lanewise(VectorOperators.LSHR, 24).and(MASK_14);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 32);
- oV = iV.and(0x3f).lanewise(VectorOperators.LSHL, 8).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_512;
-
- iV.lanewise(VectorOperators.LSHR, 6).and(MASK_14).intoArray(out, outpos);
- outpos += VLEN_512;
-
- oV = iV.lanewise(VectorOperators.LSHR, 20).and(MASK_14);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 48);
- oV = iV.and(3).lanewise(VectorOperators.LSHL, 12).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_512;
-
- iV.lanewise(VectorOperators.LSHR, 2).and(MASK_14).intoArray(out, outpos);
- outpos += VLEN_512;
-
- iV.lanewise(VectorOperators.LSHR, 16).and(MASK_14).intoArray(out, outpos);
- outpos += VLEN_512;
-
- oV = iV.lanewise(VectorOperators.LSHR, 30).and(MASK_14);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 64);
- oV = iV.and(0xfff).lanewise(VectorOperators.LSHL, 2).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_512;
-
- iV.lanewise(VectorOperators.LSHR, 12).and(MASK_14).intoArray(out, outpos);
- outpos += VLEN_512;
-
- oV = iV.lanewise(VectorOperators.LSHR, 26).and(MASK_14);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 80);
- oV = iV.and(0xff).lanewise(VectorOperators.LSHL, 6).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_512;
-
- iV.lanewise(VectorOperators.LSHR, 8).and(MASK_14).intoArray(out, outpos);
- outpos += VLEN_512;
-
- oV = iV.lanewise(VectorOperators.LSHR, 22).and(MASK_14);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 96);
- oV = iV.and(0xf).lanewise(VectorOperators.LSHL, 10).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_512;
-
- iV.lanewise(VectorOperators.LSHR, 4).and(MASK_14).intoArray(out, outpos);
+ iV.and(MASK_8).intoArray(out, outpos);
outpos += VLEN_512;
- iV.lanewise(VectorOperators.LSHR, 18).and(MASK_14).intoArray(out, outpos);
+ iV.lanewise(VectorOperators.LSHR, 8).and(MASK_8).intoArray(out, outpos);
outpos += VLEN_512;
- }
-
- private static void fastunpack15(final int[] in, int inpos, final int[] out,
- int outpos) {
- var iV = IntVector.fromArray(SPECIES_256, in, inpos);
- iV.and(MASK_15).intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 15).and(MASK_15).intoArray(out, outpos);
- outpos += VLEN_256;
-
- var oV = iV.lanewise(VectorOperators.LSHR, 30).and(MASK_15);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 8);
- oV = iV.and(0x1fff).lanewise(VectorOperators.LSHL, 2).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
- iV.lanewise(VectorOperators.LSHR, 13).and(MASK_15).intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 28).and(MASK_15);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 16);
- oV = iV.and(0x7ff).lanewise(VectorOperators.LSHL, 4).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 11).and(MASK_15).intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 26).and(MASK_15);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 24);
- oV = iV.and(0x1ff).lanewise(VectorOperators.LSHL, 6).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 9).and(MASK_15).intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 24).and(MASK_15);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 32);
- oV = iV.and(0x7f).lanewise(VectorOperators.LSHL, 8).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 7).and(MASK_15).intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 22).and(MASK_15);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 40);
- oV = iV.and(0x1f).lanewise(VectorOperators.LSHL, 10).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 5).and(MASK_15).intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 20).and(MASK_15);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 48);
- oV = iV.and(7).lanewise(VectorOperators.LSHL, 12).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 3).and(MASK_15).intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 18).and(MASK_15);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 56);
- oV = iV.and(1).lanewise(VectorOperators.LSHL, 14).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 1).and(MASK_15).intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 16).and(MASK_15).intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 31).and(MASK_15);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 64);
- oV = iV.and(0x3fff).lanewise(VectorOperators.LSHL, 1).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 14).and(MASK_15).intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 29).and(MASK_15);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 72);
- oV = iV.and(0xfff).lanewise(VectorOperators.LSHL, 3).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 12).and(MASK_15).intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 27).and(MASK_15);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 80);
- oV = iV.and(0x3ff).lanewise(VectorOperators.LSHL, 5).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 10).and(MASK_15).intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 25).and(MASK_15);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 88);
- oV = iV.and(0xff).lanewise(VectorOperators.LSHL, 7).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 8).and(MASK_15).intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 23).and(MASK_15);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 96);
- oV = iV.and(0x3f).lanewise(VectorOperators.LSHL, 9).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 6).and(MASK_15).intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 21).and(MASK_15);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 104);
- oV = iV.and(0xf).lanewise(VectorOperators.LSHL, 11).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 4).and(MASK_15).intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 19).and(MASK_15);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 112);
- oV = iV.and(3).lanewise(VectorOperators.LSHL, 13).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 2).and(MASK_15).intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 17).and(MASK_15).intoArray(out, outpos);
- outpos += VLEN_256;
- }
-
- private static void fastunpack16(final int[] in, int inpos, final int[] out,
- int outpos) {
- var iV = IntVector.fromArray(SPECIES_512, in, inpos);
- iV.and(MASK_16).intoArray(out, outpos);
+ iV.lanewise(VectorOperators.LSHR, 16).and(MASK_8).intoArray(out, outpos);
outpos += VLEN_512;
- iV.lanewise(VectorOperators.LSHR, 16).and(MASK_16).intoArray(out, outpos);
+ iV.lanewise(VectorOperators.LSHR, 24).and(MASK_8).intoArray(out, outpos);
outpos += VLEN_512;
- var oV = iV.and(MASK_16);
+ var oV = iV.and(MASK_8);
oV = oV.zero(SPECIES_512);
iV = IntVector.fromArray(SPECIES_512, in, inpos + 16);
- oV = iV.and(0xffff).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_512;
-
- iV.lanewise(VectorOperators.LSHR, 16).and(MASK_16).intoArray(out, outpos);
- outpos += VLEN_512;
-
- oV = oV.zero(SPECIES_512);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 32);
- oV = iV.and(0xffff).or(oV);
+ oV = iV.and(0xff).or(oV);
oV.intoArray(out, outpos);
outpos += VLEN_512;
- iV.lanewise(VectorOperators.LSHR, 16).and(MASK_16).intoArray(out, outpos);
+ iV.lanewise(VectorOperators.LSHR, 8).and(MASK_8).intoArray(out, outpos);
outpos += VLEN_512;
- oV = oV.zero(SPECIES_512);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 48);
- oV = iV.and(0xffff).or(oV);
-
- oV.intoArray(out, outpos);
+ iV.lanewise(VectorOperators.LSHR, 16).and(MASK_8).intoArray(out, outpos);
outpos += VLEN_512;
- iV.lanewise(VectorOperators.LSHR, 16).and(MASK_16).intoArray(out, outpos);
+ iV.lanewise(VectorOperators.LSHR, 24).and(MASK_8).intoArray(out, outpos);
outpos += VLEN_512;
oV = oV.zero(SPECIES_512);
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 64);
- oV = iV.and(0xffff).or(oV);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 32);
+ oV = iV.and(0xff).or(oV);
oV.intoArray(out, outpos);
outpos += VLEN_512;
- iV.lanewise(VectorOperators.LSHR, 16).and(MASK_16).intoArray(out, outpos);
+ iV.lanewise(VectorOperators.LSHR, 8).and(MASK_8).intoArray(out, outpos);
outpos += VLEN_512;
- oV = oV.zero(SPECIES_512);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 80);
- oV = iV.and(0xffff).or(oV);
-
- oV.intoArray(out, outpos);
+ iV.lanewise(VectorOperators.LSHR, 16).and(MASK_8).intoArray(out, outpos);
outpos += VLEN_512;
- iV.lanewise(VectorOperators.LSHR, 16).and(MASK_16).intoArray(out, outpos);
+ iV.lanewise(VectorOperators.LSHR, 24).and(MASK_8).intoArray(out, outpos);
outpos += VLEN_512;
oV = oV.zero(SPECIES_512);
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 96);
- oV = iV.and(0xffff).or(oV);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 48);
+ oV = iV.and(0xff).or(oV);
oV.intoArray(out, outpos);
outpos += VLEN_512;
- iV.lanewise(VectorOperators.LSHR, 16).and(MASK_16).intoArray(out, outpos);
+ iV.lanewise(VectorOperators.LSHR, 8).and(MASK_8).intoArray(out, outpos);
outpos += VLEN_512;
- oV = oV.zero(SPECIES_512);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 112);
- oV = iV.and(0xffff).or(oV);
-
- oV.intoArray(out, outpos);
+ iV.lanewise(VectorOperators.LSHR, 16).and(MASK_8).intoArray(out, outpos);
outpos += VLEN_512;
- iV.lanewise(VectorOperators.LSHR, 16).and(MASK_16).intoArray(out, outpos);
+ iV.lanewise(VectorOperators.LSHR, 24).and(MASK_8).intoArray(out, outpos);
outpos += VLEN_512;
}
- private static void fastunpack17(final int[] in, int inpos, final int[] out,
+ private static void fastunpack10(final int[] in, int inpos, final int[] out,
int outpos) {
- var iV = IntVector.fromArray(SPECIES_256, in, inpos);
- iV.and(MASK_17).intoArray(out, outpos);
- outpos += VLEN_256;
-
- var oV = iV.lanewise(VectorOperators.LSHR, 17).and(MASK_17);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 8);
- oV = iV.and(3).lanewise(VectorOperators.LSHL, 15).or(oV);
+ var iV = IntVector.fromArray(SPECIES_512, in, inpos);
+ iV.and(MASK_10).intoArray(out, outpos);
+ outpos += VLEN_512;
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
+ iV.lanewise(VectorOperators.LSHR, 10).and(MASK_10).intoArray(out, outpos);
+ outpos += VLEN_512;
- iV.lanewise(VectorOperators.LSHR, 2).and(MASK_17).intoArray(out, outpos);
- outpos += VLEN_256;
+ iV.lanewise(VectorOperators.LSHR, 20).and(MASK_10).intoArray(out, outpos);
+ outpos += VLEN_512;
- oV = iV.lanewise(VectorOperators.LSHR, 19).and(MASK_17);
+ var oV = iV.lanewise(VectorOperators.LSHR, 30).and(MASK_10);
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 16);
- oV = iV.and(0xf).lanewise(VectorOperators.LSHL, 13).or(oV);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 16);
+ oV = iV.and(0xff).lanewise(VectorOperators.LSHL, 2).or(oV);
oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 4).and(MASK_17).intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 21).and(MASK_17);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 24);
- oV = iV.and(0x3f).lanewise(VectorOperators.LSHL, 11).or(oV);
+ outpos += VLEN_512;
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
+ iV.lanewise(VectorOperators.LSHR, 8).and(MASK_10).intoArray(out, outpos);
+ outpos += VLEN_512;
- iV.lanewise(VectorOperators.LSHR, 6).and(MASK_17).intoArray(out, outpos);
- outpos += VLEN_256;
+ iV.lanewise(VectorOperators.LSHR, 18).and(MASK_10).intoArray(out, outpos);
+ outpos += VLEN_512;
- oV = iV.lanewise(VectorOperators.LSHR, 23).and(MASK_17);
+ oV = iV.lanewise(VectorOperators.LSHR, 28).and(MASK_10);
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 32);
- oV = iV.and(0xff).lanewise(VectorOperators.LSHL, 9).or(oV);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 32);
+ oV = iV.and(0x3f).lanewise(VectorOperators.LSHL, 4).or(oV);
oV.intoArray(out, outpos);
- outpos += VLEN_256;
+ outpos += VLEN_512;
+
+ iV.lanewise(VectorOperators.LSHR, 6).and(MASK_10).intoArray(out, outpos);
+ outpos += VLEN_512;
- iV.lanewise(VectorOperators.LSHR, 8).and(MASK_17).intoArray(out, outpos);
- outpos += VLEN_256;
+ iV.lanewise(VectorOperators.LSHR, 16).and(MASK_10).intoArray(out, outpos);
+ outpos += VLEN_512;
- oV = iV.lanewise(VectorOperators.LSHR, 25).and(MASK_17);
+ oV = iV.lanewise(VectorOperators.LSHR, 26).and(MASK_10);
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 40);
- oV = iV.and(0x3ff).lanewise(VectorOperators.LSHL, 7).or(oV);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 48);
+ oV = iV.and(0xf).lanewise(VectorOperators.LSHL, 6).or(oV);
oV.intoArray(out, outpos);
- outpos += VLEN_256;
+ outpos += VLEN_512;
- iV.lanewise(VectorOperators.LSHR, 10).and(MASK_17).intoArray(out, outpos);
- outpos += VLEN_256;
+ iV.lanewise(VectorOperators.LSHR, 4).and(MASK_10).intoArray(out, outpos);
+ outpos += VLEN_512;
+
+ iV.lanewise(VectorOperators.LSHR, 14).and(MASK_10).intoArray(out, outpos);
+ outpos += VLEN_512;
- oV = iV.lanewise(VectorOperators.LSHR, 27).and(MASK_17);
+ oV = iV.lanewise(VectorOperators.LSHR, 24).and(MASK_10);
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 48);
- oV = iV.and(0xfff).lanewise(VectorOperators.LSHL, 5).or(oV);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 64);
+ oV = iV.and(3).lanewise(VectorOperators.LSHL, 8).or(oV);
oV.intoArray(out, outpos);
- outpos += VLEN_256;
+ outpos += VLEN_512;
- iV.lanewise(VectorOperators.LSHR, 12).and(MASK_17).intoArray(out, outpos);
- outpos += VLEN_256;
+ iV.lanewise(VectorOperators.LSHR, 2).and(MASK_10).intoArray(out, outpos);
+ outpos += VLEN_512;
- oV = iV.lanewise(VectorOperators.LSHR, 29).and(MASK_17);
+ iV.lanewise(VectorOperators.LSHR, 12).and(MASK_10).intoArray(out, outpos);
+ outpos += VLEN_512;
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 56);
- oV = iV.and(0x3fff).lanewise(VectorOperators.LSHL, 3).or(oV);
+ iV.lanewise(VectorOperators.LSHR, 22).and(MASK_10).intoArray(out, outpos);
+ outpos += VLEN_512;
+ }
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
+ private static void fastunpack12(final int[] in, int inpos, final int[] out,
+ int outpos) {
+ var iV = IntVector.fromArray(SPECIES_512, in, inpos);
+ iV.and(MASK_12).intoArray(out, outpos);
+ outpos += VLEN_512;
- iV.lanewise(VectorOperators.LSHR, 14).and(MASK_17).intoArray(out, outpos);
- outpos += VLEN_256;
+ iV.lanewise(VectorOperators.LSHR, 12).and(MASK_12).intoArray(out, outpos);
+ outpos += VLEN_512;
- oV = iV.lanewise(VectorOperators.LSHR, 31).and(MASK_17);
+ var oV = iV.lanewise(VectorOperators.LSHR, 24).and(MASK_12);
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 64);
- oV = iV.and(0xffff).lanewise(VectorOperators.LSHL, 1).or(oV);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 16);
+ oV = iV.and(0xf).lanewise(VectorOperators.LSHL, 8).or(oV);
oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 16).and(MASK_17);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 72);
- oV = iV.and(1).lanewise(VectorOperators.LSHL, 16).or(oV);
+ outpos += VLEN_512;
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
+ iV.lanewise(VectorOperators.LSHR, 4).and(MASK_12).intoArray(out, outpos);
+ outpos += VLEN_512;
- iV.lanewise(VectorOperators.LSHR, 1).and(MASK_17).intoArray(out, outpos);
- outpos += VLEN_256;
+ iV.lanewise(VectorOperators.LSHR, 16).and(MASK_12).intoArray(out, outpos);
+ outpos += VLEN_512;
- oV = iV.lanewise(VectorOperators.LSHR, 18).and(MASK_17);
+ oV = iV.lanewise(VectorOperators.LSHR, 28).and(MASK_12);
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 80);
- oV = iV.and(7).lanewise(VectorOperators.LSHL, 14).or(oV);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 32);
+ oV = iV.and(0xff).lanewise(VectorOperators.LSHL, 4).or(oV);
oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 3).and(MASK_17).intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 20).and(MASK_17);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 88);
- oV = iV.and(0x1f).lanewise(VectorOperators.LSHL, 12).or(oV);
+ outpos += VLEN_512;
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
+ iV.lanewise(VectorOperators.LSHR, 8).and(MASK_12).intoArray(out, outpos);
+ outpos += VLEN_512;
- iV.lanewise(VectorOperators.LSHR, 5).and(MASK_17).intoArray(out, outpos);
- outpos += VLEN_256;
+ iV.lanewise(VectorOperators.LSHR, 20).and(MASK_12).intoArray(out, outpos);
+ outpos += VLEN_512;
- oV = iV.lanewise(VectorOperators.LSHR, 22).and(MASK_17);
+ oV = oV.zero(SPECIES_512);
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 96);
- oV = iV.and(0x7f).lanewise(VectorOperators.LSHL, 10).or(oV);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 48);
+ oV = iV.and(0xfff).or(oV);
oV.intoArray(out, outpos);
- outpos += VLEN_256;
+ outpos += VLEN_512;
- iV.lanewise(VectorOperators.LSHR, 7).and(MASK_17).intoArray(out, outpos);
- outpos += VLEN_256;
+ iV.lanewise(VectorOperators.LSHR, 12).and(MASK_12).intoArray(out, outpos);
+ outpos += VLEN_512;
- oV = iV.lanewise(VectorOperators.LSHR, 24).and(MASK_17);
+ oV = iV.lanewise(VectorOperators.LSHR, 24).and(MASK_12);
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 104);
- oV = iV.and(0x1ff).lanewise(VectorOperators.LSHL, 8).or(oV);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 64);
+ oV = iV.and(0xf).lanewise(VectorOperators.LSHL, 8).or(oV);
oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 9).and(MASK_17).intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 26).and(MASK_17);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 112);
- oV = iV.and(0x7ff).lanewise(VectorOperators.LSHL, 6).or(oV);
+ outpos += VLEN_512;
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
+ iV.lanewise(VectorOperators.LSHR, 4).and(MASK_12).intoArray(out, outpos);
+ outpos += VLEN_512;
- iV.lanewise(VectorOperators.LSHR, 11).and(MASK_17).intoArray(out, outpos);
- outpos += VLEN_256;
+ iV.lanewise(VectorOperators.LSHR, 16).and(MASK_12).intoArray(out, outpos);
+ outpos += VLEN_512;
- oV = iV.lanewise(VectorOperators.LSHR, 28).and(MASK_17);
+ oV = iV.lanewise(VectorOperators.LSHR, 28).and(MASK_12);
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 120);
- oV = iV.and(0x1fff).lanewise(VectorOperators.LSHL, 4).or(oV);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 80);
+ oV = iV.and(0xff).lanewise(VectorOperators.LSHL, 4).or(oV);
oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 13).and(MASK_17).intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 30).and(MASK_17);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 128);
- oV = iV.and(0x7fff).lanewise(VectorOperators.LSHL, 2).or(oV);
+ outpos += VLEN_512;
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
+ iV.lanewise(VectorOperators.LSHR, 8).and(MASK_12).intoArray(out, outpos);
+ outpos += VLEN_512;
- iV.lanewise(VectorOperators.LSHR, 15).and(MASK_17).intoArray(out, outpos);
- outpos += VLEN_256;
+ iV.lanewise(VectorOperators.LSHR, 20).and(MASK_12).intoArray(out, outpos);
+ outpos += VLEN_512;
}
- private static void fastunpack18(final int[] in, int inpos, final int[] out,
+ private static void fastunpack14(final int[] in, int inpos, final int[] out,
int outpos) {
var iV = IntVector.fromArray(SPECIES_512, in, inpos);
- iV.and(MASK_18).intoArray(out, outpos);
+ iV.and(MASK_14).intoArray(out, outpos);
outpos += VLEN_512;
- var oV = iV.lanewise(VectorOperators.LSHR, 18).and(MASK_18);
+ iV.lanewise(VectorOperators.LSHR, 14).and(MASK_14).intoArray(out, outpos);
+ outpos += VLEN_512;
+
+ var oV = iV.lanewise(VectorOperators.LSHR, 28).and(MASK_14);
iV = IntVector.fromArray(SPECIES_512, in, inpos + 16);
- oV = iV.and(0xf).lanewise(VectorOperators.LSHL, 14).or(oV);
+ oV = iV.and(0x3ff).lanewise(VectorOperators.LSHL, 4).or(oV);
oV.intoArray(out, outpos);
outpos += VLEN_512;
- iV.lanewise(VectorOperators.LSHR, 4).and(MASK_18).intoArray(out, outpos);
+ iV.lanewise(VectorOperators.LSHR, 10).and(MASK_14).intoArray(out, outpos);
outpos += VLEN_512;
- oV = iV.lanewise(VectorOperators.LSHR, 22).and(MASK_18);
+ oV = iV.lanewise(VectorOperators.LSHR, 24).and(MASK_14);
iV = IntVector.fromArray(SPECIES_512, in, inpos + 32);
- oV = iV.and(0xff).lanewise(VectorOperators.LSHL, 10).or(oV);
+ oV = iV.and(0x3f).lanewise(VectorOperators.LSHL, 8).or(oV);
oV.intoArray(out, outpos);
outpos += VLEN_512;
- iV.lanewise(VectorOperators.LSHR, 8).and(MASK_18).intoArray(out, outpos);
+ iV.lanewise(VectorOperators.LSHR, 6).and(MASK_14).intoArray(out, outpos);
outpos += VLEN_512;
- oV = iV.lanewise(VectorOperators.LSHR, 26).and(MASK_18);
+ oV = iV.lanewise(VectorOperators.LSHR, 20).and(MASK_14);
iV = IntVector.fromArray(SPECIES_512, in, inpos + 48);
- oV = iV.and(0xfff).lanewise(VectorOperators.LSHL, 6).or(oV);
+ oV = iV.and(3).lanewise(VectorOperators.LSHL, 12).or(oV);
oV.intoArray(out, outpos);
outpos += VLEN_512;
- iV.lanewise(VectorOperators.LSHR, 12).and(MASK_18).intoArray(out, outpos);
+ iV.lanewise(VectorOperators.LSHR, 2).and(MASK_14).intoArray(out, outpos);
outpos += VLEN_512;
- oV = iV.lanewise(VectorOperators.LSHR, 30).and(MASK_18);
+ iV.lanewise(VectorOperators.LSHR, 16).and(MASK_14).intoArray(out, outpos);
+ outpos += VLEN_512;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 30).and(MASK_14);
iV = IntVector.fromArray(SPECIES_512, in, inpos + 64);
- oV = iV.and(0xffff).lanewise(VectorOperators.LSHL, 2).or(oV);
+ oV = iV.and(0xfff).lanewise(VectorOperators.LSHL, 2).or(oV);
oV.intoArray(out, outpos);
outpos += VLEN_512;
- oV = iV.lanewise(VectorOperators.LSHR, 16).and(MASK_18);
+ iV.lanewise(VectorOperators.LSHR, 12).and(MASK_14).intoArray(out, outpos);
+ outpos += VLEN_512;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 26).and(MASK_14);
iV = IntVector.fromArray(SPECIES_512, in, inpos + 80);
- oV = iV.and(3).lanewise(VectorOperators.LSHL, 16).or(oV);
+ oV = iV.and(0xff).lanewise(VectorOperators.LSHL, 6).or(oV);
oV.intoArray(out, outpos);
outpos += VLEN_512;
- iV.lanewise(VectorOperators.LSHR, 2).and(MASK_18).intoArray(out, outpos);
+ iV.lanewise(VectorOperators.LSHR, 8).and(MASK_14).intoArray(out, outpos);
outpos += VLEN_512;
- oV = iV.lanewise(VectorOperators.LSHR, 20).and(MASK_18);
+ oV = iV.lanewise(VectorOperators.LSHR, 22).and(MASK_14);
iV = IntVector.fromArray(SPECIES_512, in, inpos + 96);
- oV = iV.and(0x3f).lanewise(VectorOperators.LSHL, 12).or(oV);
+ oV = iV.and(0xf).lanewise(VectorOperators.LSHL, 10).or(oV);
oV.intoArray(out, outpos);
outpos += VLEN_512;
- iV.lanewise(VectorOperators.LSHR, 6).and(MASK_18).intoArray(out, outpos);
+ iV.lanewise(VectorOperators.LSHR, 4).and(MASK_14).intoArray(out, outpos);
outpos += VLEN_512;
- oV = iV.lanewise(VectorOperators.LSHR, 24).and(MASK_18);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 112);
- oV = iV.and(0x3ff).lanewise(VectorOperators.LSHL, 8).or(oV);
+ iV.lanewise(VectorOperators.LSHR, 18).and(MASK_14).intoArray(out, outpos);
+ outpos += VLEN_512;
+ }
- oV.intoArray(out, outpos);
+ private static void fastunpack16(final int[] in, int inpos, final int[] out,
+ int outpos) {
+ var iV = IntVector.fromArray(SPECIES_512, in, inpos);
+ iV.and(MASK_16).intoArray(out, outpos);
outpos += VLEN_512;
- iV.lanewise(VectorOperators.LSHR, 10).and(MASK_18).intoArray(out, outpos);
+ iV.lanewise(VectorOperators.LSHR, 16).and(MASK_16).intoArray(out, outpos);
outpos += VLEN_512;
- oV = iV.lanewise(VectorOperators.LSHR, 28).and(MASK_18);
+ var oV = iV.and(MASK_16);
+ oV = oV.zero(SPECIES_512);
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 128);
- oV = iV.and(0x3fff).lanewise(VectorOperators.LSHL, 4).or(oV);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 16);
+ oV = iV.and(0xffff).or(oV);
oV.intoArray(out, outpos);
outpos += VLEN_512;
- iV.lanewise(VectorOperators.LSHR, 14).and(MASK_18).intoArray(out, outpos);
+ iV.lanewise(VectorOperators.LSHR, 16).and(MASK_16).intoArray(out, outpos);
outpos += VLEN_512;
- }
-
- private static void fastunpack19(final int[] in, int inpos, final int[] out,
- int outpos) {
- var iV = IntVector.fromArray(SPECIES_256, in, inpos);
- iV.and(MASK_19).intoArray(out, outpos);
- outpos += VLEN_256;
-
- var oV = iV.lanewise(VectorOperators.LSHR, 19).and(MASK_19);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 8);
- oV = iV.and(0x3f).lanewise(VectorOperators.LSHL, 13).or(oV);
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 6).and(MASK_19).intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 25).and(MASK_19);
+ oV = oV.zero(SPECIES_512);
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 16);
- oV = iV.and(0xfff).lanewise(VectorOperators.LSHL, 7).or(oV);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 32);
+ oV = iV.and(0xffff).or(oV);
oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 12).and(MASK_19).intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 31).and(MASK_19);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 24);
- oV = iV.and(0x3ffff).lanewise(VectorOperators.LSHL, 1).or(oV);
+ outpos += VLEN_512;
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
+ iV.lanewise(VectorOperators.LSHR, 16).and(MASK_16).intoArray(out, outpos);
+ outpos += VLEN_512;
- oV = iV.lanewise(VectorOperators.LSHR, 18).and(MASK_19);
+ oV = oV.zero(SPECIES_512);
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 32);
- oV = iV.and(0x1f).lanewise(VectorOperators.LSHL, 14).or(oV);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 48);
+ oV = iV.and(0xffff).or(oV);
oV.intoArray(out, outpos);
- outpos += VLEN_256;
+ outpos += VLEN_512;
- iV.lanewise(VectorOperators.LSHR, 5).and(MASK_19).intoArray(out, outpos);
- outpos += VLEN_256;
+ iV.lanewise(VectorOperators.LSHR, 16).and(MASK_16).intoArray(out, outpos);
+ outpos += VLEN_512;
- oV = iV.lanewise(VectorOperators.LSHR, 24).and(MASK_19);
+ oV = oV.zero(SPECIES_512);
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 40);
- oV = iV.and(0x7ff).lanewise(VectorOperators.LSHL, 8).or(oV);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 64);
+ oV = iV.and(0xffff).or(oV);
oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 11).and(MASK_19).intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 30).and(MASK_19);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 48);
- oV = iV.and(0x1ffff).lanewise(VectorOperators.LSHL, 2).or(oV);
+ outpos += VLEN_512;
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
+ iV.lanewise(VectorOperators.LSHR, 16).and(MASK_16).intoArray(out, outpos);
+ outpos += VLEN_512;
- oV = iV.lanewise(VectorOperators.LSHR, 17).and(MASK_19);
+ oV = oV.zero(SPECIES_512);
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 56);
- oV = iV.and(0xf).lanewise(VectorOperators.LSHL, 15).or(oV);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 80);
+ oV = iV.and(0xffff).or(oV);
oV.intoArray(out, outpos);
- outpos += VLEN_256;
+ outpos += VLEN_512;
- iV.lanewise(VectorOperators.LSHR, 4).and(MASK_19).intoArray(out, outpos);
- outpos += VLEN_256;
+ iV.lanewise(VectorOperators.LSHR, 16).and(MASK_16).intoArray(out, outpos);
+ outpos += VLEN_512;
- oV = iV.lanewise(VectorOperators.LSHR, 23).and(MASK_19);
+ oV = oV.zero(SPECIES_512);
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 64);
- oV = iV.and(0x3ff).lanewise(VectorOperators.LSHL, 9).or(oV);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 96);
+ oV = iV.and(0xffff).or(oV);
oV.intoArray(out, outpos);
- outpos += VLEN_256;
+ outpos += VLEN_512;
- iV.lanewise(VectorOperators.LSHR, 10).and(MASK_19).intoArray(out, outpos);
- outpos += VLEN_256;
+ iV.lanewise(VectorOperators.LSHR, 16).and(MASK_16).intoArray(out, outpos);
+ outpos += VLEN_512;
- oV = iV.lanewise(VectorOperators.LSHR, 29).and(MASK_19);
+ oV = oV.zero(SPECIES_512);
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 72);
- oV = iV.and(0xffff).lanewise(VectorOperators.LSHL, 3).or(oV);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 112);
+ oV = iV.and(0xffff).or(oV);
oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 16).and(MASK_19);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 80);
- oV = iV.and(7).lanewise(VectorOperators.LSHL, 16).or(oV);
+ outpos += VLEN_512;
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
+ iV.lanewise(VectorOperators.LSHR, 16).and(MASK_16).intoArray(out, outpos);
+ outpos += VLEN_512;
+ }
- iV.lanewise(VectorOperators.LSHR, 3).and(MASK_19).intoArray(out, outpos);
- outpos += VLEN_256;
+ private static void fastunpack18(final int[] in, int inpos, final int[] out,
+ int outpos) {
+ var iV = IntVector.fromArray(SPECIES_512, in, inpos);
+ iV.and(MASK_18).intoArray(out, outpos);
+ outpos += VLEN_512;
- oV = iV.lanewise(VectorOperators.LSHR, 22).and(MASK_19);
+ var oV = iV.lanewise(VectorOperators.LSHR, 18).and(MASK_18);
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 88);
- oV = iV.and(0x1ff).lanewise(VectorOperators.LSHL, 10).or(oV);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 16);
+ oV = iV.and(0xf).lanewise(VectorOperators.LSHL, 14).or(oV);
oV.intoArray(out, outpos);
- outpos += VLEN_256;
+ outpos += VLEN_512;
- iV.lanewise(VectorOperators.LSHR, 9).and(MASK_19).intoArray(out, outpos);
- outpos += VLEN_256;
+ iV.lanewise(VectorOperators.LSHR, 4).and(MASK_18).intoArray(out, outpos);
+ outpos += VLEN_512;
- oV = iV.lanewise(VectorOperators.LSHR, 28).and(MASK_19);
+ oV = iV.lanewise(VectorOperators.LSHR, 22).and(MASK_18);
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 96);
- oV = iV.and(0x7fff).lanewise(VectorOperators.LSHL, 4).or(oV);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 32);
+ oV = iV.and(0xff).lanewise(VectorOperators.LSHL, 10).or(oV);
oV.intoArray(out, outpos);
- outpos += VLEN_256;
+ outpos += VLEN_512;
+
+ iV.lanewise(VectorOperators.LSHR, 8).and(MASK_18).intoArray(out, outpos);
+ outpos += VLEN_512;
- oV = iV.lanewise(VectorOperators.LSHR, 15).and(MASK_19);
+ oV = iV.lanewise(VectorOperators.LSHR, 26).and(MASK_18);
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 104);
- oV = iV.and(3).lanewise(VectorOperators.LSHL, 17).or(oV);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 48);
+ oV = iV.and(0xfff).lanewise(VectorOperators.LSHL, 6).or(oV);
oV.intoArray(out, outpos);
- outpos += VLEN_256;
+ outpos += VLEN_512;
- iV.lanewise(VectorOperators.LSHR, 2).and(MASK_19).intoArray(out, outpos);
- outpos += VLEN_256;
+ iV.lanewise(VectorOperators.LSHR, 12).and(MASK_18).intoArray(out, outpos);
+ outpos += VLEN_512;
- oV = iV.lanewise(VectorOperators.LSHR, 21).and(MASK_19);
+ oV = iV.lanewise(VectorOperators.LSHR, 30).and(MASK_18);
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 112);
- oV = iV.and(0xff).lanewise(VectorOperators.LSHL, 11).or(oV);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 64);
+ oV = iV.and(0xffff).lanewise(VectorOperators.LSHL, 2).or(oV);
oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 8).and(MASK_19).intoArray(out, outpos);
- outpos += VLEN_256;
+ outpos += VLEN_512;
- oV = iV.lanewise(VectorOperators.LSHR, 27).and(MASK_19);
+ oV = iV.lanewise(VectorOperators.LSHR, 16).and(MASK_18);
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 120);
- oV = iV.and(0x3fff).lanewise(VectorOperators.LSHL, 5).or(oV);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 80);
+ oV = iV.and(3).lanewise(VectorOperators.LSHL, 16).or(oV);
oV.intoArray(out, outpos);
- outpos += VLEN_256;
+ outpos += VLEN_512;
- oV = iV.lanewise(VectorOperators.LSHR, 14).and(MASK_19);
+ iV.lanewise(VectorOperators.LSHR, 2).and(MASK_18).intoArray(out, outpos);
+ outpos += VLEN_512;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 20).and(MASK_18);
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 128);
- oV = iV.and(1).lanewise(VectorOperators.LSHL, 18).or(oV);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 96);
+ oV = iV.and(0x3f).lanewise(VectorOperators.LSHL, 12).or(oV);
oV.intoArray(out, outpos);
- outpos += VLEN_256;
+ outpos += VLEN_512;
- iV.lanewise(VectorOperators.LSHR, 1).and(MASK_19).intoArray(out, outpos);
- outpos += VLEN_256;
+ iV.lanewise(VectorOperators.LSHR, 6).and(MASK_18).intoArray(out, outpos);
+ outpos += VLEN_512;
- oV = iV.lanewise(VectorOperators.LSHR, 20).and(MASK_19);
+ oV = iV.lanewise(VectorOperators.LSHR, 24).and(MASK_18);
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 136);
- oV = iV.and(0x7f).lanewise(VectorOperators.LSHL, 12).or(oV);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 112);
+ oV = iV.and(0x3ff).lanewise(VectorOperators.LSHL, 8).or(oV);
oV.intoArray(out, outpos);
- outpos += VLEN_256;
+ outpos += VLEN_512;
- iV.lanewise(VectorOperators.LSHR, 7).and(MASK_19).intoArray(out, outpos);
- outpos += VLEN_256;
+ iV.lanewise(VectorOperators.LSHR, 10).and(MASK_18).intoArray(out, outpos);
+ outpos += VLEN_512;
- oV = iV.lanewise(VectorOperators.LSHR, 26).and(MASK_19);
+ oV = iV.lanewise(VectorOperators.LSHR, 28).and(MASK_18);
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 144);
- oV = iV.and(0x1fff).lanewise(VectorOperators.LSHL, 6).or(oV);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 128);
+ oV = iV.and(0x3fff).lanewise(VectorOperators.LSHL, 4).or(oV);
oV.intoArray(out, outpos);
- outpos += VLEN_256;
+ outpos += VLEN_512;
- iV.lanewise(VectorOperators.LSHR, 13).and(MASK_19).intoArray(out, outpos);
- outpos += VLEN_256;
+ iV.lanewise(VectorOperators.LSHR, 14).and(MASK_18).intoArray(out, outpos);
+ outpos += VLEN_512;
}
private static void fastunpack20(final int[] in, int inpos, final int[] out,
@@ -10878,206 +3726,6 @@ private static void fastunpack20(final int[] in, int inpos, final int[] out,
outpos += VLEN_512;
}
- private static void fastunpack21(final int[] in, int inpos, final int[] out,
- int outpos) {
- var iV = IntVector.fromArray(SPECIES_256, in, inpos);
- iV.and(MASK_21).intoArray(out, outpos);
- outpos += VLEN_256;
-
- var oV = iV.lanewise(VectorOperators.LSHR, 21).and(MASK_21);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 8);
- oV = iV.and(0x3ff).lanewise(VectorOperators.LSHL, 11).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 10).and(MASK_21).intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 31).and(MASK_21);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 16);
- oV = iV.and(0xfffff).lanewise(VectorOperators.LSHL, 1).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 20).and(MASK_21);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 24);
- oV = iV.and(0x1ff).lanewise(VectorOperators.LSHL, 12).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 9).and(MASK_21).intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 30).and(MASK_21);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 32);
- oV = iV.and(0x7ffff).lanewise(VectorOperators.LSHL, 2).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 19).and(MASK_21);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 40);
- oV = iV.and(0xff).lanewise(VectorOperators.LSHL, 13).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 8).and(MASK_21).intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 29).and(MASK_21);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 48);
- oV = iV.and(0x3ffff).lanewise(VectorOperators.LSHL, 3).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 18).and(MASK_21);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 56);
- oV = iV.and(0x7f).lanewise(VectorOperators.LSHL, 14).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 7).and(MASK_21).intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 28).and(MASK_21);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 64);
- oV = iV.and(0x1ffff).lanewise(VectorOperators.LSHL, 4).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 17).and(MASK_21);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 72);
- oV = iV.and(0x3f).lanewise(VectorOperators.LSHL, 15).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 6).and(MASK_21).intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 27).and(MASK_21);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 80);
- oV = iV.and(0xffff).lanewise(VectorOperators.LSHL, 5).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 16).and(MASK_21);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 88);
- oV = iV.and(0x1f).lanewise(VectorOperators.LSHL, 16).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 5).and(MASK_21).intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 26).and(MASK_21);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 96);
- oV = iV.and(0x7fff).lanewise(VectorOperators.LSHL, 6).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 15).and(MASK_21);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 104);
- oV = iV.and(0xf).lanewise(VectorOperators.LSHL, 17).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 4).and(MASK_21).intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 25).and(MASK_21);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 112);
- oV = iV.and(0x3fff).lanewise(VectorOperators.LSHL, 7).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 14).and(MASK_21);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 120);
- oV = iV.and(7).lanewise(VectorOperators.LSHL, 18).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 3).and(MASK_21).intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 24).and(MASK_21);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 128);
- oV = iV.and(0x1fff).lanewise(VectorOperators.LSHL, 8).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 13).and(MASK_21);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 136);
- oV = iV.and(3).lanewise(VectorOperators.LSHL, 19).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 2).and(MASK_21).intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 23).and(MASK_21);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 144);
- oV = iV.and(0xfff).lanewise(VectorOperators.LSHL, 9).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 12).and(MASK_21);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 152);
- oV = iV.and(1).lanewise(VectorOperators.LSHL, 20).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 1).and(MASK_21).intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 22).and(MASK_21);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 160);
- oV = iV.and(0x7ff).lanewise(VectorOperators.LSHL, 10).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 11).and(MASK_21).intoArray(out, outpos);
- outpos += VLEN_256;
- }
-
private static void fastunpack22(final int[] in, int inpos, final int[] out,
int outpos) {
var iV = IntVector.fromArray(SPECIES_512, in, inpos);
@@ -11138,256 +3786,46 @@ private static void fastunpack22(final int[] in, int inpos, final int[] out,
oV.intoArray(out, outpos);
outpos += VLEN_512;
- iV.lanewise(VectorOperators.LSHR, 6).and(MASK_22).intoArray(out, outpos);
- outpos += VLEN_512;
-
- oV = iV.lanewise(VectorOperators.LSHR, 28).and(MASK_22);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 112);
- oV = iV.and(0x3ffff).lanewise(VectorOperators.LSHL, 4).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_512;
-
- oV = iV.lanewise(VectorOperators.LSHR, 18).and(MASK_22);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 128);
- oV = iV.and(0xff).lanewise(VectorOperators.LSHL, 14).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_512;
-
- iV.lanewise(VectorOperators.LSHR, 8).and(MASK_22).intoArray(out, outpos);
- outpos += VLEN_512;
-
- oV = iV.lanewise(VectorOperators.LSHR, 30).and(MASK_22);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 144);
- oV = iV.and(0xfffff).lanewise(VectorOperators.LSHL, 2).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_512;
-
- oV = iV.lanewise(VectorOperators.LSHR, 20).and(MASK_22);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 160);
- oV = iV.and(0x3ff).lanewise(VectorOperators.LSHL, 12).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_512;
-
- iV.lanewise(VectorOperators.LSHR, 10).and(MASK_22).intoArray(out, outpos);
- outpos += VLEN_512;
- }
-
- private static void fastunpack23(final int[] in, int inpos, final int[] out,
- int outpos) {
- var iV = IntVector.fromArray(SPECIES_256, in, inpos);
- iV.and(MASK_23).intoArray(out, outpos);
- outpos += VLEN_256;
-
- var oV = iV.lanewise(VectorOperators.LSHR, 23).and(MASK_23);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 8);
- oV = iV.and(0x3fff).lanewise(VectorOperators.LSHL, 9).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 14).and(MASK_23);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 16);
- oV = iV.and(0x1f).lanewise(VectorOperators.LSHL, 18).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 5).and(MASK_23).intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 28).and(MASK_23);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 24);
- oV = iV.and(0x7ffff).lanewise(VectorOperators.LSHL, 4).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 19).and(MASK_23);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 32);
- oV = iV.and(0x3ff).lanewise(VectorOperators.LSHL, 13).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 10).and(MASK_23);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 40);
- oV = iV.and(1).lanewise(VectorOperators.LSHL, 22).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 1).and(MASK_23).intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 24).and(MASK_23);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 48);
- oV = iV.and(0x7fff).lanewise(VectorOperators.LSHL, 8).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 15).and(MASK_23);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 56);
- oV = iV.and(0x3f).lanewise(VectorOperators.LSHL, 17).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 6).and(MASK_23).intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 29).and(MASK_23);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 64);
- oV = iV.and(0xfffff).lanewise(VectorOperators.LSHL, 3).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 20).and(MASK_23);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 72);
- oV = iV.and(0x7ff).lanewise(VectorOperators.LSHL, 12).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 11).and(MASK_23);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 80);
- oV = iV.and(3).lanewise(VectorOperators.LSHL, 21).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 2).and(MASK_23).intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 25).and(MASK_23);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 88);
- oV = iV.and(0xffff).lanewise(VectorOperators.LSHL, 7).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 16).and(MASK_23);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 96);
- oV = iV.and(0x7f).lanewise(VectorOperators.LSHL, 16).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 7).and(MASK_23).intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 30).and(MASK_23);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 104);
- oV = iV.and(0x1fffff).lanewise(VectorOperators.LSHL, 2).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 21).and(MASK_23);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 112);
- oV = iV.and(0xfff).lanewise(VectorOperators.LSHL, 11).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 12).and(MASK_23);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 120);
- oV = iV.and(7).lanewise(VectorOperators.LSHL, 20).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 3).and(MASK_23).intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 26).and(MASK_23);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 128);
- oV = iV.and(0x1ffff).lanewise(VectorOperators.LSHL, 6).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 17).and(MASK_23);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 136);
- oV = iV.and(0xff).lanewise(VectorOperators.LSHL, 15).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 8).and(MASK_23).intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 31).and(MASK_23);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 144);
- oV = iV.and(0x3fffff).lanewise(VectorOperators.LSHL, 1).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
+ iV.lanewise(VectorOperators.LSHR, 6).and(MASK_22).intoArray(out, outpos);
+ outpos += VLEN_512;
- oV = iV.lanewise(VectorOperators.LSHR, 22).and(MASK_23);
+ oV = iV.lanewise(VectorOperators.LSHR, 28).and(MASK_22);
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 152);
- oV = iV.and(0x1fff).lanewise(VectorOperators.LSHL, 10).or(oV);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 112);
+ oV = iV.and(0x3ffff).lanewise(VectorOperators.LSHL, 4).or(oV);
oV.intoArray(out, outpos);
- outpos += VLEN_256;
+ outpos += VLEN_512;
- oV = iV.lanewise(VectorOperators.LSHR, 13).and(MASK_23);
+ oV = iV.lanewise(VectorOperators.LSHR, 18).and(MASK_22);
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 160);
- oV = iV.and(0xf).lanewise(VectorOperators.LSHL, 19).or(oV);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 128);
+ oV = iV.and(0xff).lanewise(VectorOperators.LSHL, 14).or(oV);
oV.intoArray(out, outpos);
- outpos += VLEN_256;
+ outpos += VLEN_512;
- iV.lanewise(VectorOperators.LSHR, 4).and(MASK_23).intoArray(out, outpos);
- outpos += VLEN_256;
+ iV.lanewise(VectorOperators.LSHR, 8).and(MASK_22).intoArray(out, outpos);
+ outpos += VLEN_512;
- oV = iV.lanewise(VectorOperators.LSHR, 27).and(MASK_23);
+ oV = iV.lanewise(VectorOperators.LSHR, 30).and(MASK_22);
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 168);
- oV = iV.and(0x3ffff).lanewise(VectorOperators.LSHL, 5).or(oV);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 144);
+ oV = iV.and(0xfffff).lanewise(VectorOperators.LSHL, 2).or(oV);
oV.intoArray(out, outpos);
- outpos += VLEN_256;
+ outpos += VLEN_512;
- oV = iV.lanewise(VectorOperators.LSHR, 18).and(MASK_23);
+ oV = iV.lanewise(VectorOperators.LSHR, 20).and(MASK_22);
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 176);
- oV = iV.and(0x1ff).lanewise(VectorOperators.LSHL, 14).or(oV);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 160);
+ oV = iV.and(0x3ff).lanewise(VectorOperators.LSHL, 12).or(oV);
oV.intoArray(out, outpos);
- outpos += VLEN_256;
+ outpos += VLEN_512;
- iV.lanewise(VectorOperators.LSHR, 9).and(MASK_23).intoArray(out, outpos);
- outpos += VLEN_256;
+ iV.lanewise(VectorOperators.LSHR, 10).and(MASK_22).intoArray(out, outpos);
+ outpos += VLEN_512;
}
private static void fastunpack24(final int[] in, int inpos, final int[] out,
@@ -11497,226 +3935,6 @@ private static void fastunpack24(final int[] in, int inpos, final int[] out,
outpos += VLEN_512;
}
- private static void fastunpack25(final int[] in, int inpos, final int[] out,
- int outpos) {
- var iV = IntVector.fromArray(SPECIES_256, in, inpos);
- iV.and(MASK_25).intoArray(out, outpos);
- outpos += VLEN_256;
-
- var oV = iV.lanewise(VectorOperators.LSHR, 25).and(MASK_25);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 8);
- oV = iV.and(0x3ffff).lanewise(VectorOperators.LSHL, 7).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 18).and(MASK_25);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 16);
- oV = iV.and(0x7ff).lanewise(VectorOperators.LSHL, 14).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 11).and(MASK_25);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 24);
- oV = iV.and(0xf).lanewise(VectorOperators.LSHL, 21).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 4).and(MASK_25).intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 29).and(MASK_25);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 32);
- oV = iV.and(0x3fffff).lanewise(VectorOperators.LSHL, 3).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 22).and(MASK_25);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 40);
- oV = iV.and(0x7fff).lanewise(VectorOperators.LSHL, 10).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 15).and(MASK_25);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 48);
- oV = iV.and(0xff).lanewise(VectorOperators.LSHL, 17).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 8).and(MASK_25);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 56);
- oV = iV.and(1).lanewise(VectorOperators.LSHL, 24).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 1).and(MASK_25).intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 26).and(MASK_25);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 64);
- oV = iV.and(0x7ffff).lanewise(VectorOperators.LSHL, 6).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 19).and(MASK_25);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 72);
- oV = iV.and(0xfff).lanewise(VectorOperators.LSHL, 13).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 12).and(MASK_25);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 80);
- oV = iV.and(0x1f).lanewise(VectorOperators.LSHL, 20).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 5).and(MASK_25).intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 30).and(MASK_25);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 88);
- oV = iV.and(0x7fffff).lanewise(VectorOperators.LSHL, 2).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 23).and(MASK_25);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 96);
- oV = iV.and(0xffff).lanewise(VectorOperators.LSHL, 9).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 16).and(MASK_25);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 104);
- oV = iV.and(0x1ff).lanewise(VectorOperators.LSHL, 16).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 9).and(MASK_25);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 112);
- oV = iV.and(3).lanewise(VectorOperators.LSHL, 23).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 2).and(MASK_25).intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 27).and(MASK_25);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 120);
- oV = iV.and(0xfffff).lanewise(VectorOperators.LSHL, 5).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 20).and(MASK_25);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 128);
- oV = iV.and(0x1fff).lanewise(VectorOperators.LSHL, 12).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 13).and(MASK_25);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 136);
- oV = iV.and(0x3f).lanewise(VectorOperators.LSHL, 19).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 6).and(MASK_25).intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 31).and(MASK_25);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 144);
- oV = iV.and(0xffffff).lanewise(VectorOperators.LSHL, 1).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 24).and(MASK_25);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 152);
- oV = iV.and(0x1ffff).lanewise(VectorOperators.LSHL, 8).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 17).and(MASK_25);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 160);
- oV = iV.and(0x3ff).lanewise(VectorOperators.LSHL, 15).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 10).and(MASK_25);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 168);
- oV = iV.and(7).lanewise(VectorOperators.LSHL, 22).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 3).and(MASK_25).intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 28).and(MASK_25);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 176);
- oV = iV.and(0x1fffff).lanewise(VectorOperators.LSHL, 4).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 21).and(MASK_25);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 184);
- oV = iV.and(0x3fff).lanewise(VectorOperators.LSHL, 11).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 14).and(MASK_25);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 192);
- oV = iV.and(0x7f).lanewise(VectorOperators.LSHL, 18).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 7).and(MASK_25).intoArray(out, outpos);
- outpos += VLEN_256;
- }
-
private static void fastunpack26(final int[] in, int inpos, final int[] out,
int outpos) {
var iV = IntVector.fromArray(SPECIES_512, in, inpos);
@@ -11799,264 +4017,34 @@ private static void fastunpack26(final int[] in, int inpos, final int[] out,
oV = iV.and(0xffffff).lanewise(VectorOperators.LSHL, 2).or(oV);
oV.intoArray(out, outpos);
- outpos += VLEN_512;
-
- oV = iV.lanewise(VectorOperators.LSHR, 24).and(MASK_26);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 160);
- oV = iV.and(0x3ffff).lanewise(VectorOperators.LSHL, 8).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_512;
-
- oV = iV.lanewise(VectorOperators.LSHR, 18).and(MASK_26);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 176);
- oV = iV.and(0xfff).lanewise(VectorOperators.LSHL, 14).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_512;
-
- oV = iV.lanewise(VectorOperators.LSHR, 12).and(MASK_26);
-
- iV = IntVector.fromArray(SPECIES_512, in, inpos + 192);
- oV = iV.and(0x3f).lanewise(VectorOperators.LSHL, 20).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_512;
-
- iV.lanewise(VectorOperators.LSHR, 6).and(MASK_26).intoArray(out, outpos);
- outpos += VLEN_512;
- }
-
- private static void fastunpack27(final int[] in, int inpos, final int[] out,
- int outpos) {
- var iV = IntVector.fromArray(SPECIES_256, in, inpos);
- iV.and(MASK_27).intoArray(out, outpos);
- outpos += VLEN_256;
-
- var oV = iV.lanewise(VectorOperators.LSHR, 27).and(MASK_27);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 8);
- oV = iV.and(0x3fffff).lanewise(VectorOperators.LSHL, 5).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 22).and(MASK_27);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 16);
- oV = iV.and(0x1ffff).lanewise(VectorOperators.LSHL, 10).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 17).and(MASK_27);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 24);
- oV = iV.and(0xfff).lanewise(VectorOperators.LSHL, 15).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 12).and(MASK_27);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 32);
- oV = iV.and(0x7f).lanewise(VectorOperators.LSHL, 20).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 7).and(MASK_27);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 40);
- oV = iV.and(3).lanewise(VectorOperators.LSHL, 25).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 2).and(MASK_27).intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 29).and(MASK_27);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 48);
- oV = iV.and(0xffffff).lanewise(VectorOperators.LSHL, 3).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 24).and(MASK_27);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 56);
- oV = iV.and(0x7ffff).lanewise(VectorOperators.LSHL, 8).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 19).and(MASK_27);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 64);
- oV = iV.and(0x3fff).lanewise(VectorOperators.LSHL, 13).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 14).and(MASK_27);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 72);
- oV = iV.and(0x1ff).lanewise(VectorOperators.LSHL, 18).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 9).and(MASK_27);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 80);
- oV = iV.and(0xf).lanewise(VectorOperators.LSHL, 23).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 4).and(MASK_27).intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 31).and(MASK_27);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 88);
- oV = iV.and(0x3ffffff).lanewise(VectorOperators.LSHL, 1).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 26).and(MASK_27);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 96);
- oV = iV.and(0x1fffff).lanewise(VectorOperators.LSHL, 6).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 21).and(MASK_27);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 104);
- oV = iV.and(0xffff).lanewise(VectorOperators.LSHL, 11).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 16).and(MASK_27);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 112);
- oV = iV.and(0x7ff).lanewise(VectorOperators.LSHL, 16).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 11).and(MASK_27);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 120);
- oV = iV.and(0x3f).lanewise(VectorOperators.LSHL, 21).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 6).and(MASK_27);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 128);
- oV = iV.and(1).lanewise(VectorOperators.LSHL, 26).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 1).and(MASK_27).intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 28).and(MASK_27);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 136);
- oV = iV.and(0x7fffff).lanewise(VectorOperators.LSHL, 4).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 23).and(MASK_27);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 144);
- oV = iV.and(0x3ffff).lanewise(VectorOperators.LSHL, 9).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 18).and(MASK_27);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 152);
- oV = iV.and(0x1fff).lanewise(VectorOperators.LSHL, 14).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 13).and(MASK_27);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 160);
- oV = iV.and(0xff).lanewise(VectorOperators.LSHL, 19).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 8).and(MASK_27);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 168);
- oV = iV.and(7).lanewise(VectorOperators.LSHL, 24).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 3).and(MASK_27).intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 30).and(MASK_27);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 176);
- oV = iV.and(0x1ffffff).lanewise(VectorOperators.LSHL, 2).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 25).and(MASK_27);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 184);
- oV = iV.and(0xfffff).lanewise(VectorOperators.LSHL, 7).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
+ outpos += VLEN_512;
- oV = iV.lanewise(VectorOperators.LSHR, 20).and(MASK_27);
+ oV = iV.lanewise(VectorOperators.LSHR, 24).and(MASK_26);
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 192);
- oV = iV.and(0x7fff).lanewise(VectorOperators.LSHL, 12).or(oV);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 160);
+ oV = iV.and(0x3ffff).lanewise(VectorOperators.LSHL, 8).or(oV);
oV.intoArray(out, outpos);
- outpos += VLEN_256;
+ outpos += VLEN_512;
- oV = iV.lanewise(VectorOperators.LSHR, 15).and(MASK_27);
+ oV = iV.lanewise(VectorOperators.LSHR, 18).and(MASK_26);
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 200);
- oV = iV.and(0x3ff).lanewise(VectorOperators.LSHL, 17).or(oV);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 176);
+ oV = iV.and(0xfff).lanewise(VectorOperators.LSHL, 14).or(oV);
oV.intoArray(out, outpos);
- outpos += VLEN_256;
+ outpos += VLEN_512;
- oV = iV.lanewise(VectorOperators.LSHR, 10).and(MASK_27);
+ oV = iV.lanewise(VectorOperators.LSHR, 12).and(MASK_26);
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 208);
- oV = iV.and(0x1f).lanewise(VectorOperators.LSHL, 22).or(oV);
+ iV = IntVector.fromArray(SPECIES_512, in, inpos + 192);
+ oV = iV.and(0x3f).lanewise(VectorOperators.LSHL, 20).or(oV);
oV.intoArray(out, outpos);
- outpos += VLEN_256;
+ outpos += VLEN_512;
- iV.lanewise(VectorOperators.LSHR, 5).and(MASK_27).intoArray(out, outpos);
- outpos += VLEN_256;
+ iV.lanewise(VectorOperators.LSHR, 6).and(MASK_26).intoArray(out, outpos);
+ outpos += VLEN_512;
}
private static void fastunpack28(final int[] in, int inpos, final int[] out,
@@ -12176,246 +4164,6 @@ private static void fastunpack28(final int[] in, int inpos, final int[] out,
outpos += VLEN_512;
}
- private static void fastunpack29(final int[] in, int inpos, final int[] out,
- int outpos) {
- var iV = IntVector.fromArray(SPECIES_256, in, inpos);
- iV.and(MASK_29).intoArray(out, outpos);
- outpos += VLEN_256;
-
- var oV = iV.lanewise(VectorOperators.LSHR, 29).and(MASK_29);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 8);
- oV = iV.and(0x3ffffff).lanewise(VectorOperators.LSHL, 3).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 26).and(MASK_29);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 16);
- oV = iV.and(0x7fffff).lanewise(VectorOperators.LSHL, 6).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 23).and(MASK_29);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 24);
- oV = iV.and(0xfffff).lanewise(VectorOperators.LSHL, 9).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 20).and(MASK_29);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 32);
- oV = iV.and(0x1ffff).lanewise(VectorOperators.LSHL, 12).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 17).and(MASK_29);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 40);
- oV = iV.and(0x3fff).lanewise(VectorOperators.LSHL, 15).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 14).and(MASK_29);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 48);
- oV = iV.and(0x7ff).lanewise(VectorOperators.LSHL, 18).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 11).and(MASK_29);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 56);
- oV = iV.and(0xff).lanewise(VectorOperators.LSHL, 21).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 8).and(MASK_29);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 64);
- oV = iV.and(0x1f).lanewise(VectorOperators.LSHL, 24).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 5).and(MASK_29);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 72);
- oV = iV.and(3).lanewise(VectorOperators.LSHL, 27).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 2).and(MASK_29).intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 31).and(MASK_29);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 80);
- oV = iV.and(0xfffffff).lanewise(VectorOperators.LSHL, 1).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 28).and(MASK_29);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 88);
- oV = iV.and(0x1ffffff).lanewise(VectorOperators.LSHL, 4).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 25).and(MASK_29);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 96);
- oV = iV.and(0x3fffff).lanewise(VectorOperators.LSHL, 7).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 22).and(MASK_29);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 104);
- oV = iV.and(0x7ffff).lanewise(VectorOperators.LSHL, 10).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 19).and(MASK_29);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 112);
- oV = iV.and(0xffff).lanewise(VectorOperators.LSHL, 13).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 16).and(MASK_29);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 120);
- oV = iV.and(0x1fff).lanewise(VectorOperators.LSHL, 16).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 13).and(MASK_29);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 128);
- oV = iV.and(0x3ff).lanewise(VectorOperators.LSHL, 19).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 10).and(MASK_29);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 136);
- oV = iV.and(0x7f).lanewise(VectorOperators.LSHL, 22).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 7).and(MASK_29);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 144);
- oV = iV.and(0xf).lanewise(VectorOperators.LSHL, 25).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 4).and(MASK_29);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 152);
- oV = iV.and(1).lanewise(VectorOperators.LSHL, 28).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 1).and(MASK_29).intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 30).and(MASK_29);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 160);
- oV = iV.and(0x7ffffff).lanewise(VectorOperators.LSHL, 2).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 27).and(MASK_29);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 168);
- oV = iV.and(0xffffff).lanewise(VectorOperators.LSHL, 5).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 24).and(MASK_29);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 176);
- oV = iV.and(0x1fffff).lanewise(VectorOperators.LSHL, 8).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 21).and(MASK_29);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 184);
- oV = iV.and(0x3ffff).lanewise(VectorOperators.LSHL, 11).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 18).and(MASK_29);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 192);
- oV = iV.and(0x7fff).lanewise(VectorOperators.LSHL, 14).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 15).and(MASK_29);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 200);
- oV = iV.and(0xfff).lanewise(VectorOperators.LSHL, 17).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 12).and(MASK_29);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 208);
- oV = iV.and(0x1ff).lanewise(VectorOperators.LSHL, 20).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 9).and(MASK_29);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 216);
- oV = iV.and(0x3f).lanewise(VectorOperators.LSHL, 23).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 6).and(MASK_29);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 224);
- oV = iV.and(7).lanewise(VectorOperators.LSHL, 26).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 3).and(MASK_29).intoArray(out, outpos);
- outpos += VLEN_256;
- }
-
private static void fastunpack30(final int[] in, int inpos, final int[] out,
int outpos) {
var iV = IntVector.fromArray(SPECIES_512, in, inpos);
@@ -12538,253 +4286,4 @@ private static void fastunpack30(final int[] in, int inpos, final int[] out,
outpos += VLEN_512;
}
- private static void fastunpack31(final int[] in, int inpos, final int[] out,
- int outpos) {
- var iV = IntVector.fromArray(SPECIES_256, in, inpos);
- iV.and(MASK_31).intoArray(out, outpos);
- outpos += VLEN_256;
-
- var oV = iV.lanewise(VectorOperators.LSHR, 31).and(MASK_31);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 8);
- oV = iV.and(0x3fffffff).lanewise(VectorOperators.LSHL, 1).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 30).and(MASK_31);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 16);
- oV = iV.and(0x1fffffff).lanewise(VectorOperators.LSHL, 2).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 29).and(MASK_31);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 24);
- oV = iV.and(0xfffffff).lanewise(VectorOperators.LSHL, 3).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 28).and(MASK_31);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 32);
- oV = iV.and(0x7ffffff).lanewise(VectorOperators.LSHL, 4).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 27).and(MASK_31);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 40);
- oV = iV.and(0x3ffffff).lanewise(VectorOperators.LSHL, 5).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 26).and(MASK_31);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 48);
- oV = iV.and(0x1ffffff).lanewise(VectorOperators.LSHL, 6).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 25).and(MASK_31);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 56);
- oV = iV.and(0xffffff).lanewise(VectorOperators.LSHL, 7).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 24).and(MASK_31);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 64);
- oV = iV.and(0x7fffff).lanewise(VectorOperators.LSHL, 8).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 23).and(MASK_31);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 72);
- oV = iV.and(0x3fffff).lanewise(VectorOperators.LSHL, 9).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 22).and(MASK_31);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 80);
- oV = iV.and(0x1fffff).lanewise(VectorOperators.LSHL, 10).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 21).and(MASK_31);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 88);
- oV = iV.and(0xfffff).lanewise(VectorOperators.LSHL, 11).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 20).and(MASK_31);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 96);
- oV = iV.and(0x7ffff).lanewise(VectorOperators.LSHL, 12).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 19).and(MASK_31);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 104);
- oV = iV.and(0x3ffff).lanewise(VectorOperators.LSHL, 13).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 18).and(MASK_31);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 112);
- oV = iV.and(0x1ffff).lanewise(VectorOperators.LSHL, 14).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 17).and(MASK_31);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 120);
- oV = iV.and(0xffff).lanewise(VectorOperators.LSHL, 15).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 16).and(MASK_31);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 128);
- oV = iV.and(0x7fff).lanewise(VectorOperators.LSHL, 16).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 15).and(MASK_31);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 136);
- oV = iV.and(0x3fff).lanewise(VectorOperators.LSHL, 17).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 14).and(MASK_31);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 144);
- oV = iV.and(0x1fff).lanewise(VectorOperators.LSHL, 18).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 13).and(MASK_31);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 152);
- oV = iV.and(0xfff).lanewise(VectorOperators.LSHL, 19).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 12).and(MASK_31);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 160);
- oV = iV.and(0x7ff).lanewise(VectorOperators.LSHL, 20).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 11).and(MASK_31);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 168);
- oV = iV.and(0x3ff).lanewise(VectorOperators.LSHL, 21).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 10).and(MASK_31);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 176);
- oV = iV.and(0x1ff).lanewise(VectorOperators.LSHL, 22).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 9).and(MASK_31);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 184);
- oV = iV.and(0xff).lanewise(VectorOperators.LSHL, 23).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 8).and(MASK_31);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 192);
- oV = iV.and(0x7f).lanewise(VectorOperators.LSHL, 24).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 7).and(MASK_31);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 200);
- oV = iV.and(0x3f).lanewise(VectorOperators.LSHL, 25).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 6).and(MASK_31);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 208);
- oV = iV.and(0x1f).lanewise(VectorOperators.LSHL, 26).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 5).and(MASK_31);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 216);
- oV = iV.and(0xf).lanewise(VectorOperators.LSHL, 27).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 4).and(MASK_31);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 224);
- oV = iV.and(7).lanewise(VectorOperators.LSHL, 28).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 3).and(MASK_31);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 232);
- oV = iV.and(3).lanewise(VectorOperators.LSHL, 29).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- oV = iV.lanewise(VectorOperators.LSHR, 2).and(MASK_31);
-
- iV = IntVector.fromArray(SPECIES_256, in, inpos + 240);
- oV = iV.and(1).lanewise(VectorOperators.LSHL, 30).or(oV);
-
- oV.intoArray(out, outpos);
- outpos += VLEN_256;
-
- iV.lanewise(VectorOperators.LSHR, 1).and(MASK_31).intoArray(out, outpos);
- outpos += VLEN_256;
- }
}
diff --git a/src/main/java/me/lemire/integercompression/vector/VectorBitPacker128.java b/src/main/java/me/lemire/integercompression/vector/VectorBitPacker128.java
new file mode 100644
index 0000000..6e08546
--- /dev/null
+++ b/src/main/java/me/lemire/integercompression/vector/VectorBitPacker128.java
@@ -0,0 +1,31953 @@
+/**
+ * This code is released under the
+ * Apache License Version 2.0 http://www.apache.org/licenses/.
+ *
+ * (c) Daniel Lemire, http://lemire.me/en/
+ */
+package me.lemire.integercompression.vector;
+
+import java.util.Arrays;
+import jdk.incubator.vector.*;
+
+/**
+ * Vectorized bit-packing routines using 128-bit (4 x int32) vectors.
+ *
+ * A 256-integer block is packed across 4 SIMD lanes, each lane packing 64
+ * values into 2*b 32-bit words. Selected at runtime by VectorBitPackerKernels
+ * when the preferred hardware vector width is 128 bits (e.g. Arm NEON, Graviton).
+ */
+public class VectorBitPacker128 implements VectorBitPackerKernels {
+ private static final VectorSpecies SPECIES_128 =
+ IntVector.SPECIES_128; // 128-bit int species used for every vector load and mask constant in this class
+ private static final int VLEN_128 = 4; // int lanes per 128-bit vector; stride added to outpos after a vector store
+ private static final int BLOCK_SIZE = 256; // number of ints copied unchanged by the b == 32 cases
+
+ private static final IntVector MASK_1 =
+ IntVector.broadcast(SPECIES_128, (1 << 1) - 1); // MASK_n: the low n bits set in every lane
+ private static final IntVector MASK_2 =
+ IntVector.broadcast(SPECIES_128, (1 << 2) - 1);
+ private static final IntVector MASK_3 =
+ IntVector.broadcast(SPECIES_128, (1 << 3) - 1);
+ private static final IntVector MASK_4 =
+ IntVector.broadcast(SPECIES_128, (1 << 4) - 1);
+ private static final IntVector MASK_5 =
+ IntVector.broadcast(SPECIES_128, (1 << 5) - 1);
+ private static final IntVector MASK_6 =
+ IntVector.broadcast(SPECIES_128, (1 << 6) - 1);
+ private static final IntVector MASK_7 =
+ IntVector.broadcast(SPECIES_128, (1 << 7) - 1);
+ private static final IntVector MASK_8 =
+ IntVector.broadcast(SPECIES_128, (1 << 8) - 1);
+ private static final IntVector MASK_9 =
+ IntVector.broadcast(SPECIES_128, (1 << 9) - 1);
+ private static final IntVector MASK_10 =
+ IntVector.broadcast(SPECIES_128, (1 << 10) - 1);
+ private static final IntVector MASK_11 =
+ IntVector.broadcast(SPECIES_128, (1 << 11) - 1);
+ private static final IntVector MASK_12 =
+ IntVector.broadcast(SPECIES_128, (1 << 12) - 1);
+ private static final IntVector MASK_13 =
+ IntVector.broadcast(SPECIES_128, (1 << 13) - 1);
+ private static final IntVector MASK_14 =
+ IntVector.broadcast(SPECIES_128, (1 << 14) - 1);
+ private static final IntVector MASK_15 =
+ IntVector.broadcast(SPECIES_128, (1 << 15) - 1);
+ private static final IntVector MASK_16 =
+ IntVector.broadcast(SPECIES_128, (1 << 16) - 1);
+ private static final IntVector MASK_17 =
+ IntVector.broadcast(SPECIES_128, (1 << 17) - 1);
+ private static final IntVector MASK_18 =
+ IntVector.broadcast(SPECIES_128, (1 << 18) - 1);
+ private static final IntVector MASK_19 =
+ IntVector.broadcast(SPECIES_128, (1 << 19) - 1);
+ private static final IntVector MASK_20 =
+ IntVector.broadcast(SPECIES_128, (1 << 20) - 1);
+ private static final IntVector MASK_21 =
+ IntVector.broadcast(SPECIES_128, (1 << 21) - 1);
+ private static final IntVector MASK_22 =
+ IntVector.broadcast(SPECIES_128, (1 << 22) - 1);
+ private static final IntVector MASK_23 =
+ IntVector.broadcast(SPECIES_128, (1 << 23) - 1);
+ private static final IntVector MASK_24 =
+ IntVector.broadcast(SPECIES_128, (1 << 24) - 1);
+ private static final IntVector MASK_25 =
+ IntVector.broadcast(SPECIES_128, (1 << 25) - 1);
+ private static final IntVector MASK_26 =
+ IntVector.broadcast(SPECIES_128, (1 << 26) - 1);
+ private static final IntVector MASK_27 =
+ IntVector.broadcast(SPECIES_128, (1 << 27) - 1);
+ private static final IntVector MASK_28 =
+ IntVector.broadcast(SPECIES_128, (1 << 28) - 1);
+ private static final IntVector MASK_29 =
+ IntVector.broadcast(SPECIES_128, (1 << 29) - 1);
+ private static final IntVector MASK_30 =
+ IntVector.broadcast(SPECIES_128, (1 << 30) - 1);
+ private static final IntVector MASK_31 =
+ IntVector.broadcast(SPECIES_128, (1 << 31) - 1); // 1 << 31 is Integer.MIN_VALUE; subtracting 1 wraps to 0x7fffffff
+
+ @Override // Packs one block from in[inpos..] into out[outpos..] using the kernel selected by bit width b.
+ public void fastpack(final int[] in, int inpos, final int[] out,
+ int outpos, int b) {
+ switch (b) { // no default branch: any b outside 0..32 matches no case and nothing is written
+ case 0:
+ break; // width 0: nothing is written to out
+ case 1:
+ fastpack1(in, inpos, out, outpos); // widths 1..31 each dispatch to their own fastpackN kernel
+ break;
+ case 2:
+ fastpack2(in, inpos, out, outpos);
+ break;
+ case 3:
+ fastpack3(in, inpos, out, outpos);
+ break;
+ case 4:
+ fastpack4(in, inpos, out, outpos);
+ break;
+ case 5:
+ fastpack5(in, inpos, out, outpos);
+ break;
+ case 6:
+ fastpack6(in, inpos, out, outpos);
+ break;
+ case 7:
+ fastpack7(in, inpos, out, outpos);
+ break;
+ case 8:
+ fastpack8(in, inpos, out, outpos);
+ break;
+ case 9:
+ fastpack9(in, inpos, out, outpos);
+ break;
+ case 10:
+ fastpack10(in, inpos, out, outpos);
+ break;
+ case 11:
+ fastpack11(in, inpos, out, outpos);
+ break;
+ case 12:
+ fastpack12(in, inpos, out, outpos);
+ break;
+ case 13:
+ fastpack13(in, inpos, out, outpos);
+ break;
+ case 14:
+ fastpack14(in, inpos, out, outpos);
+ break;
+ case 15:
+ fastpack15(in, inpos, out, outpos);
+ break;
+ case 16:
+ fastpack16(in, inpos, out, outpos);
+ break;
+ case 17:
+ fastpack17(in, inpos, out, outpos);
+ break;
+ case 18:
+ fastpack18(in, inpos, out, outpos);
+ break;
+ case 19:
+ fastpack19(in, inpos, out, outpos);
+ break;
+ case 20:
+ fastpack20(in, inpos, out, outpos);
+ break;
+ case 21:
+ fastpack21(in, inpos, out, outpos);
+ break;
+ case 22:
+ fastpack22(in, inpos, out, outpos);
+ break;
+ case 23:
+ fastpack23(in, inpos, out, outpos);
+ break;
+ case 24:
+ fastpack24(in, inpos, out, outpos);
+ break;
+ case 25:
+ fastpack25(in, inpos, out, outpos);
+ break;
+ case 26:
+ fastpack26(in, inpos, out, outpos);
+ break;
+ case 27:
+ fastpack27(in, inpos, out, outpos);
+ break;
+ case 28:
+ fastpack28(in, inpos, out, outpos);
+ break;
+ case 29:
+ fastpack29(in, inpos, out, outpos);
+ break;
+ case 30:
+ fastpack30(in, inpos, out, outpos);
+ break;
+ case 31:
+ fastpack31(in, inpos, out, outpos);
+ break;
+ case 32:
+ System.arraycopy(in, inpos, out, outpos, BLOCK_SIZE); // width 32: the BLOCK_SIZE ints are copied unchanged
+ break;
+ }
+ }
+
+ @Override // Same dispatch as fastpack, but to the fastpackNoMaskN kernels.
+ public void fastpackNoMask(final int[] in, int inpos, final int[] out,
+ int outpos, int b) {
+ switch (b) { // no default branch: any b outside 0..32 matches no case and nothing is written
+ case 0:
+ break; // width 0: nothing is written to out
+ case 1:
+ fastpackNoMask1(in, inpos, out, outpos); // NOTE(review): this kernel ORs inputs without masking; presumably callers guarantee values fit in b bits - confirm
+ break;
+ case 2:
+ fastpackNoMask2(in, inpos, out, outpos);
+ break;
+ case 3:
+ fastpackNoMask3(in, inpos, out, outpos);
+ break;
+ case 4:
+ fastpackNoMask4(in, inpos, out, outpos);
+ break;
+ case 5:
+ fastpackNoMask5(in, inpos, out, outpos);
+ break;
+ case 6:
+ fastpackNoMask6(in, inpos, out, outpos);
+ break;
+ case 7:
+ fastpackNoMask7(in, inpos, out, outpos);
+ break;
+ case 8:
+ fastpackNoMask8(in, inpos, out, outpos);
+ break;
+ case 9:
+ fastpackNoMask9(in, inpos, out, outpos);
+ break;
+ case 10:
+ fastpackNoMask10(in, inpos, out, outpos);
+ break;
+ case 11:
+ fastpackNoMask11(in, inpos, out, outpos);
+ break;
+ case 12:
+ fastpackNoMask12(in, inpos, out, outpos);
+ break;
+ case 13:
+ fastpackNoMask13(in, inpos, out, outpos);
+ break;
+ case 14:
+ fastpackNoMask14(in, inpos, out, outpos);
+ break;
+ case 15:
+ fastpackNoMask15(in, inpos, out, outpos);
+ break;
+ case 16:
+ fastpackNoMask16(in, inpos, out, outpos);
+ break;
+ case 17:
+ fastpackNoMask17(in, inpos, out, outpos);
+ break;
+ case 18:
+ fastpackNoMask18(in, inpos, out, outpos);
+ break;
+ case 19:
+ fastpackNoMask19(in, inpos, out, outpos);
+ break;
+ case 20:
+ fastpackNoMask20(in, inpos, out, outpos);
+ break;
+ case 21:
+ fastpackNoMask21(in, inpos, out, outpos);
+ break;
+ case 22:
+ fastpackNoMask22(in, inpos, out, outpos);
+ break;
+ case 23:
+ fastpackNoMask23(in, inpos, out, outpos);
+ break;
+ case 24:
+ fastpackNoMask24(in, inpos, out, outpos);
+ break;
+ case 25:
+ fastpackNoMask25(in, inpos, out, outpos);
+ break;
+ case 26:
+ fastpackNoMask26(in, inpos, out, outpos);
+ break;
+ case 27:
+ fastpackNoMask27(in, inpos, out, outpos);
+ break;
+ case 28:
+ fastpackNoMask28(in, inpos, out, outpos);
+ break;
+ case 29:
+ fastpackNoMask29(in, inpos, out, outpos);
+ break;
+ case 30:
+ fastpackNoMask30(in, inpos, out, outpos);
+ break;
+ case 31:
+ fastpackNoMask31(in, inpos, out, outpos);
+ break;
+ case 32:
+ System.arraycopy(in, inpos, out, outpos, BLOCK_SIZE); // width 32: the BLOCK_SIZE ints are copied unchanged
+ break;
+ }
+ }
+
+ @Override // Unpacks one block from in[inpos..] into out[outpos..] using the kernel selected by bit width b.
+ public void fastunpack(final int[] in, int inpos, final int[] out,
+ int outpos, int b) {
+ switch (b) { // no default branch: any b outside 0..32 matches no case and out is left untouched
+ case 0:
+ Arrays.fill(out, outpos, outpos + BLOCK_SIZE, 0); // width 0: the whole output block is zeroed, nothing is read from in
+ break;
+ case 1:
+ fastunpack1(in, inpos, out, outpos); // widths 1..31 each dispatch to their own fastunpackN kernel
+ break;
+ case 2:
+ fastunpack2(in, inpos, out, outpos);
+ break;
+ case 3:
+ fastunpack3(in, inpos, out, outpos);
+ break;
+ case 4:
+ fastunpack4(in, inpos, out, outpos);
+ break;
+ case 5:
+ fastunpack5(in, inpos, out, outpos);
+ break;
+ case 6:
+ fastunpack6(in, inpos, out, outpos);
+ break;
+ case 7:
+ fastunpack7(in, inpos, out, outpos);
+ break;
+ case 8:
+ fastunpack8(in, inpos, out, outpos);
+ break;
+ case 9:
+ fastunpack9(in, inpos, out, outpos);
+ break;
+ case 10:
+ fastunpack10(in, inpos, out, outpos);
+ break;
+ case 11:
+ fastunpack11(in, inpos, out, outpos);
+ break;
+ case 12:
+ fastunpack12(in, inpos, out, outpos);
+ break;
+ case 13:
+ fastunpack13(in, inpos, out, outpos);
+ break;
+ case 14:
+ fastunpack14(in, inpos, out, outpos);
+ break;
+ case 15:
+ fastunpack15(in, inpos, out, outpos);
+ break;
+ case 16:
+ fastunpack16(in, inpos, out, outpos);
+ break;
+ case 17:
+ fastunpack17(in, inpos, out, outpos);
+ break;
+ case 18:
+ fastunpack18(in, inpos, out, outpos);
+ break;
+ case 19:
+ fastunpack19(in, inpos, out, outpos);
+ break;
+ case 20:
+ fastunpack20(in, inpos, out, outpos);
+ break;
+ case 21:
+ fastunpack21(in, inpos, out, outpos);
+ break;
+ case 22:
+ fastunpack22(in, inpos, out, outpos);
+ break;
+ case 23:
+ fastunpack23(in, inpos, out, outpos);
+ break;
+ case 24:
+ fastunpack24(in, inpos, out, outpos);
+ break;
+ case 25:
+ fastunpack25(in, inpos, out, outpos);
+ break;
+ case 26:
+ fastunpack26(in, inpos, out, outpos);
+ break;
+ case 27:
+ fastunpack27(in, inpos, out, outpos);
+ break;
+ case 28:
+ fastunpack28(in, inpos, out, outpos);
+ break;
+ case 29:
+ fastunpack29(in, inpos, out, outpos);
+ break;
+ case 30:
+ fastunpack30(in, inpos, out, outpos);
+ break;
+ case 31:
+ fastunpack31(in, inpos, out, outpos);
+ break;
+ case 32:
+ System.arraycopy(in, inpos, out, outpos, BLOCK_SIZE); // width 32: the BLOCK_SIZE ints are copied unchanged
+ break;
+ }
+ }
+
+ private static void fastpack1(final int[] in, int inpos, final int[] out, int outpos) { // width-1 kernel: reads in[inpos..inpos+255], writes out[outpos..outpos+7]
+ var iV = IntVector.fromArray(SPECIES_128, in, inpos);
+ var oV = iV.and(MASK_1); // keep only bit 0 of each input; the first vector lands in bit 0 of the output lanes
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 4);
+ oV = iV.and(MASK_1).lanewise(VectorOperators.LSHL, 1).or(oV); // the vector loaded at inpos + 4*k supplies bit k of every output lane
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 8);
+ oV = iV.and(MASK_1).lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 12);
+ oV = iV.and(MASK_1).lanewise(VectorOperators.LSHL, 3).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 16);
+ oV = iV.and(MASK_1).lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 20);
+ oV = iV.and(MASK_1).lanewise(VectorOperators.LSHL, 5).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 24);
+ oV = iV.and(MASK_1).lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 28);
+ oV = iV.and(MASK_1).lanewise(VectorOperators.LSHL, 7).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 32);
+ oV = iV.and(MASK_1).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 36);
+ oV = iV.and(MASK_1).lanewise(VectorOperators.LSHL, 9).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 40);
+ oV = iV.and(MASK_1).lanewise(VectorOperators.LSHL, 10).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 44);
+ oV = iV.and(MASK_1).lanewise(VectorOperators.LSHL, 11).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 48);
+ oV = iV.and(MASK_1).lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 52);
+ oV = iV.and(MASK_1).lanewise(VectorOperators.LSHL, 13).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 56);
+ oV = iV.and(MASK_1).lanewise(VectorOperators.LSHL, 14).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 60);
+ oV = iV.and(MASK_1).lanewise(VectorOperators.LSHL, 15).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 64);
+ oV = iV.and(MASK_1).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 68);
+ oV = iV.and(MASK_1).lanewise(VectorOperators.LSHL, 17).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 72);
+ oV = iV.and(MASK_1).lanewise(VectorOperators.LSHL, 18).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 76);
+ oV = iV.and(MASK_1).lanewise(VectorOperators.LSHL, 19).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 80);
+ oV = iV.and(MASK_1).lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 84);
+ oV = iV.and(MASK_1).lanewise(VectorOperators.LSHL, 21).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 88);
+ oV = iV.and(MASK_1).lanewise(VectorOperators.LSHL, 22).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 92);
+ oV = iV.and(MASK_1).lanewise(VectorOperators.LSHL, 23).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 96);
+ oV = iV.and(MASK_1).lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 100);
+ oV = iV.and(MASK_1).lanewise(VectorOperators.LSHL, 25).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 104);
+ oV = iV.and(MASK_1).lanewise(VectorOperators.LSHL, 26).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 108);
+ oV = iV.and(MASK_1).lanewise(VectorOperators.LSHL, 27).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 112);
+ oV = iV.and(MASK_1).lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 116);
+ oV = iV.and(MASK_1).lanewise(VectorOperators.LSHL, 29).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 120);
+ oV = iV.and(MASK_1).lanewise(VectorOperators.LSHL, 30).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 124);
+ oV = iV.and(MASK_1).lanewise(VectorOperators.LSHL, 31).or(oV);
+
+ oV.intoArray(out, outpos); // first output vector: all 32 bits of each lane are filled from in[inpos..inpos+127]
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 128); // second half: the same pattern over in[inpos+128..inpos+255]
+ oV = iV.and(MASK_1);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 132);
+ oV = iV.and(MASK_1).lanewise(VectorOperators.LSHL, 1).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 136);
+ oV = iV.and(MASK_1).lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 140);
+ oV = iV.and(MASK_1).lanewise(VectorOperators.LSHL, 3).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 144);
+ oV = iV.and(MASK_1).lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 148);
+ oV = iV.and(MASK_1).lanewise(VectorOperators.LSHL, 5).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 152);
+ oV = iV.and(MASK_1).lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 156);
+ oV = iV.and(MASK_1).lanewise(VectorOperators.LSHL, 7).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 160);
+ oV = iV.and(MASK_1).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 164);
+ oV = iV.and(MASK_1).lanewise(VectorOperators.LSHL, 9).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 168);
+ oV = iV.and(MASK_1).lanewise(VectorOperators.LSHL, 10).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 172);
+ oV = iV.and(MASK_1).lanewise(VectorOperators.LSHL, 11).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 176);
+ oV = iV.and(MASK_1).lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 180);
+ oV = iV.and(MASK_1).lanewise(VectorOperators.LSHL, 13).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 184);
+ oV = iV.and(MASK_1).lanewise(VectorOperators.LSHL, 14).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 188);
+ oV = iV.and(MASK_1).lanewise(VectorOperators.LSHL, 15).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 192);
+ oV = iV.and(MASK_1).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 196);
+ oV = iV.and(MASK_1).lanewise(VectorOperators.LSHL, 17).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 200);
+ oV = iV.and(MASK_1).lanewise(VectorOperators.LSHL, 18).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 204);
+ oV = iV.and(MASK_1).lanewise(VectorOperators.LSHL, 19).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 208);
+ oV = iV.and(MASK_1).lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 212);
+ oV = iV.and(MASK_1).lanewise(VectorOperators.LSHL, 21).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 216);
+ oV = iV.and(MASK_1).lanewise(VectorOperators.LSHL, 22).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 220);
+ oV = iV.and(MASK_1).lanewise(VectorOperators.LSHL, 23).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 224);
+ oV = iV.and(MASK_1).lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 228);
+ oV = iV.and(MASK_1).lanewise(VectorOperators.LSHL, 25).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 232);
+ oV = iV.and(MASK_1).lanewise(VectorOperators.LSHL, 26).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 236);
+ oV = iV.and(MASK_1).lanewise(VectorOperators.LSHL, 27).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 240);
+ oV = iV.and(MASK_1).lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 244);
+ oV = iV.and(MASK_1).lanewise(VectorOperators.LSHL, 29).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 248);
+ oV = iV.and(MASK_1).lanewise(VectorOperators.LSHL, 30).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 252);
+ oV = iV.and(MASK_1).lanewise(VectorOperators.LSHL, 31).or(oV);
+
+ oV.intoArray(out, outpos); // second output vector, stored VLEN_128 ints after the first
+ }
+
+ private static void fastpackNoMask1(final int[] in, int inpos, final int[] out, int outpos) { // width-1 kernel without masking: reads in[inpos..inpos+255], writes out[outpos..outpos+7]
+ var iV = IntVector.fromArray(SPECIES_128, in, inpos);
+ var oV = iV; // inputs are used as-is; NOTE(review): any bit above bit 0 would leak into neighbouring positions - presumably callers guarantee 0/1 inputs, confirm
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 4);
+ oV = iV.lanewise(VectorOperators.LSHL, 1).or(oV); // the vector loaded at inpos + 4*k is shifted left by k and ORed into the accumulator
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 8);
+ oV = iV.lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 12);
+ oV = iV.lanewise(VectorOperators.LSHL, 3).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 16);
+ oV = iV.lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 20);
+ oV = iV.lanewise(VectorOperators.LSHL, 5).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 24);
+ oV = iV.lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 28);
+ oV = iV.lanewise(VectorOperators.LSHL, 7).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 32);
+ oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 36);
+ oV = iV.lanewise(VectorOperators.LSHL, 9).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 40);
+ oV = iV.lanewise(VectorOperators.LSHL, 10).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 44);
+ oV = iV.lanewise(VectorOperators.LSHL, 11).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 48);
+ oV = iV.lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 52);
+ oV = iV.lanewise(VectorOperators.LSHL, 13).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 56);
+ oV = iV.lanewise(VectorOperators.LSHL, 14).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 60);
+ oV = iV.lanewise(VectorOperators.LSHL, 15).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 64);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 68);
+ oV = iV.lanewise(VectorOperators.LSHL, 17).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 72);
+ oV = iV.lanewise(VectorOperators.LSHL, 18).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 76);
+ oV = iV.lanewise(VectorOperators.LSHL, 19).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 80);
+ oV = iV.lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 84);
+ oV = iV.lanewise(VectorOperators.LSHL, 21).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 88);
+ oV = iV.lanewise(VectorOperators.LSHL, 22).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 92);
+ oV = iV.lanewise(VectorOperators.LSHL, 23).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 96);
+ oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 100);
+ oV = iV.lanewise(VectorOperators.LSHL, 25).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 104);
+ oV = iV.lanewise(VectorOperators.LSHL, 26).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 108);
+ oV = iV.lanewise(VectorOperators.LSHL, 27).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 112);
+ oV = iV.lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 116);
+ oV = iV.lanewise(VectorOperators.LSHL, 29).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 120);
+ oV = iV.lanewise(VectorOperators.LSHL, 30).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 124);
+ oV = iV.lanewise(VectorOperators.LSHL, 31).or(oV);
+
+ oV.intoArray(out, outpos); // first output vector, built from in[inpos..inpos+127]
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 128); // second half: the same pattern over in[inpos+128..inpos+255]
+ oV = iV;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 132);
+ oV = iV.lanewise(VectorOperators.LSHL, 1).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 136);
+ oV = iV.lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 140);
+ oV = iV.lanewise(VectorOperators.LSHL, 3).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 144);
+ oV = iV.lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 148);
+ oV = iV.lanewise(VectorOperators.LSHL, 5).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 152);
+ oV = iV.lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 156);
+ oV = iV.lanewise(VectorOperators.LSHL, 7).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 160);
+ oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 164);
+ oV = iV.lanewise(VectorOperators.LSHL, 9).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 168);
+ oV = iV.lanewise(VectorOperators.LSHL, 10).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 172);
+ oV = iV.lanewise(VectorOperators.LSHL, 11).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 176);
+ oV = iV.lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 180);
+ oV = iV.lanewise(VectorOperators.LSHL, 13).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 184);
+ oV = iV.lanewise(VectorOperators.LSHL, 14).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 188);
+ oV = iV.lanewise(VectorOperators.LSHL, 15).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 192);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 196);
+ oV = iV.lanewise(VectorOperators.LSHL, 17).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 200);
+ oV = iV.lanewise(VectorOperators.LSHL, 18).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 204);
+ oV = iV.lanewise(VectorOperators.LSHL, 19).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 208);
+ oV = iV.lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 212);
+ oV = iV.lanewise(VectorOperators.LSHL, 21).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 216);
+ oV = iV.lanewise(VectorOperators.LSHL, 22).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 220);
+ oV = iV.lanewise(VectorOperators.LSHL, 23).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 224);
+ oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 228);
+ oV = iV.lanewise(VectorOperators.LSHL, 25).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 232);
+ oV = iV.lanewise(VectorOperators.LSHL, 26).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 236);
+ oV = iV.lanewise(VectorOperators.LSHL, 27).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 240);
+ oV = iV.lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 244);
+ oV = iV.lanewise(VectorOperators.LSHL, 29).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 248);
+ oV = iV.lanewise(VectorOperators.LSHL, 30).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 252);
+ oV = iV.lanewise(VectorOperators.LSHL, 31).or(oV);
+
+ oV.intoArray(out, outpos); // second output vector, stored VLEN_128 ints after the first
+ }
+
+ private static void fastunpack1(final int[] in, int inpos, final int[] out, int outpos) {
+ var iV = IntVector.fromArray(SPECIES_128, in, inpos);
+ iV.and(MASK_1).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 1).and(MASK_1).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 2).and(MASK_1).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 3).and(MASK_1).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 4).and(MASK_1).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 5).and(MASK_1).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 6).and(MASK_1).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 7).and(MASK_1).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 8).and(MASK_1).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 9).and(MASK_1).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 10).and(MASK_1).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 11).and(MASK_1).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 12).and(MASK_1).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 13).and(MASK_1).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 14).and(MASK_1).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 15).and(MASK_1).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 16).and(MASK_1).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 17).and(MASK_1).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 18).and(MASK_1).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 19).and(MASK_1).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 20).and(MASK_1).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 21).and(MASK_1).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 22).and(MASK_1).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 23).and(MASK_1).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 24).and(MASK_1).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 25).and(MASK_1).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 26).and(MASK_1).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 27).and(MASK_1).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 28).and(MASK_1).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 29).and(MASK_1).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 30).and(MASK_1).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 31).and(MASK_1).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 4);
+ iV.and(MASK_1).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 1).and(MASK_1).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 2).and(MASK_1).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 3).and(MASK_1).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 4).and(MASK_1).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 5).and(MASK_1).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 6).and(MASK_1).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 7).and(MASK_1).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 8).and(MASK_1).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 9).and(MASK_1).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 10).and(MASK_1).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 11).and(MASK_1).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 12).and(MASK_1).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 13).and(MASK_1).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 14).and(MASK_1).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 15).and(MASK_1).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 16).and(MASK_1).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 17).and(MASK_1).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 18).and(MASK_1).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 19).and(MASK_1).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 20).and(MASK_1).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 21).and(MASK_1).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 22).and(MASK_1).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 23).and(MASK_1).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 24).and(MASK_1).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 25).and(MASK_1).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 26).and(MASK_1).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 27).and(MASK_1).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 28).and(MASK_1).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 29).and(MASK_1).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 30).and(MASK_1).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 31).and(MASK_1).intoArray(out, outpos);
+ }
+
+ private static void fastpack2(final int[] in, int inpos, final int[] out, int outpos) {
+ var iV = IntVector.fromArray(SPECIES_128, in, inpos);
+ var oV = iV.and(MASK_2);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 4);
+ oV = iV.and(MASK_2).lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 8);
+ oV = iV.and(MASK_2).lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 12);
+ oV = iV.and(MASK_2).lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 16);
+ oV = iV.and(MASK_2).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 20);
+ oV = iV.and(MASK_2).lanewise(VectorOperators.LSHL, 10).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 24);
+ oV = iV.and(MASK_2).lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 28);
+ oV = iV.and(MASK_2).lanewise(VectorOperators.LSHL, 14).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 32);
+ oV = iV.and(MASK_2).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 36);
+ oV = iV.and(MASK_2).lanewise(VectorOperators.LSHL, 18).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 40);
+ oV = iV.and(MASK_2).lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 44);
+ oV = iV.and(MASK_2).lanewise(VectorOperators.LSHL, 22).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 48);
+ oV = iV.and(MASK_2).lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 52);
+ oV = iV.and(MASK_2).lanewise(VectorOperators.LSHL, 26).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 56);
+ oV = iV.and(MASK_2).lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 60);
+ oV = iV.and(MASK_2).lanewise(VectorOperators.LSHL, 30).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 64);
+ oV = iV.and(MASK_2);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 68);
+ oV = iV.and(MASK_2).lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 72);
+ oV = iV.and(MASK_2).lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 76);
+ oV = iV.and(MASK_2).lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 80);
+ oV = iV.and(MASK_2).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 84);
+ oV = iV.and(MASK_2).lanewise(VectorOperators.LSHL, 10).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 88);
+ oV = iV.and(MASK_2).lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 92);
+ oV = iV.and(MASK_2).lanewise(VectorOperators.LSHL, 14).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 96);
+ oV = iV.and(MASK_2).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 100);
+ oV = iV.and(MASK_2).lanewise(VectorOperators.LSHL, 18).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 104);
+ oV = iV.and(MASK_2).lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 108);
+ oV = iV.and(MASK_2).lanewise(VectorOperators.LSHL, 22).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 112);
+ oV = iV.and(MASK_2).lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 116);
+ oV = iV.and(MASK_2).lanewise(VectorOperators.LSHL, 26).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 120);
+ oV = iV.and(MASK_2).lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 124);
+ oV = iV.and(MASK_2).lanewise(VectorOperators.LSHL, 30).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 128);
+ oV = iV.and(MASK_2);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 132);
+ oV = iV.and(MASK_2).lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 136);
+ oV = iV.and(MASK_2).lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 140);
+ oV = iV.and(MASK_2).lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 144);
+ oV = iV.and(MASK_2).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 148);
+ oV = iV.and(MASK_2).lanewise(VectorOperators.LSHL, 10).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 152);
+ oV = iV.and(MASK_2).lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 156);
+ oV = iV.and(MASK_2).lanewise(VectorOperators.LSHL, 14).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 160);
+ oV = iV.and(MASK_2).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 164);
+ oV = iV.and(MASK_2).lanewise(VectorOperators.LSHL, 18).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 168);
+ oV = iV.and(MASK_2).lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 172);
+ oV = iV.and(MASK_2).lanewise(VectorOperators.LSHL, 22).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 176);
+ oV = iV.and(MASK_2).lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 180);
+ oV = iV.and(MASK_2).lanewise(VectorOperators.LSHL, 26).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 184);
+ oV = iV.and(MASK_2).lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 188);
+ oV = iV.and(MASK_2).lanewise(VectorOperators.LSHL, 30).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 192);
+ oV = iV.and(MASK_2);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 196);
+ oV = iV.and(MASK_2).lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 200);
+ oV = iV.and(MASK_2).lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 204);
+ oV = iV.and(MASK_2).lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 208);
+ oV = iV.and(MASK_2).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 212);
+ oV = iV.and(MASK_2).lanewise(VectorOperators.LSHL, 10).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 216);
+ oV = iV.and(MASK_2).lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 220);
+ oV = iV.and(MASK_2).lanewise(VectorOperators.LSHL, 14).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 224);
+ oV = iV.and(MASK_2).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 228);
+ oV = iV.and(MASK_2).lanewise(VectorOperators.LSHL, 18).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 232);
+ oV = iV.and(MASK_2).lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 236);
+ oV = iV.and(MASK_2).lanewise(VectorOperators.LSHL, 22).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 240);
+ oV = iV.and(MASK_2).lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 244);
+ oV = iV.and(MASK_2).lanewise(VectorOperators.LSHL, 26).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 248);
+ oV = iV.and(MASK_2).lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 252);
+ oV = iV.and(MASK_2).lanewise(VectorOperators.LSHL, 30).or(oV);
+
+ oV.intoArray(out, outpos);
+ }
+
+ private static void fastpackNoMask2(final int[] in, int inpos, final int[] out, int outpos) {
+ var iV = IntVector.fromArray(SPECIES_128, in, inpos);
+ var oV = iV;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 4);
+ oV = iV.lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 8);
+ oV = iV.lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 12);
+ oV = iV.lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 16);
+ oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 20);
+ oV = iV.lanewise(VectorOperators.LSHL, 10).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 24);
+ oV = iV.lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 28);
+ oV = iV.lanewise(VectorOperators.LSHL, 14).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 32);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 36);
+ oV = iV.lanewise(VectorOperators.LSHL, 18).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 40);
+ oV = iV.lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 44);
+ oV = iV.lanewise(VectorOperators.LSHL, 22).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 48);
+ oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 52);
+ oV = iV.lanewise(VectorOperators.LSHL, 26).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 56);
+ oV = iV.lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 60);
+ oV = iV.lanewise(VectorOperators.LSHL, 30).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 64);
+ oV = iV;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 68);
+ oV = iV.lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 72);
+ oV = iV.lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 76);
+ oV = iV.lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 80);
+ oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 84);
+ oV = iV.lanewise(VectorOperators.LSHL, 10).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 88);
+ oV = iV.lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 92);
+ oV = iV.lanewise(VectorOperators.LSHL, 14).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 96);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 100);
+ oV = iV.lanewise(VectorOperators.LSHL, 18).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 104);
+ oV = iV.lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 108);
+ oV = iV.lanewise(VectorOperators.LSHL, 22).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 112);
+ oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 116);
+ oV = iV.lanewise(VectorOperators.LSHL, 26).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 120);
+ oV = iV.lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 124);
+ oV = iV.lanewise(VectorOperators.LSHL, 30).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 128);
+ oV = iV;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 132);
+ oV = iV.lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 136);
+ oV = iV.lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 140);
+ oV = iV.lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 144);
+ oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 148);
+ oV = iV.lanewise(VectorOperators.LSHL, 10).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 152);
+ oV = iV.lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 156);
+ oV = iV.lanewise(VectorOperators.LSHL, 14).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 160);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 164);
+ oV = iV.lanewise(VectorOperators.LSHL, 18).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 168);
+ oV = iV.lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 172);
+ oV = iV.lanewise(VectorOperators.LSHL, 22).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 176);
+ oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 180);
+ oV = iV.lanewise(VectorOperators.LSHL, 26).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 184);
+ oV = iV.lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 188);
+ oV = iV.lanewise(VectorOperators.LSHL, 30).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 192);
+ oV = iV;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 196);
+ oV = iV.lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 200);
+ oV = iV.lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 204);
+ oV = iV.lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 208);
+ oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 212);
+ oV = iV.lanewise(VectorOperators.LSHL, 10).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 216);
+ oV = iV.lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 220);
+ oV = iV.lanewise(VectorOperators.LSHL, 14).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 224);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 228);
+ oV = iV.lanewise(VectorOperators.LSHL, 18).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 232);
+ oV = iV.lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 236);
+ oV = iV.lanewise(VectorOperators.LSHL, 22).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 240);
+ oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 244);
+ oV = iV.lanewise(VectorOperators.LSHL, 26).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 248);
+ oV = iV.lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 252);
+ oV = iV.lanewise(VectorOperators.LSHL, 30).or(oV);
+
+ oV.intoArray(out, outpos);
+ }
+
+ private static void fastunpack2(final int[] in, int inpos, final int[] out, int outpos) {
+ var iV = IntVector.fromArray(SPECIES_128, in, inpos);
+ iV.and(MASK_2).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 2).and(MASK_2).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 4).and(MASK_2).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 6).and(MASK_2).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 8).and(MASK_2).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 10).and(MASK_2).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 12).and(MASK_2).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 14).and(MASK_2).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 16).and(MASK_2).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 18).and(MASK_2).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 20).and(MASK_2).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 22).and(MASK_2).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 24).and(MASK_2).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 26).and(MASK_2).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 28).and(MASK_2).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 30).and(MASK_2).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 4);
+ iV.and(MASK_2).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 2).and(MASK_2).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 4).and(MASK_2).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 6).and(MASK_2).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 8).and(MASK_2).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 10).and(MASK_2).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 12).and(MASK_2).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 14).and(MASK_2).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 16).and(MASK_2).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 18).and(MASK_2).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 20).and(MASK_2).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 22).and(MASK_2).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 24).and(MASK_2).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 26).and(MASK_2).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 28).and(MASK_2).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 30).and(MASK_2).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 8);
+ iV.and(MASK_2).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 2).and(MASK_2).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 4).and(MASK_2).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 6).and(MASK_2).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 8).and(MASK_2).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 10).and(MASK_2).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 12).and(MASK_2).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 14).and(MASK_2).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 16).and(MASK_2).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 18).and(MASK_2).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 20).and(MASK_2).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 22).and(MASK_2).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 24).and(MASK_2).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 26).and(MASK_2).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 28).and(MASK_2).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 30).and(MASK_2).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 12);
+ iV.and(MASK_2).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 2).and(MASK_2).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 4).and(MASK_2).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 6).and(MASK_2).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 8).and(MASK_2).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 10).and(MASK_2).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 12).and(MASK_2).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 14).and(MASK_2).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 16).and(MASK_2).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 18).and(MASK_2).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 20).and(MASK_2).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 22).and(MASK_2).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 24).and(MASK_2).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 26).and(MASK_2).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 28).and(MASK_2).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 30).and(MASK_2).intoArray(out, outpos);
+ }
+
+ private static void fastpack3(final int[] in, int inpos, final int[] out, int outpos) {
+ var iV = IntVector.fromArray(SPECIES_128, in, inpos);
+ var oV = iV.and(MASK_3);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 4);
+ oV = iV.and(MASK_3).lanewise(VectorOperators.LSHL, 3).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 8);
+ oV = iV.and(MASK_3).lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 12);
+ oV = iV.and(MASK_3).lanewise(VectorOperators.LSHL, 9).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 16);
+ oV = iV.and(MASK_3).lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 20);
+ oV = iV.and(MASK_3).lanewise(VectorOperators.LSHL, 15).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 24);
+ oV = iV.and(MASK_3).lanewise(VectorOperators.LSHL, 18).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 28);
+ oV = iV.and(MASK_3).lanewise(VectorOperators.LSHL, 21).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 32);
+ oV = iV.and(MASK_3).lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 36);
+ oV = iV.and(MASK_3).lanewise(VectorOperators.LSHL, 27).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 40);
+ oV = iV.and(MASK_3).lanewise(VectorOperators.LSHL, 30).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_3).lanewise(VectorOperators.LSHR, 2);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 44);
+ oV = iV.and(MASK_3).lanewise(VectorOperators.LSHL, 1).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 48);
+ oV = iV.and(MASK_3).lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 52);
+ oV = iV.and(MASK_3).lanewise(VectorOperators.LSHL, 7).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 56);
+ oV = iV.and(MASK_3).lanewise(VectorOperators.LSHL, 10).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 60);
+ oV = iV.and(MASK_3).lanewise(VectorOperators.LSHL, 13).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 64);
+ oV = iV.and(MASK_3).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 68);
+ oV = iV.and(MASK_3).lanewise(VectorOperators.LSHL, 19).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 72);
+ oV = iV.and(MASK_3).lanewise(VectorOperators.LSHL, 22).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 76);
+ oV = iV.and(MASK_3).lanewise(VectorOperators.LSHL, 25).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 80);
+ oV = iV.and(MASK_3).lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 84);
+ oV = iV.and(MASK_3).lanewise(VectorOperators.LSHL, 31).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_3).lanewise(VectorOperators.LSHR, 1);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 88);
+ oV = iV.and(MASK_3).lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 92);
+ oV = iV.and(MASK_3).lanewise(VectorOperators.LSHL, 5).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 96);
+ oV = iV.and(MASK_3).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 100);
+ oV = iV.and(MASK_3).lanewise(VectorOperators.LSHL, 11).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 104);
+ oV = iV.and(MASK_3).lanewise(VectorOperators.LSHL, 14).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 108);
+ oV = iV.and(MASK_3).lanewise(VectorOperators.LSHL, 17).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 112);
+ oV = iV.and(MASK_3).lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 116);
+ oV = iV.and(MASK_3).lanewise(VectorOperators.LSHL, 23).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 120);
+ oV = iV.and(MASK_3).lanewise(VectorOperators.LSHL, 26).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 124);
+ oV = iV.and(MASK_3).lanewise(VectorOperators.LSHL, 29).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 128);
+ oV = iV.and(MASK_3);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 132);
+ oV = iV.and(MASK_3).lanewise(VectorOperators.LSHL, 3).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 136);
+ oV = iV.and(MASK_3).lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 140);
+ oV = iV.and(MASK_3).lanewise(VectorOperators.LSHL, 9).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 144);
+ oV = iV.and(MASK_3).lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 148);
+ oV = iV.and(MASK_3).lanewise(VectorOperators.LSHL, 15).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 152);
+ oV = iV.and(MASK_3).lanewise(VectorOperators.LSHL, 18).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 156);
+ oV = iV.and(MASK_3).lanewise(VectorOperators.LSHL, 21).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 160);
+ oV = iV.and(MASK_3).lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 164);
+ oV = iV.and(MASK_3).lanewise(VectorOperators.LSHL, 27).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 168);
+ oV = iV.and(MASK_3).lanewise(VectorOperators.LSHL, 30).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_3).lanewise(VectorOperators.LSHR, 2);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 172);
+ oV = iV.and(MASK_3).lanewise(VectorOperators.LSHL, 1).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 176);
+ oV = iV.and(MASK_3).lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 180);
+ oV = iV.and(MASK_3).lanewise(VectorOperators.LSHL, 7).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 184);
+ oV = iV.and(MASK_3).lanewise(VectorOperators.LSHL, 10).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 188);
+ oV = iV.and(MASK_3).lanewise(VectorOperators.LSHL, 13).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 192);
+ oV = iV.and(MASK_3).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 196);
+ oV = iV.and(MASK_3).lanewise(VectorOperators.LSHL, 19).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 200);
+ oV = iV.and(MASK_3).lanewise(VectorOperators.LSHL, 22).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 204);
+ oV = iV.and(MASK_3).lanewise(VectorOperators.LSHL, 25).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 208);
+ oV = iV.and(MASK_3).lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 212);
+ oV = iV.and(MASK_3).lanewise(VectorOperators.LSHL, 31).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_3).lanewise(VectorOperators.LSHR, 1);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 216);
+ oV = iV.and(MASK_3).lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 220);
+ oV = iV.and(MASK_3).lanewise(VectorOperators.LSHL, 5).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 224);
+ oV = iV.and(MASK_3).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 228);
+ oV = iV.and(MASK_3).lanewise(VectorOperators.LSHL, 11).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 232);
+ oV = iV.and(MASK_3).lanewise(VectorOperators.LSHL, 14).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 236);
+ oV = iV.and(MASK_3).lanewise(VectorOperators.LSHL, 17).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 240);
+ oV = iV.and(MASK_3).lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 244);
+ oV = iV.and(MASK_3).lanewise(VectorOperators.LSHL, 23).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 248);
+ oV = iV.and(MASK_3).lanewise(VectorOperators.LSHL, 26).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 252);
+ oV = iV.and(MASK_3).lanewise(VectorOperators.LSHL, 29).or(oV);
+
+ oV.intoArray(out, outpos);
+ }
+
+    private static void fastpackNoMask3(final int[] in, int inpos, final int[] out, int outpos) {
+        var iV = IntVector.fromArray(SPECIES_128, in, inpos);
+        var oV = iV;
+        // Packs the 64 loads in[inpos] .. in[inpos + 252] (stride 4) at 3 bits each into 6 stored vectors.
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 4);
+        oV = iV.lanewise(VectorOperators.LSHL, 3).or(oV);
+        // Inputs are not masked here; presumably callers guarantee every value fits in 3 bits - TODO confirm.
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 8);
+        oV = iV.lanewise(VectorOperators.LSHL, 6).or(oV);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 12);
+        oV = iV.lanewise(VectorOperators.LSHL, 9).or(oV);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 16);
+        oV = iV.lanewise(VectorOperators.LSHL, 12).or(oV);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 20);
+        oV = iV.lanewise(VectorOperators.LSHL, 15).or(oV);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 24);
+        oV = iV.lanewise(VectorOperators.LSHL, 18).or(oV);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 28);
+        oV = iV.lanewise(VectorOperators.LSHL, 21).or(oV);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 32);
+        oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 36);
+        oV = iV.lanewise(VectorOperators.LSHL, 27).or(oV);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 40);
+        oV = iV.lanewise(VectorOperators.LSHL, 30).or(oV);
+        // The +40 load sits at shift 30, so only its low 2 bits fit; store output vector 0.
+        oV.intoArray(out, outpos);
+        outpos += VLEN_128;
+        // Carry the rest of the +40 load (value >>> 2) into bit 0 of output vector 1.
+        oV = iV.lanewise(VectorOperators.LSHR, 2);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 44);
+        oV = iV.lanewise(VectorOperators.LSHL, 1).or(oV);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 48);
+        oV = iV.lanewise(VectorOperators.LSHL, 4).or(oV);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 52);
+        oV = iV.lanewise(VectorOperators.LSHL, 7).or(oV);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 56);
+        oV = iV.lanewise(VectorOperators.LSHL, 10).or(oV);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 60);
+        oV = iV.lanewise(VectorOperators.LSHL, 13).or(oV);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 64);
+        oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 68);
+        oV = iV.lanewise(VectorOperators.LSHL, 19).or(oV);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 72);
+        oV = iV.lanewise(VectorOperators.LSHL, 22).or(oV);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 76);
+        oV = iV.lanewise(VectorOperators.LSHL, 25).or(oV);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 80);
+        oV = iV.lanewise(VectorOperators.LSHL, 28).or(oV);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 84);
+        oV = iV.lanewise(VectorOperators.LSHL, 31).or(oV);
+        // The +84 load sits at shift 31, so only its low bit fits; store output vector 1.
+        oV.intoArray(out, outpos);
+        outpos += VLEN_128;
+        // Carry the rest of the +84 load (value >>> 1) into bits 0..1 of output vector 2.
+        oV = iV.lanewise(VectorOperators.LSHR, 1);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 88);
+        oV = iV.lanewise(VectorOperators.LSHL, 2).or(oV);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 92);
+        oV = iV.lanewise(VectorOperators.LSHL, 5).or(oV);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 96);
+        oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 100);
+        oV = iV.lanewise(VectorOperators.LSHL, 11).or(oV);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 104);
+        oV = iV.lanewise(VectorOperators.LSHL, 14).or(oV);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 108);
+        oV = iV.lanewise(VectorOperators.LSHL, 17).or(oV);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 112);
+        oV = iV.lanewise(VectorOperators.LSHL, 20).or(oV);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 116);
+        oV = iV.lanewise(VectorOperators.LSHL, 23).or(oV);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 120);
+        oV = iV.lanewise(VectorOperators.LSHL, 26).or(oV);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 124);
+        oV = iV.lanewise(VectorOperators.LSHL, 29).or(oV);
+        // Shift 29 + 3 bits ends at bit 31, so nothing carries over; store output vector 2.
+        oV.intoArray(out, outpos);
+        outpos += VLEN_128;
+        // Second half: loads +128 .. +252 follow the same shift schedule for output vectors 3..5.
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 128);
+        oV = iV;
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 132);
+        oV = iV.lanewise(VectorOperators.LSHL, 3).or(oV);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 136);
+        oV = iV.lanewise(VectorOperators.LSHL, 6).or(oV);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 140);
+        oV = iV.lanewise(VectorOperators.LSHL, 9).or(oV);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 144);
+        oV = iV.lanewise(VectorOperators.LSHL, 12).or(oV);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 148);
+        oV = iV.lanewise(VectorOperators.LSHL, 15).or(oV);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 152);
+        oV = iV.lanewise(VectorOperators.LSHL, 18).or(oV);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 156);
+        oV = iV.lanewise(VectorOperators.LSHL, 21).or(oV);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 160);
+        oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 164);
+        oV = iV.lanewise(VectorOperators.LSHL, 27).or(oV);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 168);
+        oV = iV.lanewise(VectorOperators.LSHL, 30).or(oV);
+        // Only the low 2 bits of the +168 load fit at shift 30; store output vector 3.
+        oV.intoArray(out, outpos);
+        outpos += VLEN_128;
+        // Carry the rest of the +168 load (value >>> 2) into bit 0 of output vector 4.
+        oV = iV.lanewise(VectorOperators.LSHR, 2);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 172);
+        oV = iV.lanewise(VectorOperators.LSHL, 1).or(oV);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 176);
+        oV = iV.lanewise(VectorOperators.LSHL, 4).or(oV);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 180);
+        oV = iV.lanewise(VectorOperators.LSHL, 7).or(oV);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 184);
+        oV = iV.lanewise(VectorOperators.LSHL, 10).or(oV);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 188);
+        oV = iV.lanewise(VectorOperators.LSHL, 13).or(oV);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 192);
+        oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 196);
+        oV = iV.lanewise(VectorOperators.LSHL, 19).or(oV);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 200);
+        oV = iV.lanewise(VectorOperators.LSHL, 22).or(oV);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 204);
+        oV = iV.lanewise(VectorOperators.LSHL, 25).or(oV);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 208);
+        oV = iV.lanewise(VectorOperators.LSHL, 28).or(oV);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 212);
+        oV = iV.lanewise(VectorOperators.LSHL, 31).or(oV);
+        // Only the low bit of the +212 load fits at shift 31; store output vector 4.
+        oV.intoArray(out, outpos);
+        outpos += VLEN_128;
+        // Carry the rest of the +212 load (value >>> 1) into bits 0..1 of output vector 5.
+        oV = iV.lanewise(VectorOperators.LSHR, 1);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 216);
+        oV = iV.lanewise(VectorOperators.LSHL, 2).or(oV);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 220);
+        oV = iV.lanewise(VectorOperators.LSHL, 5).or(oV);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 224);
+        oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 228);
+        oV = iV.lanewise(VectorOperators.LSHL, 11).or(oV);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 232);
+        oV = iV.lanewise(VectorOperators.LSHL, 14).or(oV);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 236);
+        oV = iV.lanewise(VectorOperators.LSHL, 17).or(oV);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 240);
+        oV = iV.lanewise(VectorOperators.LSHL, 20).or(oV);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 244);
+        oV = iV.lanewise(VectorOperators.LSHL, 23).or(oV);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 248);
+        oV = iV.lanewise(VectorOperators.LSHL, 26).or(oV);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 252);
+        oV = iV.lanewise(VectorOperators.LSHL, 29).or(oV);
+        // Store output vector 5; outpos is not advanced after this final store.
+        oV.intoArray(out, outpos);
+    }
+
+    private static void fastunpack3(final int[] in, int inpos, final int[] out, int outpos) {
+        var iV = IntVector.fromArray(SPECIES_128, in, inpos);
+        iV.and(MASK_3).intoArray(out, outpos);
+        outpos += VLEN_128;
+        // Unpacks the 6 loads in[inpos] .. in[inpos + 20] (stride 4) into 64 stored vectors of 3-bit fields.
+        iV.lanewise(VectorOperators.LSHR, 3).and(MASK_3).intoArray(out, outpos);
+        outpos += VLEN_128;
+        // MASK_3 and VLEN_128 are defined outside this view; presumably 0b111 per lane and 4 - TODO confirm.
+        iV.lanewise(VectorOperators.LSHR, 6).and(MASK_3).intoArray(out, outpos);
+        outpos += VLEN_128;
+
+        iV.lanewise(VectorOperators.LSHR, 9).and(MASK_3).intoArray(out, outpos);
+        outpos += VLEN_128;
+
+        iV.lanewise(VectorOperators.LSHR, 12).and(MASK_3).intoArray(out, outpos);
+        outpos += VLEN_128;
+
+        iV.lanewise(VectorOperators.LSHR, 15).and(MASK_3).intoArray(out, outpos);
+        outpos += VLEN_128;
+
+        iV.lanewise(VectorOperators.LSHR, 18).and(MASK_3).intoArray(out, outpos);
+        outpos += VLEN_128;
+
+        iV.lanewise(VectorOperators.LSHR, 21).and(MASK_3).intoArray(out, outpos);
+        outpos += VLEN_128;
+
+        iV.lanewise(VectorOperators.LSHR, 24).and(MASK_3).intoArray(out, outpos);
+        outpos += VLEN_128;
+
+        iV.lanewise(VectorOperators.LSHR, 27).and(MASK_3).intoArray(out, outpos);
+        outpos += VLEN_128;
+        // This field straddles two input vectors: bits 30..31 of the current one are its low 2 bits.
+        var oV = iV.lanewise(VectorOperators.LSHR, 30).and(MASK_3);
+        // Bit 0 of the next input vector (and(1), shifted left by 2) supplies the field's top bit.
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 4);
+        oV = iV.and(1).lanewise(VectorOperators.LSHL, 2).or(oV);
+
+        oV.intoArray(out, outpos);
+        outpos += VLEN_128;
+
+        iV.lanewise(VectorOperators.LSHR, 1).and(MASK_3).intoArray(out, outpos);
+        outpos += VLEN_128;
+
+        iV.lanewise(VectorOperators.LSHR, 4).and(MASK_3).intoArray(out, outpos);
+        outpos += VLEN_128;
+
+        iV.lanewise(VectorOperators.LSHR, 7).and(MASK_3).intoArray(out, outpos);
+        outpos += VLEN_128;
+
+        iV.lanewise(VectorOperators.LSHR, 10).and(MASK_3).intoArray(out, outpos);
+        outpos += VLEN_128;
+
+        iV.lanewise(VectorOperators.LSHR, 13).and(MASK_3).intoArray(out, outpos);
+        outpos += VLEN_128;
+
+        iV.lanewise(VectorOperators.LSHR, 16).and(MASK_3).intoArray(out, outpos);
+        outpos += VLEN_128;
+
+        iV.lanewise(VectorOperators.LSHR, 19).and(MASK_3).intoArray(out, outpos);
+        outpos += VLEN_128;
+
+        iV.lanewise(VectorOperators.LSHR, 22).and(MASK_3).intoArray(out, outpos);
+        outpos += VLEN_128;
+
+        iV.lanewise(VectorOperators.LSHR, 25).and(MASK_3).intoArray(out, outpos);
+        outpos += VLEN_128;
+
+        iV.lanewise(VectorOperators.LSHR, 28).and(MASK_3).intoArray(out, outpos);
+        outpos += VLEN_128;
+        // This field straddles two input vectors: bit 31 of the current one is its low bit.
+        oV = iV.lanewise(VectorOperators.LSHR, 31).and(MASK_3);
+        // Bits 0..1 of the next input vector (and(3), shifted left by 1) supply the field's upper 2 bits.
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 8);
+        oV = iV.and(3).lanewise(VectorOperators.LSHL, 1).or(oV);
+
+        oV.intoArray(out, outpos);
+        outpos += VLEN_128;
+
+        iV.lanewise(VectorOperators.LSHR, 2).and(MASK_3).intoArray(out, outpos);
+        outpos += VLEN_128;
+
+        iV.lanewise(VectorOperators.LSHR, 5).and(MASK_3).intoArray(out, outpos);
+        outpos += VLEN_128;
+
+        iV.lanewise(VectorOperators.LSHR, 8).and(MASK_3).intoArray(out, outpos);
+        outpos += VLEN_128;
+
+        iV.lanewise(VectorOperators.LSHR, 11).and(MASK_3).intoArray(out, outpos);
+        outpos += VLEN_128;
+
+        iV.lanewise(VectorOperators.LSHR, 14).and(MASK_3).intoArray(out, outpos);
+        outpos += VLEN_128;
+
+        iV.lanewise(VectorOperators.LSHR, 17).and(MASK_3).intoArray(out, outpos);
+        outpos += VLEN_128;
+
+        iV.lanewise(VectorOperators.LSHR, 20).and(MASK_3).intoArray(out, outpos);
+        outpos += VLEN_128;
+
+        iV.lanewise(VectorOperators.LSHR, 23).and(MASK_3).intoArray(out, outpos);
+        outpos += VLEN_128;
+
+        iV.lanewise(VectorOperators.LSHR, 26).and(MASK_3).intoArray(out, outpos);
+        outpos += VLEN_128;
+
+        iV.lanewise(VectorOperators.LSHR, 29).and(MASK_3).intoArray(out, outpos);
+        outpos += VLEN_128;
+        // Shift 29 + 3 bits ended at bit 31, so no field straddles here; loads +12 .. +20 repeat the same schedule.
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 12);
+        iV.and(MASK_3).intoArray(out, outpos);
+        outpos += VLEN_128;
+
+        iV.lanewise(VectorOperators.LSHR, 3).and(MASK_3).intoArray(out, outpos);
+        outpos += VLEN_128;
+
+        iV.lanewise(VectorOperators.LSHR, 6).and(MASK_3).intoArray(out, outpos);
+        outpos += VLEN_128;
+
+        iV.lanewise(VectorOperators.LSHR, 9).and(MASK_3).intoArray(out, outpos);
+        outpos += VLEN_128;
+
+        iV.lanewise(VectorOperators.LSHR, 12).and(MASK_3).intoArray(out, outpos);
+        outpos += VLEN_128;
+
+        iV.lanewise(VectorOperators.LSHR, 15).and(MASK_3).intoArray(out, outpos);
+        outpos += VLEN_128;
+
+        iV.lanewise(VectorOperators.LSHR, 18).and(MASK_3).intoArray(out, outpos);
+        outpos += VLEN_128;
+
+        iV.lanewise(VectorOperators.LSHR, 21).and(MASK_3).intoArray(out, outpos);
+        outpos += VLEN_128;
+
+        iV.lanewise(VectorOperators.LSHR, 24).and(MASK_3).intoArray(out, outpos);
+        outpos += VLEN_128;
+
+        iV.lanewise(VectorOperators.LSHR, 27).and(MASK_3).intoArray(out, outpos);
+        outpos += VLEN_128;
+        // Straddling field: low 2 bits from bits 30..31 here, top bit from bit 0 of the +16 load.
+        oV = iV.lanewise(VectorOperators.LSHR, 30).and(MASK_3);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 16);
+        oV = iV.and(1).lanewise(VectorOperators.LSHL, 2).or(oV);
+
+        oV.intoArray(out, outpos);
+        outpos += VLEN_128;
+
+        iV.lanewise(VectorOperators.LSHR, 1).and(MASK_3).intoArray(out, outpos);
+        outpos += VLEN_128;
+
+        iV.lanewise(VectorOperators.LSHR, 4).and(MASK_3).intoArray(out, outpos);
+        outpos += VLEN_128;
+
+        iV.lanewise(VectorOperators.LSHR, 7).and(MASK_3).intoArray(out, outpos);
+        outpos += VLEN_128;
+
+        iV.lanewise(VectorOperators.LSHR, 10).and(MASK_3).intoArray(out, outpos);
+        outpos += VLEN_128;
+
+        iV.lanewise(VectorOperators.LSHR, 13).and(MASK_3).intoArray(out, outpos);
+        outpos += VLEN_128;
+
+        iV.lanewise(VectorOperators.LSHR, 16).and(MASK_3).intoArray(out, outpos);
+        outpos += VLEN_128;
+
+        iV.lanewise(VectorOperators.LSHR, 19).and(MASK_3).intoArray(out, outpos);
+        outpos += VLEN_128;
+
+        iV.lanewise(VectorOperators.LSHR, 22).and(MASK_3).intoArray(out, outpos);
+        outpos += VLEN_128;
+
+        iV.lanewise(VectorOperators.LSHR, 25).and(MASK_3).intoArray(out, outpos);
+        outpos += VLEN_128;
+
+        iV.lanewise(VectorOperators.LSHR, 28).and(MASK_3).intoArray(out, outpos);
+        outpos += VLEN_128;
+        // Straddling field: low bit from bit 31 here, upper 2 bits from bits 0..1 of the +20 load.
+        oV = iV.lanewise(VectorOperators.LSHR, 31).and(MASK_3);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 20);
+        oV = iV.and(3).lanewise(VectorOperators.LSHL, 1).or(oV);
+
+        oV.intoArray(out, outpos);
+        outpos += VLEN_128;
+
+        iV.lanewise(VectorOperators.LSHR, 2).and(MASK_3).intoArray(out, outpos);
+        outpos += VLEN_128;
+
+        iV.lanewise(VectorOperators.LSHR, 5).and(MASK_3).intoArray(out, outpos);
+        outpos += VLEN_128;
+
+        iV.lanewise(VectorOperators.LSHR, 8).and(MASK_3).intoArray(out, outpos);
+        outpos += VLEN_128;
+
+        iV.lanewise(VectorOperators.LSHR, 11).and(MASK_3).intoArray(out, outpos);
+        outpos += VLEN_128;
+
+        iV.lanewise(VectorOperators.LSHR, 14).and(MASK_3).intoArray(out, outpos);
+        outpos += VLEN_128;
+
+        iV.lanewise(VectorOperators.LSHR, 17).and(MASK_3).intoArray(out, outpos);
+        outpos += VLEN_128;
+
+        iV.lanewise(VectorOperators.LSHR, 20).and(MASK_3).intoArray(out, outpos);
+        outpos += VLEN_128;
+
+        iV.lanewise(VectorOperators.LSHR, 23).and(MASK_3).intoArray(out, outpos);
+        outpos += VLEN_128;
+
+        iV.lanewise(VectorOperators.LSHR, 26).and(MASK_3).intoArray(out, outpos);
+        outpos += VLEN_128;
+        // Final field; outpos is not advanced after this store.
+        iV.lanewise(VectorOperators.LSHR, 29).and(MASK_3).intoArray(out, outpos);
+    }
+
+    private static void fastpack4(final int[] in, int inpos, final int[] out, int outpos) {
+        var iV = IntVector.fromArray(SPECIES_128, in, inpos);
+        var oV = iV.and(MASK_4);
+        // Packs the 64 loads in[inpos] .. in[inpos + 252] (stride 4) at 4 bits each into 8 stored vectors.
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 4);
+        oV = iV.and(MASK_4).lanewise(VectorOperators.LSHL, 4).or(oV);
+        // Every load is AND-ed with MASK_4 first (defined outside this view; presumably 0xF per lane - TODO confirm).
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 8);
+        oV = iV.and(MASK_4).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 12);
+        oV = iV.and(MASK_4).lanewise(VectorOperators.LSHL, 12).or(oV);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 16);
+        oV = iV.and(MASK_4).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 20);
+        oV = iV.and(MASK_4).lanewise(VectorOperators.LSHL, 20).or(oV);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 24);
+        oV = iV.and(MASK_4).lanewise(VectorOperators.LSHL, 24).or(oV);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 28);
+        oV = iV.and(MASK_4).lanewise(VectorOperators.LSHL, 28).or(oV);
+        // Shifts 0, 4, ..., 28 end exactly at bit 31, so no bits carry over; store output vector 0.
+        oV.intoArray(out, outpos);
+        outpos += VLEN_128;
+        // Output vector 1: loads +32 .. +60.
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 32);
+        oV = iV.and(MASK_4);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 36);
+        oV = iV.and(MASK_4).lanewise(VectorOperators.LSHL, 4).or(oV);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 40);
+        oV = iV.and(MASK_4).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 44);
+        oV = iV.and(MASK_4).lanewise(VectorOperators.LSHL, 12).or(oV);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 48);
+        oV = iV.and(MASK_4).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 52);
+        oV = iV.and(MASK_4).lanewise(VectorOperators.LSHL, 20).or(oV);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 56);
+        oV = iV.and(MASK_4).lanewise(VectorOperators.LSHL, 24).or(oV);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 60);
+        oV = iV.and(MASK_4).lanewise(VectorOperators.LSHL, 28).or(oV);
+
+        oV.intoArray(out, outpos);
+        outpos += VLEN_128;
+        // Output vector 2: loads +64 .. +92.
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 64);
+        oV = iV.and(MASK_4);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 68);
+        oV = iV.and(MASK_4).lanewise(VectorOperators.LSHL, 4).or(oV);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 72);
+        oV = iV.and(MASK_4).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 76);
+        oV = iV.and(MASK_4).lanewise(VectorOperators.LSHL, 12).or(oV);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 80);
+        oV = iV.and(MASK_4).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 84);
+        oV = iV.and(MASK_4).lanewise(VectorOperators.LSHL, 20).or(oV);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 88);
+        oV = iV.and(MASK_4).lanewise(VectorOperators.LSHL, 24).or(oV);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 92);
+        oV = iV.and(MASK_4).lanewise(VectorOperators.LSHL, 28).or(oV);
+
+        oV.intoArray(out, outpos);
+        outpos += VLEN_128;
+        // Output vector 3: loads +96 .. +124.
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 96);
+        oV = iV.and(MASK_4);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 100);
+        oV = iV.and(MASK_4).lanewise(VectorOperators.LSHL, 4).or(oV);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 104);
+        oV = iV.and(MASK_4).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 108);
+        oV = iV.and(MASK_4).lanewise(VectorOperators.LSHL, 12).or(oV);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 112);
+        oV = iV.and(MASK_4).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 116);
+        oV = iV.and(MASK_4).lanewise(VectorOperators.LSHL, 20).or(oV);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 120);
+        oV = iV.and(MASK_4).lanewise(VectorOperators.LSHL, 24).or(oV);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 124);
+        oV = iV.and(MASK_4).lanewise(VectorOperators.LSHL, 28).or(oV);
+
+        oV.intoArray(out, outpos);
+        outpos += VLEN_128;
+        // Output vector 4: loads +128 .. +156.
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 128);
+        oV = iV.and(MASK_4);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 132);
+        oV = iV.and(MASK_4).lanewise(VectorOperators.LSHL, 4).or(oV);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 136);
+        oV = iV.and(MASK_4).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 140);
+        oV = iV.and(MASK_4).lanewise(VectorOperators.LSHL, 12).or(oV);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 144);
+        oV = iV.and(MASK_4).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 148);
+        oV = iV.and(MASK_4).lanewise(VectorOperators.LSHL, 20).or(oV);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 152);
+        oV = iV.and(MASK_4).lanewise(VectorOperators.LSHL, 24).or(oV);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 156);
+        oV = iV.and(MASK_4).lanewise(VectorOperators.LSHL, 28).or(oV);
+
+        oV.intoArray(out, outpos);
+        outpos += VLEN_128;
+        // Output vector 5: loads +160 .. +188.
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 160);
+        oV = iV.and(MASK_4);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 164);
+        oV = iV.and(MASK_4).lanewise(VectorOperators.LSHL, 4).or(oV);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 168);
+        oV = iV.and(MASK_4).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 172);
+        oV = iV.and(MASK_4).lanewise(VectorOperators.LSHL, 12).or(oV);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 176);
+        oV = iV.and(MASK_4).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 180);
+        oV = iV.and(MASK_4).lanewise(VectorOperators.LSHL, 20).or(oV);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 184);
+        oV = iV.and(MASK_4).lanewise(VectorOperators.LSHL, 24).or(oV);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 188);
+        oV = iV.and(MASK_4).lanewise(VectorOperators.LSHL, 28).or(oV);
+
+        oV.intoArray(out, outpos);
+        outpos += VLEN_128;
+        // Output vector 6: loads +192 .. +220.
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 192);
+        oV = iV.and(MASK_4);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 196);
+        oV = iV.and(MASK_4).lanewise(VectorOperators.LSHL, 4).or(oV);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 200);
+        oV = iV.and(MASK_4).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 204);
+        oV = iV.and(MASK_4).lanewise(VectorOperators.LSHL, 12).or(oV);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 208);
+        oV = iV.and(MASK_4).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 212);
+        oV = iV.and(MASK_4).lanewise(VectorOperators.LSHL, 20).or(oV);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 216);
+        oV = iV.and(MASK_4).lanewise(VectorOperators.LSHL, 24).or(oV);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 220);
+        oV = iV.and(MASK_4).lanewise(VectorOperators.LSHL, 28).or(oV);
+
+        oV.intoArray(out, outpos);
+        outpos += VLEN_128;
+        // Output vector 7: loads +224 .. +252.
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 224);
+        oV = iV.and(MASK_4);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 228);
+        oV = iV.and(MASK_4).lanewise(VectorOperators.LSHL, 4).or(oV);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 232);
+        oV = iV.and(MASK_4).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 236);
+        oV = iV.and(MASK_4).lanewise(VectorOperators.LSHL, 12).or(oV);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 240);
+        oV = iV.and(MASK_4).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 244);
+        oV = iV.and(MASK_4).lanewise(VectorOperators.LSHL, 20).or(oV);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 248);
+        oV = iV.and(MASK_4).lanewise(VectorOperators.LSHL, 24).or(oV);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 252);
+        oV = iV.and(MASK_4).lanewise(VectorOperators.LSHL, 28).or(oV);
+        // Store output vector 7; outpos is not advanced after this final store.
+        oV.intoArray(out, outpos);
+    }
+
+ private static void fastpackNoMask4(final int[] in, int inpos, final int[] out, int outpos) {
+ var iV = IntVector.fromArray(SPECIES_128, in, inpos);
+ var oV = iV;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 4);
+ oV = iV.lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 8);
+ oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 12);
+ oV = iV.lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 16);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 20);
+ oV = iV.lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 24);
+ oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 28);
+ oV = iV.lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 32);
+ oV = iV;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 36);
+ oV = iV.lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 40);
+ oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 44);
+ oV = iV.lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 48);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 52);
+ oV = iV.lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 56);
+ oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 60);
+ oV = iV.lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 64);
+ oV = iV;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 68);
+ oV = iV.lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 72);
+ oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 76);
+ oV = iV.lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 80);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 84);
+ oV = iV.lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 88);
+ oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 92);
+ oV = iV.lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 96);
+ oV = iV;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 100);
+ oV = iV.lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 104);
+ oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 108);
+ oV = iV.lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 112);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 116);
+ oV = iV.lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 120);
+ oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 124);
+ oV = iV.lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 128);
+ oV = iV;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 132);
+ oV = iV.lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 136);
+ oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 140);
+ oV = iV.lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 144);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 148);
+ oV = iV.lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 152);
+ oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 156);
+ oV = iV.lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 160);
+ oV = iV;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 164);
+ oV = iV.lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 168);
+ oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 172);
+ oV = iV.lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 176);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 180);
+ oV = iV.lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 184);
+ oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 188);
+ oV = iV.lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 192);
+ oV = iV;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 196);
+ oV = iV.lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 200);
+ oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 204);
+ oV = iV.lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 208);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 212);
+ oV = iV.lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 216);
+ oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 220);
+ oV = iV.lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 224);
+ oV = iV;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 228);
+ oV = iV.lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 232);
+ oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 236);
+ oV = iV.lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 240);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 244);
+ oV = iV.lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 248);
+ oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 252);
+ oV = iV.lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ oV.intoArray(out, outpos);
+ }
+
+ private static void fastunpack4(final int[] in, int inpos, final int[] out, int outpos) { // Unpacks 4-bit fields: 8 vector loads from in (inpos + 0, 4, ..., 28) yield 64 vector stores to out, starting at outpos.
+ var iV = IntVector.fromArray(SPECIES_128, in, inpos); // first packed vector
+ iV.and(MASK_4).intoArray(out, outpos); // lowest field; MASK_4 is declared outside this view, presumably the low-4-bit mask (TODO confirm)
+ outpos += VLEN_128; // next store slot; loads step by 4 ints, so VLEN_128 is presumably 4 (TODO confirm)
+
+ iV.lanewise(VectorOperators.LSHR, 4).and(MASK_4).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 8).and(MASK_4).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 12).and(MASK_4).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 16).and(MASK_4).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 20).and(MASK_4).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 24).and(MASK_4).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 28).and(MASK_4).intoArray(out, outpos);
+ outpos += VLEN_128;
+ // Packed vector at in[inpos + 4]: emit its eight fields (shifts 0, 4, ..., 28), lowest bits first.
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 4);
+ iV.and(MASK_4).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 4).and(MASK_4).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 8).and(MASK_4).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 12).and(MASK_4).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 16).and(MASK_4).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 20).and(MASK_4).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 24).and(MASK_4).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 28).and(MASK_4).intoArray(out, outpos);
+ outpos += VLEN_128;
+ // Packed vector at in[inpos + 8]: same eight-field expansion.
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 8);
+ iV.and(MASK_4).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 4).and(MASK_4).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 8).and(MASK_4).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 12).and(MASK_4).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 16).and(MASK_4).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 20).and(MASK_4).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 24).and(MASK_4).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 28).and(MASK_4).intoArray(out, outpos);
+ outpos += VLEN_128;
+ // Packed vector at in[inpos + 12]: same eight-field expansion.
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 12);
+ iV.and(MASK_4).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 4).and(MASK_4).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 8).and(MASK_4).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 12).and(MASK_4).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 16).and(MASK_4).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 20).and(MASK_4).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 24).and(MASK_4).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 28).and(MASK_4).intoArray(out, outpos);
+ outpos += VLEN_128;
+ // Packed vector at in[inpos + 16]: same eight-field expansion.
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 16);
+ iV.and(MASK_4).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 4).and(MASK_4).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 8).and(MASK_4).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 12).and(MASK_4).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 16).and(MASK_4).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 20).and(MASK_4).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 24).and(MASK_4).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 28).and(MASK_4).intoArray(out, outpos);
+ outpos += VLEN_128;
+ // Packed vector at in[inpos + 20]: same eight-field expansion.
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 20);
+ iV.and(MASK_4).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 4).and(MASK_4).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 8).and(MASK_4).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 12).and(MASK_4).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 16).and(MASK_4).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 20).and(MASK_4).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 24).and(MASK_4).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 28).and(MASK_4).intoArray(out, outpos);
+ outpos += VLEN_128;
+ // Packed vector at in[inpos + 24]: same eight-field expansion.
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 24);
+ iV.and(MASK_4).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 4).and(MASK_4).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 8).and(MASK_4).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 12).and(MASK_4).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 16).and(MASK_4).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 20).and(MASK_4).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 24).and(MASK_4).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 28).and(MASK_4).intoArray(out, outpos);
+ outpos += VLEN_128;
+ // Last packed vector, at in[inpos + 28]: same eight-field expansion.
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 28);
+ iV.and(MASK_4).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 4).and(MASK_4).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 8).and(MASK_4).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 12).and(MASK_4).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 16).and(MASK_4).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 20).and(MASK_4).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 24).and(MASK_4).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 28).and(MASK_4).intoArray(out, outpos); // final store; outpos is a by-value parameter, so no further advance is needed
+ }
+
+ private static void fastpack5(final int[] in, int inpos, final int[] out, int outpos) { // Packs 64 vector loads from in (inpos + 0, 4, ..., 252), each ANDed with MASK_5, into 10 vector stores to out at 5-bit spacing.
+ var iV = IntVector.fromArray(SPECIES_128, in, inpos);
+ var oV = iV.and(MASK_5); // first value sits at bit 0; MASK_5 is declared outside this view, presumably the low-5-bit mask (TODO confirm)
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 4);
+ oV = iV.and(MASK_5).lanewise(VectorOperators.LSHL, 5).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 8);
+ oV = iV.and(MASK_5).lanewise(VectorOperators.LSHL, 10).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 12);
+ oV = iV.and(MASK_5).lanewise(VectorOperators.LSHL, 15).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 16);
+ oV = iV.and(MASK_5).lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 20);
+ oV = iV.and(MASK_5).lanewise(VectorOperators.LSHL, 25).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 24);
+ oV = iV.and(MASK_5).lanewise(VectorOperators.LSHL, 30).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+ // Carry: at shift 30 only 2 bits of the last value fit; its remaining bits (value >>> 2) seed the next word.
+ oV = iV.and(MASK_5).lanewise(VectorOperators.LSHR, 2);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 28);
+ oV = iV.and(MASK_5).lanewise(VectorOperators.LSHL, 3).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 32);
+ oV = iV.and(MASK_5).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 36);
+ oV = iV.and(MASK_5).lanewise(VectorOperators.LSHL, 13).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 40);
+ oV = iV.and(MASK_5).lanewise(VectorOperators.LSHL, 18).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 44);
+ oV = iV.and(MASK_5).lanewise(VectorOperators.LSHL, 23).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 48);
+ oV = iV.and(MASK_5).lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+ // Carry: at shift 28 only 4 bits of the last value fit; its remaining bits (value >>> 4) seed the next word.
+ oV = iV.and(MASK_5).lanewise(VectorOperators.LSHR, 4);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 52);
+ oV = iV.and(MASK_5).lanewise(VectorOperators.LSHL, 1).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 56);
+ oV = iV.and(MASK_5).lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 60);
+ oV = iV.and(MASK_5).lanewise(VectorOperators.LSHL, 11).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 64);
+ oV = iV.and(MASK_5).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 68);
+ oV = iV.and(MASK_5).lanewise(VectorOperators.LSHL, 21).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 72);
+ oV = iV.and(MASK_5).lanewise(VectorOperators.LSHL, 26).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 76);
+ oV = iV.and(MASK_5).lanewise(VectorOperators.LSHL, 31).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+ // Carry: at shift 31 only 1 bit of the last value fit; its remaining bits (value >>> 1) seed the next word.
+ oV = iV.and(MASK_5).lanewise(VectorOperators.LSHR, 1);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 80);
+ oV = iV.and(MASK_5).lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 84);
+ oV = iV.and(MASK_5).lanewise(VectorOperators.LSHL, 9).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 88);
+ oV = iV.and(MASK_5).lanewise(VectorOperators.LSHL, 14).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 92);
+ oV = iV.and(MASK_5).lanewise(VectorOperators.LSHL, 19).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 96);
+ oV = iV.and(MASK_5).lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 100);
+ oV = iV.and(MASK_5).lanewise(VectorOperators.LSHL, 29).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+ // Carry: at shift 29 only 3 bits of the last value fit; its remaining bits (value >>> 3) seed the next word.
+ oV = iV.and(MASK_5).lanewise(VectorOperators.LSHR, 3);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 104);
+ oV = iV.and(MASK_5).lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 108);
+ oV = iV.and(MASK_5).lanewise(VectorOperators.LSHL, 7).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 112);
+ oV = iV.and(MASK_5).lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 116);
+ oV = iV.and(MASK_5).lanewise(VectorOperators.LSHL, 17).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 120);
+ oV = iV.and(MASK_5).lanewise(VectorOperators.LSHL, 22).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 124);
+ oV = iV.and(MASK_5).lanewise(VectorOperators.LSHL, 27).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+ // 27 + 5 = 32: the word was exactly full, so nothing carries. The second half (inputs 128..252) repeats the same shift schedule.
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 128);
+ oV = iV.and(MASK_5);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 132);
+ oV = iV.and(MASK_5).lanewise(VectorOperators.LSHL, 5).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 136);
+ oV = iV.and(MASK_5).lanewise(VectorOperators.LSHL, 10).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 140);
+ oV = iV.and(MASK_5).lanewise(VectorOperators.LSHL, 15).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 144);
+ oV = iV.and(MASK_5).lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 148);
+ oV = iV.and(MASK_5).lanewise(VectorOperators.LSHL, 25).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 152);
+ oV = iV.and(MASK_5).lanewise(VectorOperators.LSHL, 30).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+ // Carry the bits that did not fit at shift 30 (value >>> 2).
+ oV = iV.and(MASK_5).lanewise(VectorOperators.LSHR, 2);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 156);
+ oV = iV.and(MASK_5).lanewise(VectorOperators.LSHL, 3).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 160);
+ oV = iV.and(MASK_5).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 164);
+ oV = iV.and(MASK_5).lanewise(VectorOperators.LSHL, 13).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 168);
+ oV = iV.and(MASK_5).lanewise(VectorOperators.LSHL, 18).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 172);
+ oV = iV.and(MASK_5).lanewise(VectorOperators.LSHL, 23).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 176);
+ oV = iV.and(MASK_5).lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+ // Carry the bits that did not fit at shift 28 (value >>> 4).
+ oV = iV.and(MASK_5).lanewise(VectorOperators.LSHR, 4);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 180);
+ oV = iV.and(MASK_5).lanewise(VectorOperators.LSHL, 1).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 184);
+ oV = iV.and(MASK_5).lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 188);
+ oV = iV.and(MASK_5).lanewise(VectorOperators.LSHL, 11).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 192);
+ oV = iV.and(MASK_5).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 196);
+ oV = iV.and(MASK_5).lanewise(VectorOperators.LSHL, 21).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 200);
+ oV = iV.and(MASK_5).lanewise(VectorOperators.LSHL, 26).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 204);
+ oV = iV.and(MASK_5).lanewise(VectorOperators.LSHL, 31).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+ // Carry the bits that did not fit at shift 31 (value >>> 1).
+ oV = iV.and(MASK_5).lanewise(VectorOperators.LSHR, 1);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 208);
+ oV = iV.and(MASK_5).lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 212);
+ oV = iV.and(MASK_5).lanewise(VectorOperators.LSHL, 9).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 216);
+ oV = iV.and(MASK_5).lanewise(VectorOperators.LSHL, 14).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 220);
+ oV = iV.and(MASK_5).lanewise(VectorOperators.LSHL, 19).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 224);
+ oV = iV.and(MASK_5).lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 228);
+ oV = iV.and(MASK_5).lanewise(VectorOperators.LSHL, 29).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+ // Carry the bits that did not fit at shift 29 (value >>> 3).
+ oV = iV.and(MASK_5).lanewise(VectorOperators.LSHR, 3);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 232);
+ oV = iV.and(MASK_5).lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 236);
+ oV = iV.and(MASK_5).lanewise(VectorOperators.LSHL, 7).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 240);
+ oV = iV.and(MASK_5).lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 244);
+ oV = iV.and(MASK_5).lanewise(VectorOperators.LSHL, 17).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 248);
+ oV = iV.and(MASK_5).lanewise(VectorOperators.LSHL, 22).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 252);
+ oV = iV.and(MASK_5).lanewise(VectorOperators.LSHL, 27).or(oV);
+
+ oV.intoArray(out, outpos); // tenth and final store; the word ends exactly at bit 32, so nothing is left to carry
+ }
+
+ private static void fastpackNoMask5(final int[] in, int inpos, final int[] out, int outpos) { // Packs 64 vector loads from in (inpos + 0, 4, ..., 252) into 10 vector stores to out at 5-bit spacing; inputs are NOT masked.
+ var iV = IntVector.fromArray(SPECIES_128, in, inpos);
+ var oV = iV; // first value is taken as-is; presumably callers guarantee every value fits in 5 bits (TODO confirm)
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 4);
+ oV = iV.lanewise(VectorOperators.LSHL, 5).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 8);
+ oV = iV.lanewise(VectorOperators.LSHL, 10).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 12);
+ oV = iV.lanewise(VectorOperators.LSHL, 15).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 16);
+ oV = iV.lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 20);
+ oV = iV.lanewise(VectorOperators.LSHL, 25).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 24);
+ oV = iV.lanewise(VectorOperators.LSHL, 30).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+ // Carry: at shift 30 only 2 bits of the last value fit; value >>> 2 seeds the next word (unmasked, so any bits above bit 4 would leak in).
+ oV = iV.lanewise(VectorOperators.LSHR, 2);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 28);
+ oV = iV.lanewise(VectorOperators.LSHL, 3).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 32);
+ oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 36);
+ oV = iV.lanewise(VectorOperators.LSHL, 13).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 40);
+ oV = iV.lanewise(VectorOperators.LSHL, 18).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 44);
+ oV = iV.lanewise(VectorOperators.LSHL, 23).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 48);
+ oV = iV.lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+ // Carry: at shift 28 only 4 bits of the last value fit; value >>> 4 seeds the next word.
+ oV = iV.lanewise(VectorOperators.LSHR, 4);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 52);
+ oV = iV.lanewise(VectorOperators.LSHL, 1).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 56);
+ oV = iV.lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 60);
+ oV = iV.lanewise(VectorOperators.LSHL, 11).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 64);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 68);
+ oV = iV.lanewise(VectorOperators.LSHL, 21).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 72);
+ oV = iV.lanewise(VectorOperators.LSHL, 26).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 76);
+ oV = iV.lanewise(VectorOperators.LSHL, 31).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+ // Carry: at shift 31 only 1 bit of the last value fit; value >>> 1 seeds the next word.
+ oV = iV.lanewise(VectorOperators.LSHR, 1);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 80);
+ oV = iV.lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 84);
+ oV = iV.lanewise(VectorOperators.LSHL, 9).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 88);
+ oV = iV.lanewise(VectorOperators.LSHL, 14).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 92);
+ oV = iV.lanewise(VectorOperators.LSHL, 19).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 96);
+ oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 100);
+ oV = iV.lanewise(VectorOperators.LSHL, 29).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+ // Carry: at shift 29 only 3 bits of the last value fit; value >>> 3 seeds the next word.
+ oV = iV.lanewise(VectorOperators.LSHR, 3);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 104);
+ oV = iV.lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 108);
+ oV = iV.lanewise(VectorOperators.LSHL, 7).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 112);
+ oV = iV.lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 116);
+ oV = iV.lanewise(VectorOperators.LSHL, 17).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 120);
+ oV = iV.lanewise(VectorOperators.LSHL, 22).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 124);
+ oV = iV.lanewise(VectorOperators.LSHL, 27).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+ // 27 + 5 = 32: for 5-bit values the word is exactly full, so no carry. The second half (inputs 128..252) repeats the same shift schedule.
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 128);
+ oV = iV;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 132);
+ oV = iV.lanewise(VectorOperators.LSHL, 5).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 136);
+ oV = iV.lanewise(VectorOperators.LSHL, 10).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 140);
+ oV = iV.lanewise(VectorOperators.LSHL, 15).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 144);
+ oV = iV.lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 148);
+ oV = iV.lanewise(VectorOperators.LSHL, 25).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 152);
+ oV = iV.lanewise(VectorOperators.LSHL, 30).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+ // Carry the bits that did not fit at shift 30 (value >>> 2).
+ oV = iV.lanewise(VectorOperators.LSHR, 2);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 156);
+ oV = iV.lanewise(VectorOperators.LSHL, 3).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 160);
+ oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 164);
+ oV = iV.lanewise(VectorOperators.LSHL, 13).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 168);
+ oV = iV.lanewise(VectorOperators.LSHL, 18).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 172);
+ oV = iV.lanewise(VectorOperators.LSHL, 23).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 176);
+ oV = iV.lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+ // Carry the bits that did not fit at shift 28 (value >>> 4).
+ oV = iV.lanewise(VectorOperators.LSHR, 4);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 180);
+ oV = iV.lanewise(VectorOperators.LSHL, 1).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 184);
+ oV = iV.lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 188);
+ oV = iV.lanewise(VectorOperators.LSHL, 11).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 192);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 196);
+ oV = iV.lanewise(VectorOperators.LSHL, 21).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 200);
+ oV = iV.lanewise(VectorOperators.LSHL, 26).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 204);
+ oV = iV.lanewise(VectorOperators.LSHL, 31).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+ // Carry the bits that did not fit at shift 31 (value >>> 1).
+ oV = iV.lanewise(VectorOperators.LSHR, 1);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 208);
+ oV = iV.lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 212);
+ oV = iV.lanewise(VectorOperators.LSHL, 9).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 216);
+ oV = iV.lanewise(VectorOperators.LSHL, 14).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 220);
+ oV = iV.lanewise(VectorOperators.LSHL, 19).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 224);
+ oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 228);
+ oV = iV.lanewise(VectorOperators.LSHL, 29).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+ // Carry the bits that did not fit at shift 29 (value >>> 3).
+ oV = iV.lanewise(VectorOperators.LSHR, 3);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 232);
+ oV = iV.lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 236);
+ oV = iV.lanewise(VectorOperators.LSHL, 7).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 240);
+ oV = iV.lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 244);
+ oV = iV.lanewise(VectorOperators.LSHL, 17).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 248);
+ oV = iV.lanewise(VectorOperators.LSHL, 22).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 252);
+ oV = iV.lanewise(VectorOperators.LSHL, 27).or(oV);
+
+ oV.intoArray(out, outpos); // tenth and final store
+ }
+
+ private static void fastunpack5(final int[] in, int inpos, final int[] out, int outpos) {
+ var iV = IntVector.fromArray(SPECIES_128, in, inpos);
+ iV.and(MASK_5).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 5).and(MASK_5).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 10).and(MASK_5).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 15).and(MASK_5).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 20).and(MASK_5).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 25).and(MASK_5).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ var oV = iV.lanewise(VectorOperators.LSHR, 30).and(MASK_5);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 4);
+ oV = iV.and(7).lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 3).and(MASK_5).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 8).and(MASK_5).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 13).and(MASK_5).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 18).and(MASK_5).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 23).and(MASK_5).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 28).and(MASK_5);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 8);
+ oV = iV.and(1).lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 1).and(MASK_5).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 6).and(MASK_5).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 11).and(MASK_5).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 16).and(MASK_5).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 21).and(MASK_5).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 26).and(MASK_5).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 31).and(MASK_5);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 12);
+ oV = iV.and(15).lanewise(VectorOperators.LSHL, 1).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 4).and(MASK_5).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 9).and(MASK_5).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 14).and(MASK_5).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 19).and(MASK_5).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 24).and(MASK_5).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 29).and(MASK_5);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 16);
+ oV = iV.and(3).lanewise(VectorOperators.LSHL, 3).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 2).and(MASK_5).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 7).and(MASK_5).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 12).and(MASK_5).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 17).and(MASK_5).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 22).and(MASK_5).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 27).and(MASK_5).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 20);
+ iV.and(MASK_5).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 5).and(MASK_5).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 10).and(MASK_5).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 15).and(MASK_5).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 20).and(MASK_5).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 25).and(MASK_5).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 30).and(MASK_5);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 24);
+ oV = iV.and(7).lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 3).and(MASK_5).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 8).and(MASK_5).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 13).and(MASK_5).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 18).and(MASK_5).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 23).and(MASK_5).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 28).and(MASK_5);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 28);
+ oV = iV.and(1).lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 1).and(MASK_5).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 6).and(MASK_5).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 11).and(MASK_5).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 16).and(MASK_5).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 21).and(MASK_5).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 26).and(MASK_5).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 31).and(MASK_5);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 32);
+ oV = iV.and(15).lanewise(VectorOperators.LSHL, 1).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 4).and(MASK_5).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 9).and(MASK_5).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 14).and(MASK_5).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 19).and(MASK_5).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 24).and(MASK_5).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 29).and(MASK_5);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 36);
+ oV = iV.and(3).lanewise(VectorOperators.LSHL, 3).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 2).and(MASK_5).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 7).and(MASK_5).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 12).and(MASK_5).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 17).and(MASK_5).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 22).and(MASK_5).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 27).and(MASK_5).intoArray(out, outpos);
+ }
+
+ /**
+  * Packs 256 ints from {@code in} at 6 bits each into 12 vectors of {@code out}.
+  *
+  * <p>Every input is ANDed with {@code MASK_6} (declared outside this method; presumably
+  * 0x3F per lane) before it is merged. Lanes are packed independently, low bits first;
+  * a field that does not fit in the current word is continued in the next one.
+  *
+  * @param in source array; 64 vectors are loaded starting at {@code inpos}
+  * @param inpos index of the first int to pack
+  * @param out destination array; 12 vectors are stored starting at {@code outpos}
+  * @param outpos index of the first packed int to write
+  */
+ private static void fastpack6(final int[] in, int inpos, final int[] out, int outpos) {
+     var acc = IntVector.fromArray(SPECIES_128, in, inpos).and(MASK_6);
+
+     var src = IntVector.fromArray(SPECIES_128, in, inpos + 4).and(MASK_6);
+     acc = acc.or(src.lanewise(VectorOperators.LSHL, 6));
+
+     src = IntVector.fromArray(SPECIES_128, in, inpos + 8).and(MASK_6);
+     acc = acc.or(src.lanewise(VectorOperators.LSHL, 12));
+
+     src = IntVector.fromArray(SPECIES_128, in, inpos + 12).and(MASK_6);
+     acc = acc.or(src.lanewise(VectorOperators.LSHL, 18));
+
+     src = IntVector.fromArray(SPECIES_128, in, inpos + 16).and(MASK_6);
+     acc = acc.or(src.lanewise(VectorOperators.LSHL, 24));
+
+     src = IntVector.fromArray(SPECIES_128, in, inpos + 20).and(MASK_6);
+     acc = acc.or(src.lanewise(VectorOperators.LSHL, 30));
+
+     acc.intoArray(out, outpos);
+     outpos += VLEN_128;
+     // Only 2 bits of the last field fit above bit 30; its upper 4 bits open the next word.
+     acc = src.lanewise(VectorOperators.LSHR, 2);
+     src = IntVector.fromArray(SPECIES_128, in, inpos + 24).and(MASK_6);
+     acc = acc.or(src.lanewise(VectorOperators.LSHL, 4));
+
+     src = IntVector.fromArray(SPECIES_128, in, inpos + 28).and(MASK_6);
+     acc = acc.or(src.lanewise(VectorOperators.LSHL, 10));
+
+     src = IntVector.fromArray(SPECIES_128, in, inpos + 32).and(MASK_6);
+     acc = acc.or(src.lanewise(VectorOperators.LSHL, 16));
+
+     src = IntVector.fromArray(SPECIES_128, in, inpos + 36).and(MASK_6);
+     acc = acc.or(src.lanewise(VectorOperators.LSHL, 22));
+
+     src = IntVector.fromArray(SPECIES_128, in, inpos + 40).and(MASK_6);
+     acc = acc.or(src.lanewise(VectorOperators.LSHL, 28));
+
+     acc.intoArray(out, outpos);
+     outpos += VLEN_128;
+     // Only 4 bits of the last field fit above bit 28; its upper 2 bits open the next word.
+     acc = src.lanewise(VectorOperators.LSHR, 4);
+     src = IntVector.fromArray(SPECIES_128, in, inpos + 44).and(MASK_6);
+     acc = acc.or(src.lanewise(VectorOperators.LSHL, 2));
+
+     src = IntVector.fromArray(SPECIES_128, in, inpos + 48).and(MASK_6);
+     acc = acc.or(src.lanewise(VectorOperators.LSHL, 8));
+
+     src = IntVector.fromArray(SPECIES_128, in, inpos + 52).and(MASK_6);
+     acc = acc.or(src.lanewise(VectorOperators.LSHL, 14));
+
+     src = IntVector.fromArray(SPECIES_128, in, inpos + 56).and(MASK_6);
+     acc = acc.or(src.lanewise(VectorOperators.LSHL, 20));
+
+     src = IntVector.fromArray(SPECIES_128, in, inpos + 60).and(MASK_6);
+     acc = acc.or(src.lanewise(VectorOperators.LSHL, 26));
+
+     acc.intoArray(out, outpos);
+     outpos += VLEN_128;
+     // Inputs at inpos + 64 .. inpos + 127 fill packed vectors 3..5 with the same layout.
+     acc = IntVector.fromArray(SPECIES_128, in, inpos + 64).and(MASK_6);
+
+     src = IntVector.fromArray(SPECIES_128, in, inpos + 68).and(MASK_6);
+     acc = acc.or(src.lanewise(VectorOperators.LSHL, 6));
+
+     src = IntVector.fromArray(SPECIES_128, in, inpos + 72).and(MASK_6);
+     acc = acc.or(src.lanewise(VectorOperators.LSHL, 12));
+
+     src = IntVector.fromArray(SPECIES_128, in, inpos + 76).and(MASK_6);
+     acc = acc.or(src.lanewise(VectorOperators.LSHL, 18));
+
+     src = IntVector.fromArray(SPECIES_128, in, inpos + 80).and(MASK_6);
+     acc = acc.or(src.lanewise(VectorOperators.LSHL, 24));
+
+     src = IntVector.fromArray(SPECIES_128, in, inpos + 84).and(MASK_6);
+     acc = acc.or(src.lanewise(VectorOperators.LSHL, 30));
+
+     acc.intoArray(out, outpos);
+     outpos += VLEN_128;
+     // Upper 4 bits of the straddling field.
+     acc = src.lanewise(VectorOperators.LSHR, 2);
+     src = IntVector.fromArray(SPECIES_128, in, inpos + 88).and(MASK_6);
+     acc = acc.or(src.lanewise(VectorOperators.LSHL, 4));
+
+     src = IntVector.fromArray(SPECIES_128, in, inpos + 92).and(MASK_6);
+     acc = acc.or(src.lanewise(VectorOperators.LSHL, 10));
+
+     src = IntVector.fromArray(SPECIES_128, in, inpos + 96).and(MASK_6);
+     acc = acc.or(src.lanewise(VectorOperators.LSHL, 16));
+
+     src = IntVector.fromArray(SPECIES_128, in, inpos + 100).and(MASK_6);
+     acc = acc.or(src.lanewise(VectorOperators.LSHL, 22));
+
+     src = IntVector.fromArray(SPECIES_128, in, inpos + 104).and(MASK_6);
+     acc = acc.or(src.lanewise(VectorOperators.LSHL, 28));
+
+     acc.intoArray(out, outpos);
+     outpos += VLEN_128;
+     // Upper 2 bits of the straddling field.
+     acc = src.lanewise(VectorOperators.LSHR, 4);
+     src = IntVector.fromArray(SPECIES_128, in, inpos + 108).and(MASK_6);
+     acc = acc.or(src.lanewise(VectorOperators.LSHL, 2));
+
+     src = IntVector.fromArray(SPECIES_128, in, inpos + 112).and(MASK_6);
+     acc = acc.or(src.lanewise(VectorOperators.LSHL, 8));
+
+     src = IntVector.fromArray(SPECIES_128, in, inpos + 116).and(MASK_6);
+     acc = acc.or(src.lanewise(VectorOperators.LSHL, 14));
+
+     src = IntVector.fromArray(SPECIES_128, in, inpos + 120).and(MASK_6);
+     acc = acc.or(src.lanewise(VectorOperators.LSHL, 20));
+
+     src = IntVector.fromArray(SPECIES_128, in, inpos + 124).and(MASK_6);
+     acc = acc.or(src.lanewise(VectorOperators.LSHL, 26));
+
+     acc.intoArray(out, outpos);
+     outpos += VLEN_128;
+     // Inputs at inpos + 128 .. inpos + 191 fill packed vectors 6..8.
+     acc = IntVector.fromArray(SPECIES_128, in, inpos + 128).and(MASK_6);
+
+     src = IntVector.fromArray(SPECIES_128, in, inpos + 132).and(MASK_6);
+     acc = acc.or(src.lanewise(VectorOperators.LSHL, 6));
+
+     src = IntVector.fromArray(SPECIES_128, in, inpos + 136).and(MASK_6);
+     acc = acc.or(src.lanewise(VectorOperators.LSHL, 12));
+
+     src = IntVector.fromArray(SPECIES_128, in, inpos + 140).and(MASK_6);
+     acc = acc.or(src.lanewise(VectorOperators.LSHL, 18));
+
+     src = IntVector.fromArray(SPECIES_128, in, inpos + 144).and(MASK_6);
+     acc = acc.or(src.lanewise(VectorOperators.LSHL, 24));
+
+     src = IntVector.fromArray(SPECIES_128, in, inpos + 148).and(MASK_6);
+     acc = acc.or(src.lanewise(VectorOperators.LSHL, 30));
+
+     acc.intoArray(out, outpos);
+     outpos += VLEN_128;
+     // Upper 4 bits of the straddling field.
+     acc = src.lanewise(VectorOperators.LSHR, 2);
+     src = IntVector.fromArray(SPECIES_128, in, inpos + 152).and(MASK_6);
+     acc = acc.or(src.lanewise(VectorOperators.LSHL, 4));
+
+     src = IntVector.fromArray(SPECIES_128, in, inpos + 156).and(MASK_6);
+     acc = acc.or(src.lanewise(VectorOperators.LSHL, 10));
+
+     src = IntVector.fromArray(SPECIES_128, in, inpos + 160).and(MASK_6);
+     acc = acc.or(src.lanewise(VectorOperators.LSHL, 16));
+
+     src = IntVector.fromArray(SPECIES_128, in, inpos + 164).and(MASK_6);
+     acc = acc.or(src.lanewise(VectorOperators.LSHL, 22));
+
+     src = IntVector.fromArray(SPECIES_128, in, inpos + 168).and(MASK_6);
+     acc = acc.or(src.lanewise(VectorOperators.LSHL, 28));
+
+     acc.intoArray(out, outpos);
+     outpos += VLEN_128;
+     // Upper 2 bits of the straddling field.
+     acc = src.lanewise(VectorOperators.LSHR, 4);
+     src = IntVector.fromArray(SPECIES_128, in, inpos + 172).and(MASK_6);
+     acc = acc.or(src.lanewise(VectorOperators.LSHL, 2));
+
+     src = IntVector.fromArray(SPECIES_128, in, inpos + 176).and(MASK_6);
+     acc = acc.or(src.lanewise(VectorOperators.LSHL, 8));
+
+     src = IntVector.fromArray(SPECIES_128, in, inpos + 180).and(MASK_6);
+     acc = acc.or(src.lanewise(VectorOperators.LSHL, 14));
+
+     src = IntVector.fromArray(SPECIES_128, in, inpos + 184).and(MASK_6);
+     acc = acc.or(src.lanewise(VectorOperators.LSHL, 20));
+
+     src = IntVector.fromArray(SPECIES_128, in, inpos + 188).and(MASK_6);
+     acc = acc.or(src.lanewise(VectorOperators.LSHL, 26));
+
+     acc.intoArray(out, outpos);
+     outpos += VLEN_128;
+     // Inputs at inpos + 192 .. inpos + 255 fill packed vectors 9..11.
+     acc = IntVector.fromArray(SPECIES_128, in, inpos + 192).and(MASK_6);
+
+     src = IntVector.fromArray(SPECIES_128, in, inpos + 196).and(MASK_6);
+     acc = acc.or(src.lanewise(VectorOperators.LSHL, 6));
+
+     src = IntVector.fromArray(SPECIES_128, in, inpos + 200).and(MASK_6);
+     acc = acc.or(src.lanewise(VectorOperators.LSHL, 12));
+
+     src = IntVector.fromArray(SPECIES_128, in, inpos + 204).and(MASK_6);
+     acc = acc.or(src.lanewise(VectorOperators.LSHL, 18));
+
+     src = IntVector.fromArray(SPECIES_128, in, inpos + 208).and(MASK_6);
+     acc = acc.or(src.lanewise(VectorOperators.LSHL, 24));
+
+     src = IntVector.fromArray(SPECIES_128, in, inpos + 212).and(MASK_6);
+     acc = acc.or(src.lanewise(VectorOperators.LSHL, 30));
+
+     acc.intoArray(out, outpos);
+     outpos += VLEN_128;
+     // Upper 4 bits of the straddling field.
+     acc = src.lanewise(VectorOperators.LSHR, 2);
+     src = IntVector.fromArray(SPECIES_128, in, inpos + 216).and(MASK_6);
+     acc = acc.or(src.lanewise(VectorOperators.LSHL, 4));
+
+     src = IntVector.fromArray(SPECIES_128, in, inpos + 220).and(MASK_6);
+     acc = acc.or(src.lanewise(VectorOperators.LSHL, 10));
+
+     src = IntVector.fromArray(SPECIES_128, in, inpos + 224).and(MASK_6);
+     acc = acc.or(src.lanewise(VectorOperators.LSHL, 16));
+
+     src = IntVector.fromArray(SPECIES_128, in, inpos + 228).and(MASK_6);
+     acc = acc.or(src.lanewise(VectorOperators.LSHL, 22));
+
+     src = IntVector.fromArray(SPECIES_128, in, inpos + 232).and(MASK_6);
+     acc = acc.or(src.lanewise(VectorOperators.LSHL, 28));
+
+     acc.intoArray(out, outpos);
+     outpos += VLEN_128;
+     // Upper 2 bits of the straddling field.
+     acc = src.lanewise(VectorOperators.LSHR, 4);
+     src = IntVector.fromArray(SPECIES_128, in, inpos + 236).and(MASK_6);
+     acc = acc.or(src.lanewise(VectorOperators.LSHL, 2));
+
+     src = IntVector.fromArray(SPECIES_128, in, inpos + 240).and(MASK_6);
+     acc = acc.or(src.lanewise(VectorOperators.LSHL, 8));
+
+     src = IntVector.fromArray(SPECIES_128, in, inpos + 244).and(MASK_6);
+     acc = acc.or(src.lanewise(VectorOperators.LSHL, 14));
+
+     src = IntVector.fromArray(SPECIES_128, in, inpos + 248).and(MASK_6);
+     acc = acc.or(src.lanewise(VectorOperators.LSHL, 20));
+
+     src = IntVector.fromArray(SPECIES_128, in, inpos + 252).and(MASK_6);
+     acc = acc.or(src.lanewise(VectorOperators.LSHL, 26));
+
+     acc.intoArray(out, outpos);
+ }
+
+ /**
+  * Packs 256 ints from {@code in} at 6 bits each into 12 vectors of {@code out},
+  * without masking the inputs first.
+  *
+  * <p>Inputs are shifted and ORed as-is, so any bit set above the low 6 leaks into the
+  * neighbouring fields; callers presumably guarantee every value already fits in 6 bits.
+  *
+  * @param in source array; 64 vectors are loaded starting at {@code inpos}
+  * @param inpos index of the first int to pack
+  * @param out destination array; 12 vectors are stored starting at {@code outpos}
+  * @param outpos index of the first packed int to write
+  */
+ private static void fastpackNoMask6(final int[] in, int inpos, final int[] out, int outpos) {
+     var acc = IntVector.fromArray(SPECIES_128, in, inpos);
+
+     var src = IntVector.fromArray(SPECIES_128, in, inpos + 4);
+     acc = acc.or(src.lanewise(VectorOperators.LSHL, 6));
+
+     src = IntVector.fromArray(SPECIES_128, in, inpos + 8);
+     acc = acc.or(src.lanewise(VectorOperators.LSHL, 12));
+
+     src = IntVector.fromArray(SPECIES_128, in, inpos + 12);
+     acc = acc.or(src.lanewise(VectorOperators.LSHL, 18));
+
+     src = IntVector.fromArray(SPECIES_128, in, inpos + 16);
+     acc = acc.or(src.lanewise(VectorOperators.LSHL, 24));
+
+     src = IntVector.fromArray(SPECIES_128, in, inpos + 20);
+     acc = acc.or(src.lanewise(VectorOperators.LSHL, 30));
+
+     acc.intoArray(out, outpos);
+     outpos += VLEN_128;
+     // Only 2 bits of the last field fit above bit 30; the rest opens the next word.
+     acc = src.lanewise(VectorOperators.LSHR, 2);
+     src = IntVector.fromArray(SPECIES_128, in, inpos + 24);
+     acc = acc.or(src.lanewise(VectorOperators.LSHL, 4));
+
+     src = IntVector.fromArray(SPECIES_128, in, inpos + 28);
+     acc = acc.or(src.lanewise(VectorOperators.LSHL, 10));
+
+     src = IntVector.fromArray(SPECIES_128, in, inpos + 32);
+     acc = acc.or(src.lanewise(VectorOperators.LSHL, 16));
+
+     src = IntVector.fromArray(SPECIES_128, in, inpos + 36);
+     acc = acc.or(src.lanewise(VectorOperators.LSHL, 22));
+
+     src = IntVector.fromArray(SPECIES_128, in, inpos + 40);
+     acc = acc.or(src.lanewise(VectorOperators.LSHL, 28));
+
+     acc.intoArray(out, outpos);
+     outpos += VLEN_128;
+     // Only 4 bits of the last field fit above bit 28; the rest opens the next word.
+     acc = src.lanewise(VectorOperators.LSHR, 4);
+     src = IntVector.fromArray(SPECIES_128, in, inpos + 44);
+     acc = acc.or(src.lanewise(VectorOperators.LSHL, 2));
+
+     src = IntVector.fromArray(SPECIES_128, in, inpos + 48);
+     acc = acc.or(src.lanewise(VectorOperators.LSHL, 8));
+
+     src = IntVector.fromArray(SPECIES_128, in, inpos + 52);
+     acc = acc.or(src.lanewise(VectorOperators.LSHL, 14));
+
+     src = IntVector.fromArray(SPECIES_128, in, inpos + 56);
+     acc = acc.or(src.lanewise(VectorOperators.LSHL, 20));
+
+     src = IntVector.fromArray(SPECIES_128, in, inpos + 60);
+     acc = acc.or(src.lanewise(VectorOperators.LSHL, 26));
+
+     acc.intoArray(out, outpos);
+     outpos += VLEN_128;
+     // Inputs at inpos + 64 .. inpos + 127 fill packed vectors 3..5 with the same layout.
+     acc = IntVector.fromArray(SPECIES_128, in, inpos + 64);
+
+     src = IntVector.fromArray(SPECIES_128, in, inpos + 68);
+     acc = acc.or(src.lanewise(VectorOperators.LSHL, 6));
+
+     src = IntVector.fromArray(SPECIES_128, in, inpos + 72);
+     acc = acc.or(src.lanewise(VectorOperators.LSHL, 12));
+
+     src = IntVector.fromArray(SPECIES_128, in, inpos + 76);
+     acc = acc.or(src.lanewise(VectorOperators.LSHL, 18));
+
+     src = IntVector.fromArray(SPECIES_128, in, inpos + 80);
+     acc = acc.or(src.lanewise(VectorOperators.LSHL, 24));
+
+     src = IntVector.fromArray(SPECIES_128, in, inpos + 84);
+     acc = acc.or(src.lanewise(VectorOperators.LSHL, 30));
+
+     acc.intoArray(out, outpos);
+     outpos += VLEN_128;
+     // Remainder of the straddling field (shifted down by the 2 bits already stored).
+     acc = src.lanewise(VectorOperators.LSHR, 2);
+     src = IntVector.fromArray(SPECIES_128, in, inpos + 88);
+     acc = acc.or(src.lanewise(VectorOperators.LSHL, 4));
+
+     src = IntVector.fromArray(SPECIES_128, in, inpos + 92);
+     acc = acc.or(src.lanewise(VectorOperators.LSHL, 10));
+
+     src = IntVector.fromArray(SPECIES_128, in, inpos + 96);
+     acc = acc.or(src.lanewise(VectorOperators.LSHL, 16));
+
+     src = IntVector.fromArray(SPECIES_128, in, inpos + 100);
+     acc = acc.or(src.lanewise(VectorOperators.LSHL, 22));
+
+     src = IntVector.fromArray(SPECIES_128, in, inpos + 104);
+     acc = acc.or(src.lanewise(VectorOperators.LSHL, 28));
+
+     acc.intoArray(out, outpos);
+     outpos += VLEN_128;
+     // Remainder of the straddling field (shifted down by the 4 bits already stored).
+     acc = src.lanewise(VectorOperators.LSHR, 4);
+     src = IntVector.fromArray(SPECIES_128, in, inpos + 108);
+     acc = acc.or(src.lanewise(VectorOperators.LSHL, 2));
+
+     src = IntVector.fromArray(SPECIES_128, in, inpos + 112);
+     acc = acc.or(src.lanewise(VectorOperators.LSHL, 8));
+
+     src = IntVector.fromArray(SPECIES_128, in, inpos + 116);
+     acc = acc.or(src.lanewise(VectorOperators.LSHL, 14));
+
+     src = IntVector.fromArray(SPECIES_128, in, inpos + 120);
+     acc = acc.or(src.lanewise(VectorOperators.LSHL, 20));
+
+     src = IntVector.fromArray(SPECIES_128, in, inpos + 124);
+     acc = acc.or(src.lanewise(VectorOperators.LSHL, 26));
+
+     acc.intoArray(out, outpos);
+     outpos += VLEN_128;
+     // Inputs at inpos + 128 .. inpos + 191 fill packed vectors 6..8.
+     acc = IntVector.fromArray(SPECIES_128, in, inpos + 128);
+
+     src = IntVector.fromArray(SPECIES_128, in, inpos + 132);
+     acc = acc.or(src.lanewise(VectorOperators.LSHL, 6));
+
+     src = IntVector.fromArray(SPECIES_128, in, inpos + 136);
+     acc = acc.or(src.lanewise(VectorOperators.LSHL, 12));
+
+     src = IntVector.fromArray(SPECIES_128, in, inpos + 140);
+     acc = acc.or(src.lanewise(VectorOperators.LSHL, 18));
+
+     src = IntVector.fromArray(SPECIES_128, in, inpos + 144);
+     acc = acc.or(src.lanewise(VectorOperators.LSHL, 24));
+
+     src = IntVector.fromArray(SPECIES_128, in, inpos + 148);
+     acc = acc.or(src.lanewise(VectorOperators.LSHL, 30));
+
+     acc.intoArray(out, outpos);
+     outpos += VLEN_128;
+     // Remainder of the straddling field (shifted down by the 2 bits already stored).
+     acc = src.lanewise(VectorOperators.LSHR, 2);
+     src = IntVector.fromArray(SPECIES_128, in, inpos + 152);
+     acc = acc.or(src.lanewise(VectorOperators.LSHL, 4));
+
+     src = IntVector.fromArray(SPECIES_128, in, inpos + 156);
+     acc = acc.or(src.lanewise(VectorOperators.LSHL, 10));
+
+     src = IntVector.fromArray(SPECIES_128, in, inpos + 160);
+     acc = acc.or(src.lanewise(VectorOperators.LSHL, 16));
+
+     src = IntVector.fromArray(SPECIES_128, in, inpos + 164);
+     acc = acc.or(src.lanewise(VectorOperators.LSHL, 22));
+
+     src = IntVector.fromArray(SPECIES_128, in, inpos + 168);
+     acc = acc.or(src.lanewise(VectorOperators.LSHL, 28));
+
+     acc.intoArray(out, outpos);
+     outpos += VLEN_128;
+     // Remainder of the straddling field (shifted down by the 4 bits already stored).
+     acc = src.lanewise(VectorOperators.LSHR, 4);
+     src = IntVector.fromArray(SPECIES_128, in, inpos + 172);
+     acc = acc.or(src.lanewise(VectorOperators.LSHL, 2));
+
+     src = IntVector.fromArray(SPECIES_128, in, inpos + 176);
+     acc = acc.or(src.lanewise(VectorOperators.LSHL, 8));
+
+     src = IntVector.fromArray(SPECIES_128, in, inpos + 180);
+     acc = acc.or(src.lanewise(VectorOperators.LSHL, 14));
+
+     src = IntVector.fromArray(SPECIES_128, in, inpos + 184);
+     acc = acc.or(src.lanewise(VectorOperators.LSHL, 20));
+
+     src = IntVector.fromArray(SPECIES_128, in, inpos + 188);
+     acc = acc.or(src.lanewise(VectorOperators.LSHL, 26));
+
+     acc.intoArray(out, outpos);
+     outpos += VLEN_128;
+     // Inputs at inpos + 192 .. inpos + 255 fill packed vectors 9..11.
+     acc = IntVector.fromArray(SPECIES_128, in, inpos + 192);
+
+     src = IntVector.fromArray(SPECIES_128, in, inpos + 196);
+     acc = acc.or(src.lanewise(VectorOperators.LSHL, 6));
+
+     src = IntVector.fromArray(SPECIES_128, in, inpos + 200);
+     acc = acc.or(src.lanewise(VectorOperators.LSHL, 12));
+
+     src = IntVector.fromArray(SPECIES_128, in, inpos + 204);
+     acc = acc.or(src.lanewise(VectorOperators.LSHL, 18));
+
+     src = IntVector.fromArray(SPECIES_128, in, inpos + 208);
+     acc = acc.or(src.lanewise(VectorOperators.LSHL, 24));
+
+     src = IntVector.fromArray(SPECIES_128, in, inpos + 212);
+     acc = acc.or(src.lanewise(VectorOperators.LSHL, 30));
+
+     acc.intoArray(out, outpos);
+     outpos += VLEN_128;
+     // Remainder of the straddling field (shifted down by the 2 bits already stored).
+     acc = src.lanewise(VectorOperators.LSHR, 2);
+     src = IntVector.fromArray(SPECIES_128, in, inpos + 216);
+     acc = acc.or(src.lanewise(VectorOperators.LSHL, 4));
+
+     src = IntVector.fromArray(SPECIES_128, in, inpos + 220);
+     acc = acc.or(src.lanewise(VectorOperators.LSHL, 10));
+
+     src = IntVector.fromArray(SPECIES_128, in, inpos + 224);
+     acc = acc.or(src.lanewise(VectorOperators.LSHL, 16));
+
+     src = IntVector.fromArray(SPECIES_128, in, inpos + 228);
+     acc = acc.or(src.lanewise(VectorOperators.LSHL, 22));
+
+     src = IntVector.fromArray(SPECIES_128, in, inpos + 232);
+     acc = acc.or(src.lanewise(VectorOperators.LSHL, 28));
+
+     acc.intoArray(out, outpos);
+     outpos += VLEN_128;
+     // Remainder of the straddling field (shifted down by the 4 bits already stored).
+     acc = src.lanewise(VectorOperators.LSHR, 4);
+     src = IntVector.fromArray(SPECIES_128, in, inpos + 236);
+     acc = acc.or(src.lanewise(VectorOperators.LSHL, 2));
+
+     src = IntVector.fromArray(SPECIES_128, in, inpos + 240);
+     acc = acc.or(src.lanewise(VectorOperators.LSHL, 8));
+
+     src = IntVector.fromArray(SPECIES_128, in, inpos + 244);
+     acc = acc.or(src.lanewise(VectorOperators.LSHL, 14));
+
+     src = IntVector.fromArray(SPECIES_128, in, inpos + 248);
+     acc = acc.or(src.lanewise(VectorOperators.LSHL, 20));
+
+     src = IntVector.fromArray(SPECIES_128, in, inpos + 252);
+     acc = acc.or(src.lanewise(VectorOperators.LSHL, 26));
+
+     acc.intoArray(out, outpos);
+ }
+
+ private static void fastunpack6(final int[] in, int inpos, final int[] out, int outpos) {
+ var iV = IntVector.fromArray(SPECIES_128, in, inpos);
+ iV.and(MASK_6).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 6).and(MASK_6).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 12).and(MASK_6).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 18).and(MASK_6).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 24).and(MASK_6).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ var oV = iV.lanewise(VectorOperators.LSHR, 30).and(MASK_6);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 4);
+ oV = iV.and(15).lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 4).and(MASK_6).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 10).and(MASK_6).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 16).and(MASK_6).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 22).and(MASK_6).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 28).and(MASK_6);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 8);
+ oV = iV.and(3).lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 2).and(MASK_6).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 8).and(MASK_6).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 14).and(MASK_6).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 20).and(MASK_6).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 26).and(MASK_6).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 12);
+ iV.and(MASK_6).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 6).and(MASK_6).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 12).and(MASK_6).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 18).and(MASK_6).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 24).and(MASK_6).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 30).and(MASK_6);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 16);
+ oV = iV.and(15).lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 4).and(MASK_6).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 10).and(MASK_6).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 16).and(MASK_6).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 22).and(MASK_6).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 28).and(MASK_6);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 20);
+ oV = iV.and(3).lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 2).and(MASK_6).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 8).and(MASK_6).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 14).and(MASK_6).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 20).and(MASK_6).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 26).and(MASK_6).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 24);
+ iV.and(MASK_6).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 6).and(MASK_6).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 12).and(MASK_6).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 18).and(MASK_6).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 24).and(MASK_6).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 30).and(MASK_6);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 28);
+ oV = iV.and(15).lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 4).and(MASK_6).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 10).and(MASK_6).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 16).and(MASK_6).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 22).and(MASK_6).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 28).and(MASK_6);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 32);
+ oV = iV.and(3).lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 2).and(MASK_6).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 8).and(MASK_6).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 14).and(MASK_6).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 20).and(MASK_6).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 26).and(MASK_6).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 36);
+ iV.and(MASK_6).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 6).and(MASK_6).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 12).and(MASK_6).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 18).and(MASK_6).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 24).and(MASK_6).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 30).and(MASK_6);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 40);
+ oV = iV.and(15).lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 4).and(MASK_6).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 10).and(MASK_6).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 16).and(MASK_6).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 22).and(MASK_6).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 28).and(MASK_6);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 44);
+ oV = iV.and(3).lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 2).and(MASK_6).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 8).and(MASK_6).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 14).and(MASK_6).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 20).and(MASK_6).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 26).and(MASK_6).intoArray(out, outpos);
+ }
+
+ private static void fastpack7(final int[] in, int inpos, final int[] out, int outpos) {
+ var iV = IntVector.fromArray(SPECIES_128, in, inpos);
+ var oV = iV.and(MASK_7);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 4);
+ oV = iV.and(MASK_7).lanewise(VectorOperators.LSHL, 7).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 8);
+ oV = iV.and(MASK_7).lanewise(VectorOperators.LSHL, 14).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 12);
+ oV = iV.and(MASK_7).lanewise(VectorOperators.LSHL, 21).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 16);
+ oV = iV.and(MASK_7).lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_7).lanewise(VectorOperators.LSHR, 4);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 20);
+ oV = iV.and(MASK_7).lanewise(VectorOperators.LSHL, 3).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 24);
+ oV = iV.and(MASK_7).lanewise(VectorOperators.LSHL, 10).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 28);
+ oV = iV.and(MASK_7).lanewise(VectorOperators.LSHL, 17).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 32);
+ oV = iV.and(MASK_7).lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 36);
+ oV = iV.and(MASK_7).lanewise(VectorOperators.LSHL, 31).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_7).lanewise(VectorOperators.LSHR, 1);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 40);
+ oV = iV.and(MASK_7).lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 44);
+ oV = iV.and(MASK_7).lanewise(VectorOperators.LSHL, 13).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 48);
+ oV = iV.and(MASK_7).lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 52);
+ oV = iV.and(MASK_7).lanewise(VectorOperators.LSHL, 27).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_7).lanewise(VectorOperators.LSHR, 5);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 56);
+ oV = iV.and(MASK_7).lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 60);
+ oV = iV.and(MASK_7).lanewise(VectorOperators.LSHL, 9).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 64);
+ oV = iV.and(MASK_7).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 68);
+ oV = iV.and(MASK_7).lanewise(VectorOperators.LSHL, 23).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 72);
+ oV = iV.and(MASK_7).lanewise(VectorOperators.LSHL, 30).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_7).lanewise(VectorOperators.LSHR, 2);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 76);
+ oV = iV.and(MASK_7).lanewise(VectorOperators.LSHL, 5).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 80);
+ oV = iV.and(MASK_7).lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 84);
+ oV = iV.and(MASK_7).lanewise(VectorOperators.LSHL, 19).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 88);
+ oV = iV.and(MASK_7).lanewise(VectorOperators.LSHL, 26).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_7).lanewise(VectorOperators.LSHR, 6);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 92);
+ oV = iV.and(MASK_7).lanewise(VectorOperators.LSHL, 1).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 96);
+ oV = iV.and(MASK_7).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 100);
+ oV = iV.and(MASK_7).lanewise(VectorOperators.LSHL, 15).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 104);
+ oV = iV.and(MASK_7).lanewise(VectorOperators.LSHL, 22).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 108);
+ oV = iV.and(MASK_7).lanewise(VectorOperators.LSHL, 29).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_7).lanewise(VectorOperators.LSHR, 3);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 112);
+ oV = iV.and(MASK_7).lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 116);
+ oV = iV.and(MASK_7).lanewise(VectorOperators.LSHL, 11).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 120);
+ oV = iV.and(MASK_7).lanewise(VectorOperators.LSHL, 18).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 124);
+ oV = iV.and(MASK_7).lanewise(VectorOperators.LSHL, 25).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 128);
+ oV = iV.and(MASK_7);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 132);
+ oV = iV.and(MASK_7).lanewise(VectorOperators.LSHL, 7).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 136);
+ oV = iV.and(MASK_7).lanewise(VectorOperators.LSHL, 14).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 140);
+ oV = iV.and(MASK_7).lanewise(VectorOperators.LSHL, 21).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 144);
+ oV = iV.and(MASK_7).lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_7).lanewise(VectorOperators.LSHR, 4);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 148);
+ oV = iV.and(MASK_7).lanewise(VectorOperators.LSHL, 3).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 152);
+ oV = iV.and(MASK_7).lanewise(VectorOperators.LSHL, 10).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 156);
+ oV = iV.and(MASK_7).lanewise(VectorOperators.LSHL, 17).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 160);
+ oV = iV.and(MASK_7).lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 164);
+ oV = iV.and(MASK_7).lanewise(VectorOperators.LSHL, 31).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_7).lanewise(VectorOperators.LSHR, 1);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 168);
+ oV = iV.and(MASK_7).lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 172);
+ oV = iV.and(MASK_7).lanewise(VectorOperators.LSHL, 13).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 176);
+ oV = iV.and(MASK_7).lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 180);
+ oV = iV.and(MASK_7).lanewise(VectorOperators.LSHL, 27).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_7).lanewise(VectorOperators.LSHR, 5);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 184);
+ oV = iV.and(MASK_7).lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 188);
+ oV = iV.and(MASK_7).lanewise(VectorOperators.LSHL, 9).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 192);
+ oV = iV.and(MASK_7).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 196);
+ oV = iV.and(MASK_7).lanewise(VectorOperators.LSHL, 23).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 200);
+ oV = iV.and(MASK_7).lanewise(VectorOperators.LSHL, 30).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_7).lanewise(VectorOperators.LSHR, 2);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 204);
+ oV = iV.and(MASK_7).lanewise(VectorOperators.LSHL, 5).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 208);
+ oV = iV.and(MASK_7).lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 212);
+ oV = iV.and(MASK_7).lanewise(VectorOperators.LSHL, 19).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 216);
+ oV = iV.and(MASK_7).lanewise(VectorOperators.LSHL, 26).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_7).lanewise(VectorOperators.LSHR, 6);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 220);
+ oV = iV.and(MASK_7).lanewise(VectorOperators.LSHL, 1).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 224);
+ oV = iV.and(MASK_7).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 228);
+ oV = iV.and(MASK_7).lanewise(VectorOperators.LSHL, 15).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 232);
+ oV = iV.and(MASK_7).lanewise(VectorOperators.LSHL, 22).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 236);
+ oV = iV.and(MASK_7).lanewise(VectorOperators.LSHL, 29).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_7).lanewise(VectorOperators.LSHR, 3);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 240);
+ oV = iV.and(MASK_7).lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 244);
+ oV = iV.and(MASK_7).lanewise(VectorOperators.LSHL, 11).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 248);
+ oV = iV.and(MASK_7).lanewise(VectorOperators.LSHL, 18).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 252);
+ oV = iV.and(MASK_7).lanewise(VectorOperators.LSHL, 25).or(oV);
+
+ oV.intoArray(out, outpos);
+ }
+
+ private static void fastpackNoMask7(final int[] in, int inpos, final int[] out, int outpos) {
+ var iV = IntVector.fromArray(SPECIES_128, in, inpos);
+ var oV = iV;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 4);
+ oV = iV.lanewise(VectorOperators.LSHL, 7).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 8);
+ oV = iV.lanewise(VectorOperators.LSHL, 14).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 12);
+ oV = iV.lanewise(VectorOperators.LSHL, 21).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 16);
+ oV = iV.lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 4);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 20);
+ oV = iV.lanewise(VectorOperators.LSHL, 3).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 24);
+ oV = iV.lanewise(VectorOperators.LSHL, 10).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 28);
+ oV = iV.lanewise(VectorOperators.LSHL, 17).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 32);
+ oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 36);
+ oV = iV.lanewise(VectorOperators.LSHL, 31).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 1);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 40);
+ oV = iV.lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 44);
+ oV = iV.lanewise(VectorOperators.LSHL, 13).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 48);
+ oV = iV.lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 52);
+ oV = iV.lanewise(VectorOperators.LSHL, 27).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 5);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 56);
+ oV = iV.lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 60);
+ oV = iV.lanewise(VectorOperators.LSHL, 9).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 64);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 68);
+ oV = iV.lanewise(VectorOperators.LSHL, 23).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 72);
+ oV = iV.lanewise(VectorOperators.LSHL, 30).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 2);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 76);
+ oV = iV.lanewise(VectorOperators.LSHL, 5).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 80);
+ oV = iV.lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 84);
+ oV = iV.lanewise(VectorOperators.LSHL, 19).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 88);
+ oV = iV.lanewise(VectorOperators.LSHL, 26).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 6);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 92);
+ oV = iV.lanewise(VectorOperators.LSHL, 1).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 96);
+ oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 100);
+ oV = iV.lanewise(VectorOperators.LSHL, 15).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 104);
+ oV = iV.lanewise(VectorOperators.LSHL, 22).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 108);
+ oV = iV.lanewise(VectorOperators.LSHL, 29).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 3);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 112);
+ oV = iV.lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 116);
+ oV = iV.lanewise(VectorOperators.LSHL, 11).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 120);
+ oV = iV.lanewise(VectorOperators.LSHL, 18).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 124);
+ oV = iV.lanewise(VectorOperators.LSHL, 25).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 128);
+ oV = iV;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 132);
+ oV = iV.lanewise(VectorOperators.LSHL, 7).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 136);
+ oV = iV.lanewise(VectorOperators.LSHL, 14).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 140);
+ oV = iV.lanewise(VectorOperators.LSHL, 21).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 144);
+ oV = iV.lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 4);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 148);
+ oV = iV.lanewise(VectorOperators.LSHL, 3).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 152);
+ oV = iV.lanewise(VectorOperators.LSHL, 10).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 156);
+ oV = iV.lanewise(VectorOperators.LSHL, 17).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 160);
+ oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 164);
+ oV = iV.lanewise(VectorOperators.LSHL, 31).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 1);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 168);
+ oV = iV.lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 172);
+ oV = iV.lanewise(VectorOperators.LSHL, 13).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 176);
+ oV = iV.lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 180);
+ oV = iV.lanewise(VectorOperators.LSHL, 27).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 5);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 184);
+ oV = iV.lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 188);
+ oV = iV.lanewise(VectorOperators.LSHL, 9).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 192);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 196);
+ oV = iV.lanewise(VectorOperators.LSHL, 23).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 200);
+ oV = iV.lanewise(VectorOperators.LSHL, 30).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 2);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 204);
+ oV = iV.lanewise(VectorOperators.LSHL, 5).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 208);
+ oV = iV.lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 212);
+ oV = iV.lanewise(VectorOperators.LSHL, 19).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 216);
+ oV = iV.lanewise(VectorOperators.LSHL, 26).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 6);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 220);
+ oV = iV.lanewise(VectorOperators.LSHL, 1).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 224);
+ oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 228);
+ oV = iV.lanewise(VectorOperators.LSHL, 15).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 232);
+ oV = iV.lanewise(VectorOperators.LSHL, 22).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 236);
+ oV = iV.lanewise(VectorOperators.LSHL, 29).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 3);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 240);
+ oV = iV.lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 244);
+ oV = iV.lanewise(VectorOperators.LSHL, 11).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 248);
+ oV = iV.lanewise(VectorOperators.LSHL, 18).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 252);
+ oV = iV.lanewise(VectorOperators.LSHL, 25).or(oV);
+
+ oV.intoArray(out, outpos);
+ }
+
+ private static void fastunpack7(final int[] in, int inpos, final int[] out, int outpos) {
+ var iV = IntVector.fromArray(SPECIES_128, in, inpos);
+ iV.and(MASK_7).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 7).and(MASK_7).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 14).and(MASK_7).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 21).and(MASK_7).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ var oV = iV.lanewise(VectorOperators.LSHR, 28).and(MASK_7);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 4);
+ oV = iV.and(7).lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 3).and(MASK_7).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 10).and(MASK_7).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 17).and(MASK_7).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 24).and(MASK_7).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 31).and(MASK_7);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 8);
+ oV = iV.and(63).lanewise(VectorOperators.LSHL, 1).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 6).and(MASK_7).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 13).and(MASK_7).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 20).and(MASK_7).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 27).and(MASK_7);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 12);
+ oV = iV.and(3).lanewise(VectorOperators.LSHL, 5).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 2).and(MASK_7).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 9).and(MASK_7).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 16).and(MASK_7).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 23).and(MASK_7).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 30).and(MASK_7);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 16);
+ oV = iV.and(31).lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 5).and(MASK_7).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 12).and(MASK_7).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 19).and(MASK_7).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 26).and(MASK_7);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 20);
+ oV = iV.and(1).lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 1).and(MASK_7).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 8).and(MASK_7).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 15).and(MASK_7).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 22).and(MASK_7).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 29).and(MASK_7);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 24);
+ oV = iV.and(15).lanewise(VectorOperators.LSHL, 3).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 4).and(MASK_7).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 11).and(MASK_7).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 18).and(MASK_7).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 25).and(MASK_7).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 28);
+ iV.and(MASK_7).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 7).and(MASK_7).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 14).and(MASK_7).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 21).and(MASK_7).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 28).and(MASK_7);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 32);
+ oV = iV.and(7).lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 3).and(MASK_7).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 10).and(MASK_7).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 17).and(MASK_7).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 24).and(MASK_7).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 31).and(MASK_7);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 36);
+ oV = iV.and(63).lanewise(VectorOperators.LSHL, 1).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 6).and(MASK_7).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 13).and(MASK_7).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 20).and(MASK_7).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 27).and(MASK_7);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 40);
+ oV = iV.and(3).lanewise(VectorOperators.LSHL, 5).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 2).and(MASK_7).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 9).and(MASK_7).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 16).and(MASK_7).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 23).and(MASK_7).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 30).and(MASK_7);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 44);
+ oV = iV.and(31).lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 5).and(MASK_7).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 12).and(MASK_7).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 19).and(MASK_7).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 26).and(MASK_7);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 48);
+ oV = iV.and(1).lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 1).and(MASK_7).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 8).and(MASK_7).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 15).and(MASK_7).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 22).and(MASK_7).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 29).and(MASK_7);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 52);
+ oV = iV.and(15).lanewise(VectorOperators.LSHL, 3).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 4).and(MASK_7).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 11).and(MASK_7).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 18).and(MASK_7).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 25).and(MASK_7).intoArray(out, outpos);
+ }
+
+ private static void fastpack8(final int[] in, int inpos, final int[] out, int outpos) {
+ var iV = IntVector.fromArray(SPECIES_128, in, inpos);
+ var oV = iV.and(MASK_8);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 4);
+ oV = iV.and(MASK_8).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 8);
+ oV = iV.and(MASK_8).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 12);
+ oV = iV.and(MASK_8).lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 16);
+ oV = iV.and(MASK_8);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 20);
+ oV = iV.and(MASK_8).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 24);
+ oV = iV.and(MASK_8).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 28);
+ oV = iV.and(MASK_8).lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 32);
+ oV = iV.and(MASK_8);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 36);
+ oV = iV.and(MASK_8).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 40);
+ oV = iV.and(MASK_8).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 44);
+ oV = iV.and(MASK_8).lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 48);
+ oV = iV.and(MASK_8);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 52);
+ oV = iV.and(MASK_8).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 56);
+ oV = iV.and(MASK_8).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 60);
+ oV = iV.and(MASK_8).lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 64);
+ oV = iV.and(MASK_8);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 68);
+ oV = iV.and(MASK_8).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 72);
+ oV = iV.and(MASK_8).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 76);
+ oV = iV.and(MASK_8).lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 80);
+ oV = iV.and(MASK_8);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 84);
+ oV = iV.and(MASK_8).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 88);
+ oV = iV.and(MASK_8).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 92);
+ oV = iV.and(MASK_8).lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 96);
+ oV = iV.and(MASK_8);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 100);
+ oV = iV.and(MASK_8).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 104);
+ oV = iV.and(MASK_8).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 108);
+ oV = iV.and(MASK_8).lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 112);
+ oV = iV.and(MASK_8);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 116);
+ oV = iV.and(MASK_8).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 120);
+ oV = iV.and(MASK_8).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 124);
+ oV = iV.and(MASK_8).lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 128);
+ oV = iV.and(MASK_8);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 132);
+ oV = iV.and(MASK_8).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 136);
+ oV = iV.and(MASK_8).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 140);
+ oV = iV.and(MASK_8).lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 144);
+ oV = iV.and(MASK_8);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 148);
+ oV = iV.and(MASK_8).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 152);
+ oV = iV.and(MASK_8).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 156);
+ oV = iV.and(MASK_8).lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 160);
+ oV = iV.and(MASK_8);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 164);
+ oV = iV.and(MASK_8).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 168);
+ oV = iV.and(MASK_8).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 172);
+ oV = iV.and(MASK_8).lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 176);
+ oV = iV.and(MASK_8);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 180);
+ oV = iV.and(MASK_8).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 184);
+ oV = iV.and(MASK_8).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 188);
+ oV = iV.and(MASK_8).lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 192);
+ oV = iV.and(MASK_8);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 196);
+ oV = iV.and(MASK_8).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 200);
+ oV = iV.and(MASK_8).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 204);
+ oV = iV.and(MASK_8).lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 208);
+ oV = iV.and(MASK_8);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 212);
+ oV = iV.and(MASK_8).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 216);
+ oV = iV.and(MASK_8).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 220);
+ oV = iV.and(MASK_8).lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 224);
+ oV = iV.and(MASK_8);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 228);
+ oV = iV.and(MASK_8).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 232);
+ oV = iV.and(MASK_8).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 236);
+ oV = iV.and(MASK_8).lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 240);
+ oV = iV.and(MASK_8);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 244);
+ oV = iV.and(MASK_8).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 248);
+ oV = iV.and(MASK_8).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 252);
+ oV = iV.and(MASK_8).lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ }
+
+ /**
+  * Packs one block of 8-bit values without masking the inputs first.
+  *
+  * <p>Sixty-four SPECIES_128 vectors are read from {@code in} at offsets
+  * {@code inpos}, {@code inpos + 4}, ..., {@code inpos + 252} (256 ints, assuming
+  * the species holds four int lanes). Every four consecutive input vectors are
+  * merged into one output vector at bit offsets 0, 8, 16 and 24 of each lane, and
+  * the sixteen resulting vectors are stored to {@code out} at
+  * {@code outpos + k * VLEN_128} for {@code k = 0..15}.
+  *
+  * <p>No mask is applied, so any input bits above bit 7 are ORed into the
+  * neighbouring fields; callers are expected to pass values that fit in 8 bits.
+  *
+  * @param in     source array holding the unpacked values
+  * @param inpos  index in {@code in} of the first value to pack
+  * @param out    destination array receiving the packed words
+  * @param outpos index in {@code out} of the first packed word
+  */
+ private static void fastpackNoMask8(final int[] in, int inpos, final int[] out, int outpos) {
+     for (int word = 0; word < 16; word++) {
+         // Each output vector consumes four input vectors, i.e. 16 ints of input.
+         final int src = inpos + 16 * word;
+
+         IntVector.fromArray(SPECIES_128, in, src)
+             .or(IntVector.fromArray(SPECIES_128, in, src + 4).lanewise(VectorOperators.LSHL, 8))
+             .or(IntVector.fromArray(SPECIES_128, in, src + 8).lanewise(VectorOperators.LSHL, 16))
+             .or(IntVector.fromArray(SPECIES_128, in, src + 12).lanewise(VectorOperators.LSHL, 24))
+             .intoArray(out, outpos + word * VLEN_128);
+     }
+ }
+
+ /**
+  * Unpacks one block of 8-bit values; the inverse layout of the 8-bit packers.
+  *
+  * <p>Sixteen SPECIES_128 vectors are read from {@code in} at offsets
+  * {@code inpos}, {@code inpos + 4}, ..., {@code inpos + 60}. Each packed vector
+  * yields four output vectors holding, per lane, bits 0-7, 8-15, 16-23 and 24-31
+  * of the packed lane (each ANDed with MASK_8). Output vector number
+  * {@code 4 * w + k} (packed vector {@code w}, byte {@code k}) is stored at
+  * {@code outpos + (4 * w + k) * VLEN_128}, for 64 stores in total.
+  *
+  * @param in     source array holding the packed words
+  * @param inpos  index in {@code in} of the first packed word
+  * @param out    destination array receiving the unpacked values
+  * @param outpos index in {@code out} of the first unpacked value
+  */
+ private static void fastunpack8(final int[] in, int inpos, final int[] out, int outpos) {
+     for (int word = 0; word < 16; word++) {
+         final var packed = IntVector.fromArray(SPECIES_128, in, inpos + 4 * word);
+         // Four output vectors are produced per packed vector.
+         final int dst = outpos + 4 * word * VLEN_128;
+
+         packed.and(MASK_8).intoArray(out, dst);
+         packed.lanewise(VectorOperators.LSHR, 8).and(MASK_8).intoArray(out, dst + VLEN_128);
+         packed.lanewise(VectorOperators.LSHR, 16).and(MASK_8).intoArray(out, dst + 2 * VLEN_128);
+         packed.lanewise(VectorOperators.LSHR, 24).and(MASK_8).intoArray(out, dst + 3 * VLEN_128);
+     }
+ }
+
+ /**
+  * Packs one block of 9-bit values, masking every input with MASK_9 first.
+  *
+  * <p>Sixty-four SPECIES_128 vectors are read from {@code in} at offsets
+  * {@code inpos}, {@code inpos + 4}, ..., {@code inpos + 252} and eighteen packed
+  * vectors are stored to {@code out} at {@code outpos + k * VLEN_128} for
+  * {@code k = 0..17}. Within each lane the fields are laid out back to back,
+  * 9 bits apart; a field that does not fit in the remaining bits of a word is
+  * split, its low part finishing the current word and its high part (obtained
+  * with a logical right shift) starting the next one.
+  *
+  * <p>Thirty-two fields fill exactly nine words (32 * 9 = 9 * 32 bits), so the
+  * block is processed as two identical halves with no state carried between them.
+  *
+  * @param in     source array holding the unpacked values
+  * @param inpos  index in {@code in} of the first value to pack
+  * @param out    destination array receiving the packed words
+  * @param outpos index in {@code out} of the first packed word
+  */
+ private static void fastpack9(final int[] in, int inpos, final int[] out, int outpos) {
+     for (int half = 0; half < 2; half++) {
+         final int src = inpos + 128 * half;
+         final int dst = outpos + 9 * half * VLEN_128;
+
+         // Word 0: three whole fields, then the low 5 bits of the fourth.
+         var word = IntVector.fromArray(SPECIES_128, in, src).and(MASK_9)
+             .or(IntVector.fromArray(SPECIES_128, in, src + 4).and(MASK_9).lanewise(VectorOperators.LSHL, 9))
+             .or(IntVector.fromArray(SPECIES_128, in, src + 8).and(MASK_9).lanewise(VectorOperators.LSHL, 18));
+         var split = IntVector.fromArray(SPECIES_128, in, src + 12).and(MASK_9);
+         word.or(split.lanewise(VectorOperators.LSHL, 27)).intoArray(out, dst);
+
+         // Word 1: high 4 bits of the split field, three whole fields, 1 low bit.
+         word = split.lanewise(VectorOperators.LSHR, 5)
+             .or(IntVector.fromArray(SPECIES_128, in, src + 16).and(MASK_9).lanewise(VectorOperators.LSHL, 4))
+             .or(IntVector.fromArray(SPECIES_128, in, src + 20).and(MASK_9).lanewise(VectorOperators.LSHL, 13))
+             .or(IntVector.fromArray(SPECIES_128, in, src + 24).and(MASK_9).lanewise(VectorOperators.LSHL, 22));
+         split = IntVector.fromArray(SPECIES_128, in, src + 28).and(MASK_9);
+         word.or(split.lanewise(VectorOperators.LSHL, 31)).intoArray(out, dst + VLEN_128);
+
+         // Word 2: high 8 bits, two whole fields, 6 low bits.
+         word = split.lanewise(VectorOperators.LSHR, 1)
+             .or(IntVector.fromArray(SPECIES_128, in, src + 32).and(MASK_9).lanewise(VectorOperators.LSHL, 8))
+             .or(IntVector.fromArray(SPECIES_128, in, src + 36).and(MASK_9).lanewise(VectorOperators.LSHL, 17));
+         split = IntVector.fromArray(SPECIES_128, in, src + 40).and(MASK_9);
+         word.or(split.lanewise(VectorOperators.LSHL, 26)).intoArray(out, dst + 2 * VLEN_128);
+
+         // Word 3: high 3 bits, three whole fields, 2 low bits.
+         word = split.lanewise(VectorOperators.LSHR, 6)
+             .or(IntVector.fromArray(SPECIES_128, in, src + 44).and(MASK_9).lanewise(VectorOperators.LSHL, 3))
+             .or(IntVector.fromArray(SPECIES_128, in, src + 48).and(MASK_9).lanewise(VectorOperators.LSHL, 12))
+             .or(IntVector.fromArray(SPECIES_128, in, src + 52).and(MASK_9).lanewise(VectorOperators.LSHL, 21));
+         split = IntVector.fromArray(SPECIES_128, in, src + 56).and(MASK_9);
+         word.or(split.lanewise(VectorOperators.LSHL, 30)).intoArray(out, dst + 3 * VLEN_128);
+
+         // Word 4: high 7 bits, two whole fields, 7 low bits.
+         word = split.lanewise(VectorOperators.LSHR, 2)
+             .or(IntVector.fromArray(SPECIES_128, in, src + 60).and(MASK_9).lanewise(VectorOperators.LSHL, 7))
+             .or(IntVector.fromArray(SPECIES_128, in, src + 64).and(MASK_9).lanewise(VectorOperators.LSHL, 16));
+         split = IntVector.fromArray(SPECIES_128, in, src + 68).and(MASK_9);
+         word.or(split.lanewise(VectorOperators.LSHL, 25)).intoArray(out, dst + 4 * VLEN_128);
+
+         // Word 5: high 2 bits, three whole fields, 3 low bits.
+         word = split.lanewise(VectorOperators.LSHR, 7)
+             .or(IntVector.fromArray(SPECIES_128, in, src + 72).and(MASK_9).lanewise(VectorOperators.LSHL, 2))
+             .or(IntVector.fromArray(SPECIES_128, in, src + 76).and(MASK_9).lanewise(VectorOperators.LSHL, 11))
+             .or(IntVector.fromArray(SPECIES_128, in, src + 80).and(MASK_9).lanewise(VectorOperators.LSHL, 20));
+         split = IntVector.fromArray(SPECIES_128, in, src + 84).and(MASK_9);
+         word.or(split.lanewise(VectorOperators.LSHL, 29)).intoArray(out, dst + 5 * VLEN_128);
+
+         // Word 6: high 6 bits, two whole fields, 8 low bits.
+         word = split.lanewise(VectorOperators.LSHR, 3)
+             .or(IntVector.fromArray(SPECIES_128, in, src + 88).and(MASK_9).lanewise(VectorOperators.LSHL, 6))
+             .or(IntVector.fromArray(SPECIES_128, in, src + 92).and(MASK_9).lanewise(VectorOperators.LSHL, 15));
+         split = IntVector.fromArray(SPECIES_128, in, src + 96).and(MASK_9);
+         word.or(split.lanewise(VectorOperators.LSHL, 24)).intoArray(out, dst + 6 * VLEN_128);
+
+         // Word 7: high 1 bit, three whole fields, 4 low bits.
+         word = split.lanewise(VectorOperators.LSHR, 8)
+             .or(IntVector.fromArray(SPECIES_128, in, src + 100).and(MASK_9).lanewise(VectorOperators.LSHL, 1))
+             .or(IntVector.fromArray(SPECIES_128, in, src + 104).and(MASK_9).lanewise(VectorOperators.LSHL, 10))
+             .or(IntVector.fromArray(SPECIES_128, in, src + 108).and(MASK_9).lanewise(VectorOperators.LSHL, 19));
+         split = IntVector.fromArray(SPECIES_128, in, src + 112).and(MASK_9);
+         word.or(split.lanewise(VectorOperators.LSHL, 28)).intoArray(out, dst + 7 * VLEN_128);
+
+         // Word 8: high 5 bits, then three whole fields ending exactly at bit 31.
+         split.lanewise(VectorOperators.LSHR, 4)
+             .or(IntVector.fromArray(SPECIES_128, in, src + 116).and(MASK_9).lanewise(VectorOperators.LSHL, 5))
+             .or(IntVector.fromArray(SPECIES_128, in, src + 120).and(MASK_9).lanewise(VectorOperators.LSHL, 14))
+             .or(IntVector.fromArray(SPECIES_128, in, src + 124).and(MASK_9).lanewise(VectorOperators.LSHL, 23))
+             .intoArray(out, dst + 8 * VLEN_128);
+     }
+ }
+
+ /**
+  * Packs one block of 9-bit values without masking the inputs first.
+  *
+  * <p>Sixty-four SPECIES_128 vectors are read from {@code in} at offsets
+  * {@code inpos}, {@code inpos + 4}, ..., {@code inpos + 252} and eighteen packed
+  * vectors are stored to {@code out} at {@code outpos + k * VLEN_128} for
+  * {@code k = 0..17}. Within each lane the fields are laid out back to back,
+  * 9 bits apart; a field that does not fit in the remaining bits of a word is
+  * split, its low part finishing the current word and its high part (obtained
+  * with a logical right shift) starting the next one.
+  *
+  * <p>No mask is applied, so any input bits above bit 8 are ORed into the
+  * neighbouring fields; callers are expected to pass values that fit in 9 bits.
+  * Thirty-two fields fill exactly nine words, so the block is processed as two
+  * identical halves with no state carried between them.
+  *
+  * @param in     source array holding the unpacked values
+  * @param inpos  index in {@code in} of the first value to pack
+  * @param out    destination array receiving the packed words
+  * @param outpos index in {@code out} of the first packed word
+  */
+ private static void fastpackNoMask9(final int[] in, int inpos, final int[] out, int outpos) {
+     for (int half = 0; half < 2; half++) {
+         final int src = inpos + 128 * half;
+         final int dst = outpos + 9 * half * VLEN_128;
+
+         // Word 0: three whole fields, then the low 5 bits of the fourth.
+         var word = IntVector.fromArray(SPECIES_128, in, src)
+             .or(IntVector.fromArray(SPECIES_128, in, src + 4).lanewise(VectorOperators.LSHL, 9))
+             .or(IntVector.fromArray(SPECIES_128, in, src + 8).lanewise(VectorOperators.LSHL, 18));
+         var split = IntVector.fromArray(SPECIES_128, in, src + 12);
+         word.or(split.lanewise(VectorOperators.LSHL, 27)).intoArray(out, dst);
+
+         // Word 1: high 4 bits of the split field, three whole fields, 1 low bit.
+         word = split.lanewise(VectorOperators.LSHR, 5)
+             .or(IntVector.fromArray(SPECIES_128, in, src + 16).lanewise(VectorOperators.LSHL, 4))
+             .or(IntVector.fromArray(SPECIES_128, in, src + 20).lanewise(VectorOperators.LSHL, 13))
+             .or(IntVector.fromArray(SPECIES_128, in, src + 24).lanewise(VectorOperators.LSHL, 22));
+         split = IntVector.fromArray(SPECIES_128, in, src + 28);
+         word.or(split.lanewise(VectorOperators.LSHL, 31)).intoArray(out, dst + VLEN_128);
+
+         // Word 2: high 8 bits, two whole fields, 6 low bits.
+         word = split.lanewise(VectorOperators.LSHR, 1)
+             .or(IntVector.fromArray(SPECIES_128, in, src + 32).lanewise(VectorOperators.LSHL, 8))
+             .or(IntVector.fromArray(SPECIES_128, in, src + 36).lanewise(VectorOperators.LSHL, 17));
+         split = IntVector.fromArray(SPECIES_128, in, src + 40);
+         word.or(split.lanewise(VectorOperators.LSHL, 26)).intoArray(out, dst + 2 * VLEN_128);
+
+         // Word 3: high 3 bits, three whole fields, 2 low bits.
+         word = split.lanewise(VectorOperators.LSHR, 6)
+             .or(IntVector.fromArray(SPECIES_128, in, src + 44).lanewise(VectorOperators.LSHL, 3))
+             .or(IntVector.fromArray(SPECIES_128, in, src + 48).lanewise(VectorOperators.LSHL, 12))
+             .or(IntVector.fromArray(SPECIES_128, in, src + 52).lanewise(VectorOperators.LSHL, 21));
+         split = IntVector.fromArray(SPECIES_128, in, src + 56);
+         word.or(split.lanewise(VectorOperators.LSHL, 30)).intoArray(out, dst + 3 * VLEN_128);
+
+         // Word 4: high 7 bits, two whole fields, 7 low bits.
+         word = split.lanewise(VectorOperators.LSHR, 2)
+             .or(IntVector.fromArray(SPECIES_128, in, src + 60).lanewise(VectorOperators.LSHL, 7))
+             .or(IntVector.fromArray(SPECIES_128, in, src + 64).lanewise(VectorOperators.LSHL, 16));
+         split = IntVector.fromArray(SPECIES_128, in, src + 68);
+         word.or(split.lanewise(VectorOperators.LSHL, 25)).intoArray(out, dst + 4 * VLEN_128);
+
+         // Word 5: high 2 bits, three whole fields, 3 low bits.
+         word = split.lanewise(VectorOperators.LSHR, 7)
+             .or(IntVector.fromArray(SPECIES_128, in, src + 72).lanewise(VectorOperators.LSHL, 2))
+             .or(IntVector.fromArray(SPECIES_128, in, src + 76).lanewise(VectorOperators.LSHL, 11))
+             .or(IntVector.fromArray(SPECIES_128, in, src + 80).lanewise(VectorOperators.LSHL, 20));
+         split = IntVector.fromArray(SPECIES_128, in, src + 84);
+         word.or(split.lanewise(VectorOperators.LSHL, 29)).intoArray(out, dst + 5 * VLEN_128);
+
+         // Word 6: high 6 bits, two whole fields, 8 low bits.
+         word = split.lanewise(VectorOperators.LSHR, 3)
+             .or(IntVector.fromArray(SPECIES_128, in, src + 88).lanewise(VectorOperators.LSHL, 6))
+             .or(IntVector.fromArray(SPECIES_128, in, src + 92).lanewise(VectorOperators.LSHL, 15));
+         split = IntVector.fromArray(SPECIES_128, in, src + 96);
+         word.or(split.lanewise(VectorOperators.LSHL, 24)).intoArray(out, dst + 6 * VLEN_128);
+
+         // Word 7: high 1 bit, three whole fields, 4 low bits.
+         word = split.lanewise(VectorOperators.LSHR, 8)
+             .or(IntVector.fromArray(SPECIES_128, in, src + 100).lanewise(VectorOperators.LSHL, 1))
+             .or(IntVector.fromArray(SPECIES_128, in, src + 104).lanewise(VectorOperators.LSHL, 10))
+             .or(IntVector.fromArray(SPECIES_128, in, src + 108).lanewise(VectorOperators.LSHL, 19));
+         split = IntVector.fromArray(SPECIES_128, in, src + 112);
+         word.or(split.lanewise(VectorOperators.LSHL, 28)).intoArray(out, dst + 7 * VLEN_128);
+
+         // Word 8: high 5 bits, then three whole fields ending exactly at bit 31.
+         split.lanewise(VectorOperators.LSHR, 4)
+             .or(IntVector.fromArray(SPECIES_128, in, src + 116).lanewise(VectorOperators.LSHL, 5))
+             .or(IntVector.fromArray(SPECIES_128, in, src + 120).lanewise(VectorOperators.LSHL, 14))
+             .or(IntVector.fromArray(SPECIES_128, in, src + 124).lanewise(VectorOperators.LSHL, 23))
+             .intoArray(out, dst + 8 * VLEN_128);
+     }
+ }
+
+ private static void fastunpack9(final int[] in, int inpos, final int[] out, int outpos) { // Unpacks 9-bit fields: 18 vector loads from in (inpos + 0, 4, ..., 68) become 64 vector stores into out.
+ var iV = IntVector.fromArray(SPECIES_128, in, inpos);
+ iV.and(MASK_9).intoArray(out, outpos);
+ outpos += VLEN_128;
+ // in/inpos: packed source and its start index; out/outpos: destination and its start index (outpos advances locally by VLEN_128 after each store).
+ iV.lanewise(VectorOperators.LSHR, 9).and(MASK_9).intoArray(out, outpos);
+ outpos += VLEN_128;
+ // NOTE(review): assumes MASK_9 broadcasts (1 << 9) - 1 and VLEN_128 equals the SPECIES_128 lane count; both are defined outside this view - confirm.
+ iV.lanewise(VectorOperators.LSHR, 18).and(MASK_9).intoArray(out, outpos);
+ outpos += VLEN_128;
+ // This field straddles two loads: LSHR 27 leaves the 5 low bits from the current load, and the next load supplies 4 more bits (and(15)) shifted up by 5.
+ var oV = iV.lanewise(VectorOperators.LSHR, 27).and(MASK_9);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 4);
+ oV = iV.and(15).lanewise(VectorOperators.LSHL, 5).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 4).and(MASK_9).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 13).and(MASK_9).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 22).and(MASK_9).intoArray(out, outpos);
+ outpos += VLEN_128;
+ // Only bit 31 of this load belongs to the next field; the other 8 bits (and(255)) come from the following load, shifted up by 1.
+ oV = iV.lanewise(VectorOperators.LSHR, 31).and(MASK_9);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 8);
+ oV = iV.and(255).lanewise(VectorOperators.LSHL, 1).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 8).and(MASK_9).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 17).and(MASK_9).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 26).and(MASK_9);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 12);
+ oV = iV.and(7).lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 3).and(MASK_9).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 12).and(MASK_9).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 21).and(MASK_9).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 30).and(MASK_9);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 16);
+ oV = iV.and(127).lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 7).and(MASK_9).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 16).and(MASK_9).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 25).and(MASK_9);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 20);
+ oV = iV.and(3).lanewise(VectorOperators.LSHL, 7).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 2).and(MASK_9).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 11).and(MASK_9).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 20).and(MASK_9).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 29).and(MASK_9);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 24);
+ oV = iV.and(63).lanewise(VectorOperators.LSHL, 3).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 6).and(MASK_9).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 15).and(MASK_9).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 24).and(MASK_9);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 28);
+ oV = iV.and(1).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 1).and(MASK_9).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 10).and(MASK_9).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 19).and(MASK_9).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 28).and(MASK_9);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 32);
+ oV = iV.and(31).lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 5).and(MASK_9).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 14).and(MASK_9).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 23).and(MASK_9).intoArray(out, outpos);
+ outpos += VLEN_128;
+ // 23 + 9 == 32: the previous field ended exactly on a load boundary, so the same shift schedule repeats for the remaining 9 loads.
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 36);
+ iV.and(MASK_9).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 9).and(MASK_9).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 18).and(MASK_9).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 27).and(MASK_9);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 40);
+ oV = iV.and(15).lanewise(VectorOperators.LSHL, 5).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 4).and(MASK_9).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 13).and(MASK_9).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 22).and(MASK_9).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 31).and(MASK_9);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 44);
+ oV = iV.and(255).lanewise(VectorOperators.LSHL, 1).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 8).and(MASK_9).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 17).and(MASK_9).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 26).and(MASK_9);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 48);
+ oV = iV.and(7).lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 3).and(MASK_9).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 12).and(MASK_9).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 21).and(MASK_9).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 30).and(MASK_9);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 52);
+ oV = iV.and(127).lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 7).and(MASK_9).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 16).and(MASK_9).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 25).and(MASK_9);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 56);
+ oV = iV.and(3).lanewise(VectorOperators.LSHL, 7).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 2).and(MASK_9).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 11).and(MASK_9).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 20).and(MASK_9).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 29).and(MASK_9);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 60);
+ oV = iV.and(63).lanewise(VectorOperators.LSHL, 3).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 6).and(MASK_9).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 15).and(MASK_9).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 24).and(MASK_9);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 64);
+ oV = iV.and(1).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 1).and(MASK_9).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 10).and(MASK_9).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 19).and(MASK_9).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 28).and(MASK_9);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 68);
+ oV = iV.and(31).lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 5).and(MASK_9).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 14).and(MASK_9).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 23).and(MASK_9).intoArray(out, outpos);
+ }
+
+ private static void fastpack10(final int[] in, int inpos, final int[] out, int outpos) { // Packs at 10 bits per value: 64 vector loads from in (inpos + 0, 4, ..., 252) are masked and OR-ed into 20 vector stores to out.
+ var iV = IntVector.fromArray(SPECIES_128, in, inpos);
+ var oV = iV.and(MASK_10);
+ // in/inpos: source values and their start index; out/outpos: packed destination and its start index (outpos advances locally by VLEN_128 after each store).
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 4);
+ oV = iV.and(MASK_10).lanewise(VectorOperators.LSHL, 10).or(oV);
+ // NOTE(review): assumes MASK_10 broadcasts (1 << 10) - 1 and VLEN_128 equals the SPECIES_128 lane count; both are defined outside this view - confirm.
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 8);
+ oV = iV.and(MASK_10).lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 12);
+ oV = iV.and(MASK_10).lanewise(VectorOperators.LSHL, 30).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+ // The LSHL 30 above kept only the low 2 bits of that value in the stored vector; LSHR 2 carries its remaining bits into the next output vector.
+ oV = iV.and(MASK_10).lanewise(VectorOperators.LSHR, 2);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 16);
+ oV = iV.and(MASK_10).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 20);
+ oV = iV.and(MASK_10).lanewise(VectorOperators.LSHL, 18).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 24);
+ oV = iV.and(MASK_10).lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_10).lanewise(VectorOperators.LSHR, 4);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 28);
+ oV = iV.and(MASK_10).lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 32);
+ oV = iV.and(MASK_10).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 36);
+ oV = iV.and(MASK_10).lanewise(VectorOperators.LSHL, 26).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_10).lanewise(VectorOperators.LSHR, 6);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 40);
+ oV = iV.and(MASK_10).lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 44);
+ oV = iV.and(MASK_10).lanewise(VectorOperators.LSHL, 14).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 48);
+ oV = iV.and(MASK_10).lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_10).lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 52);
+ oV = iV.and(MASK_10).lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 56);
+ oV = iV.and(MASK_10).lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 60);
+ oV = iV.and(MASK_10).lanewise(VectorOperators.LSHL, 22).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+ // 22 + 10 == 32: the vector just stored ended on a value boundary, so nothing is carried and the shift schedule restarts at 0.
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 64);
+ oV = iV.and(MASK_10);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 68);
+ oV = iV.and(MASK_10).lanewise(VectorOperators.LSHL, 10).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 72);
+ oV = iV.and(MASK_10).lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 76);
+ oV = iV.and(MASK_10).lanewise(VectorOperators.LSHL, 30).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_10).lanewise(VectorOperators.LSHR, 2);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 80);
+ oV = iV.and(MASK_10).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 84);
+ oV = iV.and(MASK_10).lanewise(VectorOperators.LSHL, 18).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 88);
+ oV = iV.and(MASK_10).lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_10).lanewise(VectorOperators.LSHR, 4);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 92);
+ oV = iV.and(MASK_10).lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 96);
+ oV = iV.and(MASK_10).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 100);
+ oV = iV.and(MASK_10).lanewise(VectorOperators.LSHL, 26).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_10).lanewise(VectorOperators.LSHR, 6);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 104);
+ oV = iV.and(MASK_10).lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 108);
+ oV = iV.and(MASK_10).lanewise(VectorOperators.LSHL, 14).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 112);
+ oV = iV.and(MASK_10).lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_10).lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 116);
+ oV = iV.and(MASK_10).lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 120);
+ oV = iV.and(MASK_10).lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 124);
+ oV = iV.and(MASK_10).lanewise(VectorOperators.LSHL, 22).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 128);
+ oV = iV.and(MASK_10);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 132);
+ oV = iV.and(MASK_10).lanewise(VectorOperators.LSHL, 10).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 136);
+ oV = iV.and(MASK_10).lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 140);
+ oV = iV.and(MASK_10).lanewise(VectorOperators.LSHL, 30).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_10).lanewise(VectorOperators.LSHR, 2);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 144);
+ oV = iV.and(MASK_10).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 148);
+ oV = iV.and(MASK_10).lanewise(VectorOperators.LSHL, 18).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 152);
+ oV = iV.and(MASK_10).lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_10).lanewise(VectorOperators.LSHR, 4);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 156);
+ oV = iV.and(MASK_10).lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 160);
+ oV = iV.and(MASK_10).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 164);
+ oV = iV.and(MASK_10).lanewise(VectorOperators.LSHL, 26).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_10).lanewise(VectorOperators.LSHR, 6);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 168);
+ oV = iV.and(MASK_10).lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 172);
+ oV = iV.and(MASK_10).lanewise(VectorOperators.LSHL, 14).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 176);
+ oV = iV.and(MASK_10).lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_10).lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 180);
+ oV = iV.and(MASK_10).lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 184);
+ oV = iV.and(MASK_10).lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 188);
+ oV = iV.and(MASK_10).lanewise(VectorOperators.LSHL, 22).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 192);
+ oV = iV.and(MASK_10);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 196);
+ oV = iV.and(MASK_10).lanewise(VectorOperators.LSHL, 10).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 200);
+ oV = iV.and(MASK_10).lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 204);
+ oV = iV.and(MASK_10).lanewise(VectorOperators.LSHL, 30).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_10).lanewise(VectorOperators.LSHR, 2);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 208);
+ oV = iV.and(MASK_10).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 212);
+ oV = iV.and(MASK_10).lanewise(VectorOperators.LSHL, 18).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 216);
+ oV = iV.and(MASK_10).lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_10).lanewise(VectorOperators.LSHR, 4);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 220);
+ oV = iV.and(MASK_10).lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 224);
+ oV = iV.and(MASK_10).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 228);
+ oV = iV.and(MASK_10).lanewise(VectorOperators.LSHL, 26).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_10).lanewise(VectorOperators.LSHR, 6);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 232);
+ oV = iV.and(MASK_10).lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 236);
+ oV = iV.and(MASK_10).lanewise(VectorOperators.LSHL, 14).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 240);
+ oV = iV.and(MASK_10).lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_10).lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 244);
+ oV = iV.and(MASK_10).lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 248);
+ oV = iV.and(MASK_10).lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 252);
+ oV = iV.and(MASK_10).lanewise(VectorOperators.LSHL, 22).or(oV);
+
+ oV.intoArray(out, outpos);
+ }
+
+ private static void fastpackNoMask10(final int[] in, int inpos, final int[] out, int outpos) { // Packs at a 10-bit stride without masking: 64 vector loads from in (inpos + 0, 4, ..., 252) are shifted and OR-ed into 20 vector stores to out.
+ var iV = IntVector.fromArray(SPECIES_128, in, inpos);
+ var oV = iV;
+ // No mask is applied, so any input bit above bit 9 would be OR-ed into a neighbouring field; presumably callers guarantee every value fits in 10 bits - confirm.
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 4);
+ oV = iV.lanewise(VectorOperators.LSHL, 10).or(oV);
+ // in/inpos: source values and their start index; out/outpos: packed destination and its start index (outpos advances locally by VLEN_128 after each store; VLEN_128 is defined outside this view).
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 8);
+ oV = iV.lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 12);
+ oV = iV.lanewise(VectorOperators.LSHL, 30).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+ // The LSHL 30 above kept only the low 2 bits of that value in the stored vector; LSHR 2 carries its remaining bits into the next output vector.
+ oV = iV.lanewise(VectorOperators.LSHR, 2);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 16);
+ oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 20);
+ oV = iV.lanewise(VectorOperators.LSHL, 18).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 24);
+ oV = iV.lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 4);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 28);
+ oV = iV.lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 32);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 36);
+ oV = iV.lanewise(VectorOperators.LSHL, 26).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 6);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 40);
+ oV = iV.lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 44);
+ oV = iV.lanewise(VectorOperators.LSHL, 14).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 48);
+ oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 52);
+ oV = iV.lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 56);
+ oV = iV.lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 60);
+ oV = iV.lanewise(VectorOperators.LSHL, 22).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+ // 22 + 10 == 32: the vector just stored ended on a value boundary, so nothing is carried and the shift schedule restarts at 0.
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 64);
+ oV = iV;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 68);
+ oV = iV.lanewise(VectorOperators.LSHL, 10).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 72);
+ oV = iV.lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 76);
+ oV = iV.lanewise(VectorOperators.LSHL, 30).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 2);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 80);
+ oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 84);
+ oV = iV.lanewise(VectorOperators.LSHL, 18).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 88);
+ oV = iV.lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 4);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 92);
+ oV = iV.lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 96);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 100);
+ oV = iV.lanewise(VectorOperators.LSHL, 26).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 6);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 104);
+ oV = iV.lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 108);
+ oV = iV.lanewise(VectorOperators.LSHL, 14).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 112);
+ oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 116);
+ oV = iV.lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 120);
+ oV = iV.lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 124);
+ oV = iV.lanewise(VectorOperators.LSHL, 22).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 128);
+ oV = iV;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 132);
+ oV = iV.lanewise(VectorOperators.LSHL, 10).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 136);
+ oV = iV.lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 140);
+ oV = iV.lanewise(VectorOperators.LSHL, 30).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 2);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 144);
+ oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 148);
+ oV = iV.lanewise(VectorOperators.LSHL, 18).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 152);
+ oV = iV.lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 4);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 156);
+ oV = iV.lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 160);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 164);
+ oV = iV.lanewise(VectorOperators.LSHL, 26).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 6);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 168);
+ oV = iV.lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 172);
+ oV = iV.lanewise(VectorOperators.LSHL, 14).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 176);
+ oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 180);
+ oV = iV.lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 184);
+ oV = iV.lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 188);
+ oV = iV.lanewise(VectorOperators.LSHL, 22).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 192);
+ oV = iV;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 196);
+ oV = iV.lanewise(VectorOperators.LSHL, 10).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 200);
+ oV = iV.lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 204);
+ oV = iV.lanewise(VectorOperators.LSHL, 30).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 2);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 208);
+ oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 212);
+ oV = iV.lanewise(VectorOperators.LSHL, 18).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 216);
+ oV = iV.lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 4);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 220);
+ oV = iV.lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 224);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 228);
+ oV = iV.lanewise(VectorOperators.LSHL, 26).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 6);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 232);
+ oV = iV.lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 236);
+ oV = iV.lanewise(VectorOperators.LSHL, 14).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 240);
+ oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 244);
+ oV = iV.lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 248);
+ oV = iV.lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 252);
+ oV = iV.lanewise(VectorOperators.LSHL, 22).or(oV);
+
+ oV.intoArray(out, outpos);
+ }
+
+ private static void fastunpack10(final int[] in, int inpos, final int[] out, int outpos) {
+ var iV = IntVector.fromArray(SPECIES_128, in, inpos);
+ iV.and(MASK_10).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 10).and(MASK_10).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 20).and(MASK_10).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ var oV = iV.lanewise(VectorOperators.LSHR, 30).and(MASK_10);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 4);
+ oV = iV.and(255).lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 8).and(MASK_10).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 18).and(MASK_10).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 28).and(MASK_10);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 8);
+ oV = iV.and(63).lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 6).and(MASK_10).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 16).and(MASK_10).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 26).and(MASK_10);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 12);
+ oV = iV.and(15).lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 4).and(MASK_10).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 14).and(MASK_10).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 24).and(MASK_10);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 16);
+ oV = iV.and(3).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 2).and(MASK_10).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 12).and(MASK_10).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 22).and(MASK_10).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 20);
+ iV.and(MASK_10).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 10).and(MASK_10).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 20).and(MASK_10).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 30).and(MASK_10);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 24);
+ oV = iV.and(255).lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 8).and(MASK_10).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 18).and(MASK_10).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 28).and(MASK_10);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 28);
+ oV = iV.and(63).lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 6).and(MASK_10).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 16).and(MASK_10).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 26).and(MASK_10);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 32);
+ oV = iV.and(15).lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 4).and(MASK_10).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 14).and(MASK_10).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 24).and(MASK_10);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 36);
+ oV = iV.and(3).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 2).and(MASK_10).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 12).and(MASK_10).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 22).and(MASK_10).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 40);
+ iV.and(MASK_10).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 10).and(MASK_10).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 20).and(MASK_10).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 30).and(MASK_10);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 44);
+ oV = iV.and(255).lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 8).and(MASK_10).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 18).and(MASK_10).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 28).and(MASK_10);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 48);
+ oV = iV.and(63).lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 6).and(MASK_10).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 16).and(MASK_10).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 26).and(MASK_10);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 52);
+ oV = iV.and(15).lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 4).and(MASK_10).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 14).and(MASK_10).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 24).and(MASK_10);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 56);
+ oV = iV.and(3).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 2).and(MASK_10).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 12).and(MASK_10).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 22).and(MASK_10).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 60);
+ iV.and(MASK_10).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 10).and(MASK_10).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 20).and(MASK_10).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 30).and(MASK_10);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 64);
+ oV = iV.and(255).lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 8).and(MASK_10).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 18).and(MASK_10).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 28).and(MASK_10);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 68);
+ oV = iV.and(63).lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 6).and(MASK_10).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 16).and(MASK_10).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 26).and(MASK_10);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 72);
+ oV = iV.and(15).lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 4).and(MASK_10).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 14).and(MASK_10).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 24).and(MASK_10);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 76);
+ oV = iV.and(3).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 2).and(MASK_10).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 12).and(MASK_10).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 22).and(MASK_10).intoArray(out, outpos);
+ }
+
+ private static void fastpack11(final int[] in, int inpos, final int[] out, int outpos) { // Packs 64 input vectors (in[inpos + 0 .. inpos + 252], stride 4) into 22 output vectors at out[outpos ...], 11 bits per value.
+ var iV = IntVector.fromArray(SPECIES_128, in, inpos);
+ var oV = iV.and(MASK_11);
+ // Each input is AND-ed with MASK_11 (presumably the low-11-bit mask; its definition is outside this chunk), shifted to the next free bit offset and OR-ed into the accumulator oV; offsets grow by 11 per input.
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 4);
+ oV = iV.and(MASK_11).lanewise(VectorOperators.LSHL, 11).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 8);
+ oV = iV.and(MASK_11).lanewise(VectorOperators.LSHL, 22).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+ // The input stored at bit 22 contributed only its low 10 bits; its top 1 bit starts the next word.
+ oV = iV.and(MASK_11).lanewise(VectorOperators.LSHR, 10);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 12);
+ oV = iV.and(MASK_11).lanewise(VectorOperators.LSHL, 1).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 16);
+ oV = iV.and(MASK_11).lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 20);
+ oV = iV.and(MASK_11).lanewise(VectorOperators.LSHL, 23).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+ // The input stored at bit 23 contributed only its low 9 bits; its top 2 bits start the next word.
+ oV = iV.and(MASK_11).lanewise(VectorOperators.LSHR, 9);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 24);
+ oV = iV.and(MASK_11).lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 28);
+ oV = iV.and(MASK_11).lanewise(VectorOperators.LSHL, 13).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 32);
+ oV = iV.and(MASK_11).lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+ // The input stored at bit 24 contributed only its low 8 bits; its top 3 bits start the next word.
+ oV = iV.and(MASK_11).lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 36);
+ oV = iV.and(MASK_11).lanewise(VectorOperators.LSHL, 3).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 40);
+ oV = iV.and(MASK_11).lanewise(VectorOperators.LSHL, 14).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 44);
+ oV = iV.and(MASK_11).lanewise(VectorOperators.LSHL, 25).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+ // The input stored at bit 25 contributed only its low 7 bits; its top 4 bits start the next word.
+ oV = iV.and(MASK_11).lanewise(VectorOperators.LSHR, 7);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 48);
+ oV = iV.and(MASK_11).lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 52);
+ oV = iV.and(MASK_11).lanewise(VectorOperators.LSHL, 15).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 56);
+ oV = iV.and(MASK_11).lanewise(VectorOperators.LSHL, 26).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+ // The input stored at bit 26 contributed only its low 6 bits; its top 5 bits start the next word.
+ oV = iV.and(MASK_11).lanewise(VectorOperators.LSHR, 6);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 60);
+ oV = iV.and(MASK_11).lanewise(VectorOperators.LSHL, 5).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 64);
+ oV = iV.and(MASK_11).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 68);
+ oV = iV.and(MASK_11).lanewise(VectorOperators.LSHL, 27).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+ // The input stored at bit 27 contributed only its low 5 bits; its top 6 bits start the next word.
+ oV = iV.and(MASK_11).lanewise(VectorOperators.LSHR, 5);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 72);
+ oV = iV.and(MASK_11).lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 76);
+ oV = iV.and(MASK_11).lanewise(VectorOperators.LSHL, 17).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 80);
+ oV = iV.and(MASK_11).lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+ // The input stored at bit 28 contributed only its low 4 bits; its top 7 bits start the next word.
+ oV = iV.and(MASK_11).lanewise(VectorOperators.LSHR, 4);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 84);
+ oV = iV.and(MASK_11).lanewise(VectorOperators.LSHL, 7).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 88);
+ oV = iV.and(MASK_11).lanewise(VectorOperators.LSHL, 18).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 92);
+ oV = iV.and(MASK_11).lanewise(VectorOperators.LSHL, 29).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+ // The input stored at bit 29 contributed only its low 3 bits; its top 8 bits start the next word.
+ oV = iV.and(MASK_11).lanewise(VectorOperators.LSHR, 3);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 96);
+ oV = iV.and(MASK_11).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 100);
+ oV = iV.and(MASK_11).lanewise(VectorOperators.LSHL, 19).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 104);
+ oV = iV.and(MASK_11).lanewise(VectorOperators.LSHL, 30).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+ // The input stored at bit 30 contributed only its low 2 bits; its top 9 bits start the next word.
+ oV = iV.and(MASK_11).lanewise(VectorOperators.LSHR, 2);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 108);
+ oV = iV.and(MASK_11).lanewise(VectorOperators.LSHL, 9).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 112);
+ oV = iV.and(MASK_11).lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 116);
+ oV = iV.and(MASK_11).lanewise(VectorOperators.LSHL, 31).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+ // The input stored at bit 31 contributed only its low 1 bit; its top 10 bits start the next word.
+ oV = iV.and(MASK_11).lanewise(VectorOperators.LSHR, 1);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 120);
+ oV = iV.and(MASK_11).lanewise(VectorOperators.LSHL, 10).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 124);
+ oV = iV.and(MASK_11).lanewise(VectorOperators.LSHL, 21).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+ // Bit offset is back to 0 (21 + 11 == 32): the inputs at inpos + 128 .. inpos + 252 repeat the same 11-word schedule.
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 128);
+ oV = iV.and(MASK_11);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 132);
+ oV = iV.and(MASK_11).lanewise(VectorOperators.LSHL, 11).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 136);
+ oV = iV.and(MASK_11).lanewise(VectorOperators.LSHL, 22).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+ // The input stored at bit 22 contributed only its low 10 bits; its top 1 bit starts the next word.
+ oV = iV.and(MASK_11).lanewise(VectorOperators.LSHR, 10);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 140);
+ oV = iV.and(MASK_11).lanewise(VectorOperators.LSHL, 1).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 144);
+ oV = iV.and(MASK_11).lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 148);
+ oV = iV.and(MASK_11).lanewise(VectorOperators.LSHL, 23).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+ // The input stored at bit 23 contributed only its low 9 bits; its top 2 bits start the next word.
+ oV = iV.and(MASK_11).lanewise(VectorOperators.LSHR, 9);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 152);
+ oV = iV.and(MASK_11).lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 156);
+ oV = iV.and(MASK_11).lanewise(VectorOperators.LSHL, 13).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 160);
+ oV = iV.and(MASK_11).lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+ // The input stored at bit 24 contributed only its low 8 bits; its top 3 bits start the next word.
+ oV = iV.and(MASK_11).lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 164);
+ oV = iV.and(MASK_11).lanewise(VectorOperators.LSHL, 3).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 168);
+ oV = iV.and(MASK_11).lanewise(VectorOperators.LSHL, 14).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 172);
+ oV = iV.and(MASK_11).lanewise(VectorOperators.LSHL, 25).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+ // The input stored at bit 25 contributed only its low 7 bits; its top 4 bits start the next word.
+ oV = iV.and(MASK_11).lanewise(VectorOperators.LSHR, 7);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 176);
+ oV = iV.and(MASK_11).lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 180);
+ oV = iV.and(MASK_11).lanewise(VectorOperators.LSHL, 15).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 184);
+ oV = iV.and(MASK_11).lanewise(VectorOperators.LSHL, 26).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+ // The input stored at bit 26 contributed only its low 6 bits; its top 5 bits start the next word.
+ oV = iV.and(MASK_11).lanewise(VectorOperators.LSHR, 6);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 188);
+ oV = iV.and(MASK_11).lanewise(VectorOperators.LSHL, 5).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 192);
+ oV = iV.and(MASK_11).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 196);
+ oV = iV.and(MASK_11).lanewise(VectorOperators.LSHL, 27).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+ // The input stored at bit 27 contributed only its low 5 bits; its top 6 bits start the next word.
+ oV = iV.and(MASK_11).lanewise(VectorOperators.LSHR, 5);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 200);
+ oV = iV.and(MASK_11).lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 204);
+ oV = iV.and(MASK_11).lanewise(VectorOperators.LSHL, 17).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 208);
+ oV = iV.and(MASK_11).lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+ // The input stored at bit 28 contributed only its low 4 bits; its top 7 bits start the next word.
+ oV = iV.and(MASK_11).lanewise(VectorOperators.LSHR, 4);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 212);
+ oV = iV.and(MASK_11).lanewise(VectorOperators.LSHL, 7).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 216);
+ oV = iV.and(MASK_11).lanewise(VectorOperators.LSHL, 18).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 220);
+ oV = iV.and(MASK_11).lanewise(VectorOperators.LSHL, 29).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+ // The input stored at bit 29 contributed only its low 3 bits; its top 8 bits start the next word.
+ oV = iV.and(MASK_11).lanewise(VectorOperators.LSHR, 3);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 224);
+ oV = iV.and(MASK_11).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 228);
+ oV = iV.and(MASK_11).lanewise(VectorOperators.LSHL, 19).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 232);
+ oV = iV.and(MASK_11).lanewise(VectorOperators.LSHL, 30).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+ // The input stored at bit 30 contributed only its low 2 bits; its top 9 bits start the next word.
+ oV = iV.and(MASK_11).lanewise(VectorOperators.LSHR, 2);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 236);
+ oV = iV.and(MASK_11).lanewise(VectorOperators.LSHL, 9).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 240);
+ oV = iV.and(MASK_11).lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 244);
+ oV = iV.and(MASK_11).lanewise(VectorOperators.LSHL, 31).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+ // The input stored at bit 31 contributed only its low 1 bit; its top 10 bits start the next word.
+ oV = iV.and(MASK_11).lanewise(VectorOperators.LSHR, 1);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 248);
+ oV = iV.and(MASK_11).lanewise(VectorOperators.LSHL, 10).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 252);
+ oV = iV.and(MASK_11).lanewise(VectorOperators.LSHL, 21).or(oV);
+
+ oV.intoArray(out, outpos);
+ }
+
+ private static void fastpackNoMask11(final int[] in, int inpos, final int[] out, int outpos) { // Same 11-bit packing schedule as fastpack11 (64 input vectors -> 22 output vectors), but inputs are used as-is with no AND mask.
+ var iV = IntVector.fromArray(SPECIES_128, in, inpos);
+ var oV = iV;
+ // No masking is done, so any input bit above bit 10 would be OR-ed into neighbouring fields; presumably callers guarantee values fit in 11 bits (TODO confirm).
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 4);
+ oV = iV.lanewise(VectorOperators.LSHL, 11).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 8);
+ oV = iV.lanewise(VectorOperators.LSHL, 22).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+ // The input stored at bit 22 kept only its low 10 bits in the 32-bit lane; its bits from 10 upward start the next word.
+ oV = iV.lanewise(VectorOperators.LSHR, 10);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 12);
+ oV = iV.lanewise(VectorOperators.LSHL, 1).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 16);
+ oV = iV.lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 20);
+ oV = iV.lanewise(VectorOperators.LSHL, 23).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+ // The input stored at bit 23 kept only its low 9 bits; its bits from 9 upward start the next word.
+ oV = iV.lanewise(VectorOperators.LSHR, 9);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 24);
+ oV = iV.lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 28);
+ oV = iV.lanewise(VectorOperators.LSHL, 13).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 32);
+ oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+ // The input stored at bit 24 kept only its low 8 bits; its bits from 8 upward start the next word.
+ oV = iV.lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 36);
+ oV = iV.lanewise(VectorOperators.LSHL, 3).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 40);
+ oV = iV.lanewise(VectorOperators.LSHL, 14).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 44);
+ oV = iV.lanewise(VectorOperators.LSHL, 25).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+ // The input stored at bit 25 kept only its low 7 bits; its bits from 7 upward start the next word.
+ oV = iV.lanewise(VectorOperators.LSHR, 7);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 48);
+ oV = iV.lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 52);
+ oV = iV.lanewise(VectorOperators.LSHL, 15).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 56);
+ oV = iV.lanewise(VectorOperators.LSHL, 26).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+ // The input stored at bit 26 kept only its low 6 bits; its bits from 6 upward start the next word.
+ oV = iV.lanewise(VectorOperators.LSHR, 6);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 60);
+ oV = iV.lanewise(VectorOperators.LSHL, 5).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 64);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 68);
+ oV = iV.lanewise(VectorOperators.LSHL, 27).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+ // The input stored at bit 27 kept only its low 5 bits; its bits from 5 upward start the next word.
+ oV = iV.lanewise(VectorOperators.LSHR, 5);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 72);
+ oV = iV.lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 76);
+ oV = iV.lanewise(VectorOperators.LSHL, 17).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 80);
+ oV = iV.lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+ // The input stored at bit 28 kept only its low 4 bits; its bits from 4 upward start the next word.
+ oV = iV.lanewise(VectorOperators.LSHR, 4);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 84);
+ oV = iV.lanewise(VectorOperators.LSHL, 7).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 88);
+ oV = iV.lanewise(VectorOperators.LSHL, 18).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 92);
+ oV = iV.lanewise(VectorOperators.LSHL, 29).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+ // The input stored at bit 29 kept only its low 3 bits; its bits from 3 upward start the next word.
+ oV = iV.lanewise(VectorOperators.LSHR, 3);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 96);
+ oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 100);
+ oV = iV.lanewise(VectorOperators.LSHL, 19).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 104);
+ oV = iV.lanewise(VectorOperators.LSHL, 30).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+ // The input stored at bit 30 kept only its low 2 bits; its bits from 2 upward start the next word.
+ oV = iV.lanewise(VectorOperators.LSHR, 2);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 108);
+ oV = iV.lanewise(VectorOperators.LSHL, 9).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 112);
+ oV = iV.lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 116);
+ oV = iV.lanewise(VectorOperators.LSHL, 31).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+ // The input stored at bit 31 kept only its low 1 bit; its bits from 1 upward start the next word.
+ oV = iV.lanewise(VectorOperators.LSHR, 1);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 120);
+ oV = iV.lanewise(VectorOperators.LSHL, 10).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 124);
+ oV = iV.lanewise(VectorOperators.LSHL, 21).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+ // Bit offset is back to 0 (21 + 11 == 32): the inputs at inpos + 128 .. inpos + 252 repeat the same 11-word schedule.
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 128);
+ oV = iV;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 132);
+ oV = iV.lanewise(VectorOperators.LSHL, 11).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 136);
+ oV = iV.lanewise(VectorOperators.LSHL, 22).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+ // The input stored at bit 22 kept only its low 10 bits; its bits from 10 upward start the next word.
+ oV = iV.lanewise(VectorOperators.LSHR, 10);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 140);
+ oV = iV.lanewise(VectorOperators.LSHL, 1).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 144);
+ oV = iV.lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 148);
+ oV = iV.lanewise(VectorOperators.LSHL, 23).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+ // The input stored at bit 23 kept only its low 9 bits; its bits from 9 upward start the next word.
+ oV = iV.lanewise(VectorOperators.LSHR, 9);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 152);
+ oV = iV.lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 156);
+ oV = iV.lanewise(VectorOperators.LSHL, 13).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 160);
+ oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+ // The input stored at bit 24 kept only its low 8 bits; its bits from 8 upward start the next word.
+ oV = iV.lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 164);
+ oV = iV.lanewise(VectorOperators.LSHL, 3).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 168);
+ oV = iV.lanewise(VectorOperators.LSHL, 14).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 172);
+ oV = iV.lanewise(VectorOperators.LSHL, 25).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+ // The input stored at bit 25 kept only its low 7 bits; its bits from 7 upward start the next word.
+ oV = iV.lanewise(VectorOperators.LSHR, 7);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 176);
+ oV = iV.lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 180);
+ oV = iV.lanewise(VectorOperators.LSHL, 15).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 184);
+ oV = iV.lanewise(VectorOperators.LSHL, 26).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+ // The input stored at bit 26 kept only its low 6 bits; its bits from 6 upward start the next word.
+ oV = iV.lanewise(VectorOperators.LSHR, 6);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 188);
+ oV = iV.lanewise(VectorOperators.LSHL, 5).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 192);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 196);
+ oV = iV.lanewise(VectorOperators.LSHL, 27).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+ // The input stored at bit 27 kept only its low 5 bits; its bits from 5 upward start the next word.
+ oV = iV.lanewise(VectorOperators.LSHR, 5);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 200);
+ oV = iV.lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 204);
+ oV = iV.lanewise(VectorOperators.LSHL, 17).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 208);
+ oV = iV.lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+ // The input stored at bit 28 kept only its low 4 bits; its bits from 4 upward start the next word.
+ oV = iV.lanewise(VectorOperators.LSHR, 4);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 212);
+ oV = iV.lanewise(VectorOperators.LSHL, 7).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 216);
+ oV = iV.lanewise(VectorOperators.LSHL, 18).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 220);
+ oV = iV.lanewise(VectorOperators.LSHL, 29).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+ // The input stored at bit 29 kept only its low 3 bits; its bits from 3 upward start the next word.
+ oV = iV.lanewise(VectorOperators.LSHR, 3);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 224);
+ oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 228);
+ oV = iV.lanewise(VectorOperators.LSHL, 19).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 232);
+ oV = iV.lanewise(VectorOperators.LSHL, 30).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+ // The input stored at bit 30 kept only its low 2 bits; its bits from 2 upward start the next word.
+ oV = iV.lanewise(VectorOperators.LSHR, 2);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 236);
+ oV = iV.lanewise(VectorOperators.LSHL, 9).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 240);
+ oV = iV.lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 244);
+ oV = iV.lanewise(VectorOperators.LSHL, 31).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+ // The input stored at bit 31 kept only its low 1 bit; its bits from 1 upward start the next word.
+ oV = iV.lanewise(VectorOperators.LSHR, 1);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 248);
+ oV = iV.lanewise(VectorOperators.LSHL, 10).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 252);
+ oV = iV.lanewise(VectorOperators.LSHL, 21).or(oV);
+
+ oV.intoArray(out, outpos);
+ }
+
+ private static void fastunpack11(final int[] in, int inpos, final int[] out, int outpos) { // Unpacks 22 input vectors (in[inpos + 0 .. inpos + 84], stride 4) into 64 output vectors at out[outpos ...], 11 bits per value.
+ var iV = IntVector.fromArray(SPECIES_128, in, inpos);
+ iV.and(MASK_11).intoArray(out, outpos);
+ outpos += VLEN_128;
+ // Fields lying fully inside the current word are extracted with a logical right shift followed by AND with MASK_11 (presumably the low-11-bit mask; its definition is outside this chunk).
+ iV.lanewise(VectorOperators.LSHR, 11).and(MASK_11).intoArray(out, outpos);
+ outpos += VLEN_128;
+ // The field at bit 22 straddles two input words: 10 low bits here, 1 high bit from the next word.
+ var oV = iV.lanewise(VectorOperators.LSHR, 22).and(MASK_11);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 4);
+ oV = iV.and(1).lanewise(VectorOperators.LSHL, 10).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 1).and(MASK_11).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 12).and(MASK_11).intoArray(out, outpos);
+ outpos += VLEN_128;
+ // The field at bit 23 straddles two input words: 9 low bits here, 2 high bits from the next word.
+ oV = iV.lanewise(VectorOperators.LSHR, 23).and(MASK_11);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 8);
+ oV = iV.and(3).lanewise(VectorOperators.LSHL, 9).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 2).and(MASK_11).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 13).and(MASK_11).intoArray(out, outpos);
+ outpos += VLEN_128;
+ // The field at bit 24 straddles two input words: 8 low bits here, 3 high bits from the next word.
+ oV = iV.lanewise(VectorOperators.LSHR, 24).and(MASK_11);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 12);
+ oV = iV.and(7).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 3).and(MASK_11).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 14).and(MASK_11).intoArray(out, outpos);
+ outpos += VLEN_128;
+ // The field at bit 25 straddles two input words: 7 low bits here, 4 high bits from the next word.
+ oV = iV.lanewise(VectorOperators.LSHR, 25).and(MASK_11);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 16);
+ oV = iV.and(15).lanewise(VectorOperators.LSHL, 7).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 4).and(MASK_11).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 15).and(MASK_11).intoArray(out, outpos);
+ outpos += VLEN_128;
+ // The field at bit 26 straddles two input words: 6 low bits here, 5 high bits from the next word.
+ oV = iV.lanewise(VectorOperators.LSHR, 26).and(MASK_11);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 20);
+ oV = iV.and(31).lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 5).and(MASK_11).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 16).and(MASK_11).intoArray(out, outpos);
+ outpos += VLEN_128;
+ // The field at bit 27 straddles two input words: 5 low bits here, 6 high bits from the next word.
+ oV = iV.lanewise(VectorOperators.LSHR, 27).and(MASK_11);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 24);
+ oV = iV.and(63).lanewise(VectorOperators.LSHL, 5).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 6).and(MASK_11).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 17).and(MASK_11).intoArray(out, outpos);
+ outpos += VLEN_128;
+ // The field at bit 28 straddles two input words: 4 low bits here, 7 high bits from the next word.
+ oV = iV.lanewise(VectorOperators.LSHR, 28).and(MASK_11);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 28);
+ oV = iV.and(127).lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 7).and(MASK_11).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 18).and(MASK_11).intoArray(out, outpos);
+ outpos += VLEN_128;
+ // The field at bit 29 straddles two input words: 3 low bits here, 8 high bits from the next word.
+ oV = iV.lanewise(VectorOperators.LSHR, 29).and(MASK_11);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 32);
+ oV = iV.and(255).lanewise(VectorOperators.LSHL, 3).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 8).and(MASK_11).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 19).and(MASK_11).intoArray(out, outpos);
+ outpos += VLEN_128;
+ // The field at bit 30 straddles two input words: 2 low bits here, 9 high bits from the next word.
+ oV = iV.lanewise(VectorOperators.LSHR, 30).and(MASK_11);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 36);
+ oV = iV.and(511).lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 9).and(MASK_11).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 20).and(MASK_11).intoArray(out, outpos);
+ outpos += VLEN_128;
+ // The field at bit 31 straddles two input words: 1 low bit here, 10 high bits from the next word.
+ oV = iV.lanewise(VectorOperators.LSHR, 31).and(MASK_11);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 40);
+ oV = iV.and(1023).lanewise(VectorOperators.LSHL, 1).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 10).and(MASK_11).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 21).and(MASK_11).intoArray(out, outpos);
+ outpos += VLEN_128;
+ // Bit offset is back to 0 (21 + 11 == 32): the input vectors at inpos + 44 .. inpos + 84 repeat the same schedule.
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 44);
+ iV.and(MASK_11).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 11).and(MASK_11).intoArray(out, outpos);
+ outpos += VLEN_128;
+ // The field at bit 22 straddles two input words: 10 low bits here, 1 high bit from the next word.
+ oV = iV.lanewise(VectorOperators.LSHR, 22).and(MASK_11);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 48);
+ oV = iV.and(1).lanewise(VectorOperators.LSHL, 10).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 1).and(MASK_11).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 12).and(MASK_11).intoArray(out, outpos);
+ outpos += VLEN_128;
+ // The field at bit 23 straddles two input words: 9 low bits here, 2 high bits from the next word.
+ oV = iV.lanewise(VectorOperators.LSHR, 23).and(MASK_11);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 52);
+ oV = iV.and(3).lanewise(VectorOperators.LSHL, 9).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 2).and(MASK_11).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 13).and(MASK_11).intoArray(out, outpos);
+ outpos += VLEN_128;
+ // The field at bit 24 straddles two input words: 8 low bits here, 3 high bits from the next word.
+ oV = iV.lanewise(VectorOperators.LSHR, 24).and(MASK_11);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 56);
+ oV = iV.and(7).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 3).and(MASK_11).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 14).and(MASK_11).intoArray(out, outpos);
+ outpos += VLEN_128;
+ // The field at bit 25 straddles two input words: 7 low bits here, 4 high bits from the next word.
+ oV = iV.lanewise(VectorOperators.LSHR, 25).and(MASK_11);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 60);
+ oV = iV.and(15).lanewise(VectorOperators.LSHL, 7).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 4).and(MASK_11).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 15).and(MASK_11).intoArray(out, outpos);
+ outpos += VLEN_128;
+ // The field at bit 26 straddles two input words: 6 low bits here, 5 high bits from the next word.
+ oV = iV.lanewise(VectorOperators.LSHR, 26).and(MASK_11);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 64);
+ oV = iV.and(31).lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 5).and(MASK_11).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 16).and(MASK_11).intoArray(out, outpos);
+ outpos += VLEN_128;
+ // The field at bit 27 straddles two input words: 5 low bits here, 6 high bits from the next word.
+ oV = iV.lanewise(VectorOperators.LSHR, 27).and(MASK_11);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 68);
+ oV = iV.and(63).lanewise(VectorOperators.LSHL, 5).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 6).and(MASK_11).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 17).and(MASK_11).intoArray(out, outpos);
+ outpos += VLEN_128;
+ // The field at bit 28 straddles two input words: 4 low bits here, 7 high bits from the next word.
+ oV = iV.lanewise(VectorOperators.LSHR, 28).and(MASK_11);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 72);
+ oV = iV.and(127).lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 7).and(MASK_11).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 18).and(MASK_11).intoArray(out, outpos);
+ outpos += VLEN_128;
+ // The field at bit 29 straddles two input words: 3 low bits here, 8 high bits from the next word.
+ oV = iV.lanewise(VectorOperators.LSHR, 29).and(MASK_11);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 76);
+ oV = iV.and(255).lanewise(VectorOperators.LSHL, 3).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 8).and(MASK_11).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 19).and(MASK_11).intoArray(out, outpos);
+ outpos += VLEN_128;
+ // The field at bit 30 straddles two input words: 2 low bits here, 9 high bits from the next word.
+ oV = iV.lanewise(VectorOperators.LSHR, 30).and(MASK_11);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 80);
+ oV = iV.and(511).lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 9).and(MASK_11).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 20).and(MASK_11).intoArray(out, outpos);
+ outpos += VLEN_128;
+ // The field at bit 31 straddles two input words: 1 low bit here, 10 high bits from the next word.
+ oV = iV.lanewise(VectorOperators.LSHR, 31).and(MASK_11);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 84);
+ oV = iV.and(1023).lanewise(VectorOperators.LSHL, 1).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 10).and(MASK_11).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 21).and(MASK_11).intoArray(out, outpos);
+ }
+
+ /**
+  * Packs 12-bit fields from {@code in} into {@code out}, masking every input.
+  *
+  * <p>Runs 8 identical rounds. Each round loads 8 vectors from {@code in}
+  * (offsets {@code inpos} .. {@code inpos + 28}, step 4) and stores 3 packed
+  * vectors to {@code out}, so in total 64 loads (up to {@code inpos + 252}) and
+  * 24 stores are performed. Packing is lane-wise: a given output lane only
+  * receives bits from the same lane of the input vectors.
+  *
+  * <p>Every input vector is ANDed with {@code MASK_12} before use
+  * (presumably {@code (1 << 12) - 1}; the constant is defined elsewhere in
+  * this class).
+  *
+  * @param in     source array
+  * @param inpos  index in {@code in} of the first value to pack
+  * @param out    destination array
+  * @param outpos index in {@code out} of the first packed word to write
+  */
+ private static void fastpack12(final int[] in, int inpos, final int[] out, int outpos) {
+     for (int round = 0; round < 8; round++) {
+         // Word 0: fields 0 and 1 in full, plus the low 8 bits of field 2.
+         IntVector first = IntVector.fromArray(SPECIES_128, in, inpos).and(MASK_12);
+         IntVector field = IntVector.fromArray(SPECIES_128, in, inpos + 4).and(MASK_12);
+         IntVector packed = first.or(field.lanewise(VectorOperators.LSHL, 12));
+         IntVector carry = IntVector.fromArray(SPECIES_128, in, inpos + 8).and(MASK_12);
+         packed = packed.or(carry.lanewise(VectorOperators.LSHL, 24));
+         packed.intoArray(out, outpos);
+         outpos += VLEN_128;
+
+         // Word 1: high 4 bits of field 2, fields 3 and 4, low 4 bits of field 5.
+         packed = carry.lanewise(VectorOperators.LSHR, 8);
+         field = IntVector.fromArray(SPECIES_128, in, inpos + 12).and(MASK_12);
+         packed = packed.or(field.lanewise(VectorOperators.LSHL, 4));
+         field = IntVector.fromArray(SPECIES_128, in, inpos + 16).and(MASK_12);
+         packed = packed.or(field.lanewise(VectorOperators.LSHL, 16));
+         carry = IntVector.fromArray(SPECIES_128, in, inpos + 20).and(MASK_12);
+         packed = packed.or(carry.lanewise(VectorOperators.LSHL, 28));
+         packed.intoArray(out, outpos);
+         outpos += VLEN_128;
+
+         // Word 2: high 8 bits of field 5, then fields 6 and 7 in full.
+         packed = carry.lanewise(VectorOperators.LSHR, 4);
+         field = IntVector.fromArray(SPECIES_128, in, inpos + 24).and(MASK_12);
+         packed = packed.or(field.lanewise(VectorOperators.LSHL, 8));
+         field = IntVector.fromArray(SPECIES_128, in, inpos + 28).and(MASK_12);
+         packed = packed.or(field.lanewise(VectorOperators.LSHL, 20));
+         packed.intoArray(out, outpos);
+         outpos += VLEN_128;
+
+         // Next round starts 8 input vectors (32 ints) further on.
+         inpos += 32;
+     }
+ }
+
+ /**
+  * Packs 12-bit fields from {@code in} into {@code out} without masking.
+  *
+  * <p>Uses the same field layout as the masked 12-bit packer: 8 rounds, each
+  * loading 8 vectors from {@code in} (offsets {@code inpos} ..
+  * {@code inpos + 28}, step 4) and storing 3 packed vectors to {@code out}.
+  * In total 64 loads (up to {@code inpos + 252}) and 24 stores are performed.
+  *
+  * <p>No mask is applied to the inputs, so any bits set above bit 11 of an
+  * input value are ORed into the neighbouring fields of the packed word.
+  *
+  * @param in     source array
+  * @param inpos  index in {@code in} of the first value to pack
+  * @param out    destination array
+  * @param outpos index in {@code out} of the first packed word to write
+  */
+ private static void fastpackNoMask12(final int[] in, int inpos, final int[] out, int outpos) {
+     for (int round = 0; round < 8; round++) {
+         // Word 0: fields 0 and 1 in full, plus the low 8 bits of field 2.
+         IntVector first = IntVector.fromArray(SPECIES_128, in, inpos);
+         IntVector field = IntVector.fromArray(SPECIES_128, in, inpos + 4);
+         IntVector packed = first.or(field.lanewise(VectorOperators.LSHL, 12));
+         IntVector carry = IntVector.fromArray(SPECIES_128, in, inpos + 8);
+         packed = packed.or(carry.lanewise(VectorOperators.LSHL, 24));
+         packed.intoArray(out, outpos);
+         outpos += VLEN_128;
+
+         // Word 1: remainder of field 2, fields 3 and 4, low 4 bits of field 5.
+         packed = carry.lanewise(VectorOperators.LSHR, 8);
+         field = IntVector.fromArray(SPECIES_128, in, inpos + 12);
+         packed = packed.or(field.lanewise(VectorOperators.LSHL, 4));
+         field = IntVector.fromArray(SPECIES_128, in, inpos + 16);
+         packed = packed.or(field.lanewise(VectorOperators.LSHL, 16));
+         carry = IntVector.fromArray(SPECIES_128, in, inpos + 20);
+         packed = packed.or(carry.lanewise(VectorOperators.LSHL, 28));
+         packed.intoArray(out, outpos);
+         outpos += VLEN_128;
+
+         // Word 2: remainder of field 5, then fields 6 and 7 in full.
+         packed = carry.lanewise(VectorOperators.LSHR, 4);
+         field = IntVector.fromArray(SPECIES_128, in, inpos + 24);
+         packed = packed.or(field.lanewise(VectorOperators.LSHL, 8));
+         field = IntVector.fromArray(SPECIES_128, in, inpos + 28);
+         packed = packed.or(field.lanewise(VectorOperators.LSHL, 20));
+         packed.intoArray(out, outpos);
+         outpos += VLEN_128;
+
+         // Next round starts 8 input vectors (32 ints) further on.
+         inpos += 32;
+     }
+ }
+
+ /**
+  * Unpacks 12-bit fields from packed words in {@code in} into {@code out}.
+  *
+  * <p>Runs 8 identical rounds. Each round loads 3 packed vectors from
+  * {@code in} (offsets {@code inpos}, {@code inpos + 4}, {@code inpos + 8})
+  * and stores 8 vectors of extracted fields to {@code out}, advancing
+  * {@code outpos} by {@code VLEN_128} after each store. In total 24 loads
+  * (up to {@code inpos + 92}) and 64 stores are performed. Extraction is
+  * lane-wise and mirrors the layout produced by the 12-bit packers in this
+  * class.
+  *
+  * @param in     array holding the packed words
+  * @param inpos  index in {@code in} of the first packed word
+  * @param out    destination array for the unpacked values
+  * @param outpos index in {@code out} of the first value to write
+  */
+ private static void fastunpack12(final int[] in, int inpos, final int[] out, int outpos) {
+     for (int round = 0; round < 8; round++) {
+         // Packed word 0 holds fields 0 and 1 plus the low 8 bits of field 2.
+         IntVector word = IntVector.fromArray(SPECIES_128, in, inpos);
+         word.and(MASK_12).intoArray(out, outpos);
+         outpos += VLEN_128;
+
+         word.lanewise(VectorOperators.LSHR, 12).and(MASK_12).intoArray(out, outpos);
+         outpos += VLEN_128;
+
+         IntVector split = word.lanewise(VectorOperators.LSHR, 24).and(MASK_12);
+
+         // Packed word 1 supplies the top 4 bits of field 2, fields 3 and 4,
+         // and the low 4 bits of field 5.
+         word = IntVector.fromArray(SPECIES_128, in, inpos + 4);
+         split.or(word.and(15).lanewise(VectorOperators.LSHL, 8)).intoArray(out, outpos);
+         outpos += VLEN_128;
+
+         word.lanewise(VectorOperators.LSHR, 4).and(MASK_12).intoArray(out, outpos);
+         outpos += VLEN_128;
+
+         word.lanewise(VectorOperators.LSHR, 16).and(MASK_12).intoArray(out, outpos);
+         outpos += VLEN_128;
+
+         split = word.lanewise(VectorOperators.LSHR, 28).and(MASK_12);
+
+         // Packed word 2 supplies the top 8 bits of field 5, then fields 6 and 7.
+         word = IntVector.fromArray(SPECIES_128, in, inpos + 8);
+         split.or(word.and(255).lanewise(VectorOperators.LSHL, 4)).intoArray(out, outpos);
+         outpos += VLEN_128;
+
+         word.lanewise(VectorOperators.LSHR, 8).and(MASK_12).intoArray(out, outpos);
+         outpos += VLEN_128;
+
+         word.lanewise(VectorOperators.LSHR, 20).and(MASK_12).intoArray(out, outpos);
+         outpos += VLEN_128;
+
+         // Next round starts 3 packed vectors (12 ints) further on.
+         inpos += 12;
+     }
+ }
+
+ private static void fastpack13(final int[] in, int inpos, final int[] out, int outpos) {
+ var iV = IntVector.fromArray(SPECIES_128, in, inpos);
+ var oV = iV.and(MASK_13);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 4);
+ oV = iV.and(MASK_13).lanewise(VectorOperators.LSHL, 13).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 8);
+ oV = iV.and(MASK_13).lanewise(VectorOperators.LSHL, 26).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_13).lanewise(VectorOperators.LSHR, 6);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 12);
+ oV = iV.and(MASK_13).lanewise(VectorOperators.LSHL, 7).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 16);
+ oV = iV.and(MASK_13).lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_13).lanewise(VectorOperators.LSHR, 12);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 20);
+ oV = iV.and(MASK_13).lanewise(VectorOperators.LSHL, 1).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 24);
+ oV = iV.and(MASK_13).lanewise(VectorOperators.LSHL, 14).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 28);
+ oV = iV.and(MASK_13).lanewise(VectorOperators.LSHL, 27).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_13).lanewise(VectorOperators.LSHR, 5);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 32);
+ oV = iV.and(MASK_13).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 36);
+ oV = iV.and(MASK_13).lanewise(VectorOperators.LSHL, 21).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_13).lanewise(VectorOperators.LSHR, 11);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 40);
+ oV = iV.and(MASK_13).lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 44);
+ oV = iV.and(MASK_13).lanewise(VectorOperators.LSHL, 15).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 48);
+ oV = iV.and(MASK_13).lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_13).lanewise(VectorOperators.LSHR, 4);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 52);
+ oV = iV.and(MASK_13).lanewise(VectorOperators.LSHL, 9).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 56);
+ oV = iV.and(MASK_13).lanewise(VectorOperators.LSHL, 22).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_13).lanewise(VectorOperators.LSHR, 10);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 60);
+ oV = iV.and(MASK_13).lanewise(VectorOperators.LSHL, 3).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 64);
+ oV = iV.and(MASK_13).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 68);
+ oV = iV.and(MASK_13).lanewise(VectorOperators.LSHL, 29).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_13).lanewise(VectorOperators.LSHR, 3);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 72);
+ oV = iV.and(MASK_13).lanewise(VectorOperators.LSHL, 10).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 76);
+ oV = iV.and(MASK_13).lanewise(VectorOperators.LSHL, 23).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_13).lanewise(VectorOperators.LSHR, 9);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 80);
+ oV = iV.and(MASK_13).lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 84);
+ oV = iV.and(MASK_13).lanewise(VectorOperators.LSHL, 17).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 88);
+ oV = iV.and(MASK_13).lanewise(VectorOperators.LSHL, 30).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_13).lanewise(VectorOperators.LSHR, 2);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 92);
+ oV = iV.and(MASK_13).lanewise(VectorOperators.LSHL, 11).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 96);
+ oV = iV.and(MASK_13).lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_13).lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 100);
+ oV = iV.and(MASK_13).lanewise(VectorOperators.LSHL, 5).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 104);
+ oV = iV.and(MASK_13).lanewise(VectorOperators.LSHL, 18).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 108);
+ oV = iV.and(MASK_13).lanewise(VectorOperators.LSHL, 31).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_13).lanewise(VectorOperators.LSHR, 1);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 112);
+ oV = iV.and(MASK_13).lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 116);
+ oV = iV.and(MASK_13).lanewise(VectorOperators.LSHL, 25).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_13).lanewise(VectorOperators.LSHR, 7);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 120);
+ oV = iV.and(MASK_13).lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 124);
+ oV = iV.and(MASK_13).lanewise(VectorOperators.LSHL, 19).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 128);
+ oV = iV.and(MASK_13);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 132);
+ oV = iV.and(MASK_13).lanewise(VectorOperators.LSHL, 13).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 136);
+ oV = iV.and(MASK_13).lanewise(VectorOperators.LSHL, 26).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_13).lanewise(VectorOperators.LSHR, 6);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 140);
+ oV = iV.and(MASK_13).lanewise(VectorOperators.LSHL, 7).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 144);
+ oV = iV.and(MASK_13).lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_13).lanewise(VectorOperators.LSHR, 12);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 148);
+ oV = iV.and(MASK_13).lanewise(VectorOperators.LSHL, 1).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 152);
+ oV = iV.and(MASK_13).lanewise(VectorOperators.LSHL, 14).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 156);
+ oV = iV.and(MASK_13).lanewise(VectorOperators.LSHL, 27).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_13).lanewise(VectorOperators.LSHR, 5);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 160);
+ oV = iV.and(MASK_13).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 164);
+ oV = iV.and(MASK_13).lanewise(VectorOperators.LSHL, 21).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_13).lanewise(VectorOperators.LSHR, 11);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 168);
+ oV = iV.and(MASK_13).lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 172);
+ oV = iV.and(MASK_13).lanewise(VectorOperators.LSHL, 15).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 176);
+ oV = iV.and(MASK_13).lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_13).lanewise(VectorOperators.LSHR, 4);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 180);
+ oV = iV.and(MASK_13).lanewise(VectorOperators.LSHL, 9).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 184);
+ oV = iV.and(MASK_13).lanewise(VectorOperators.LSHL, 22).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_13).lanewise(VectorOperators.LSHR, 10);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 188);
+ oV = iV.and(MASK_13).lanewise(VectorOperators.LSHL, 3).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 192);
+ oV = iV.and(MASK_13).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 196);
+ oV = iV.and(MASK_13).lanewise(VectorOperators.LSHL, 29).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_13).lanewise(VectorOperators.LSHR, 3);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 200);
+ oV = iV.and(MASK_13).lanewise(VectorOperators.LSHL, 10).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 204);
+ oV = iV.and(MASK_13).lanewise(VectorOperators.LSHL, 23).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_13).lanewise(VectorOperators.LSHR, 9);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 208);
+ oV = iV.and(MASK_13).lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 212);
+ oV = iV.and(MASK_13).lanewise(VectorOperators.LSHL, 17).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 216);
+ oV = iV.and(MASK_13).lanewise(VectorOperators.LSHL, 30).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_13).lanewise(VectorOperators.LSHR, 2);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 220);
+ oV = iV.and(MASK_13).lanewise(VectorOperators.LSHL, 11).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 224);
+ oV = iV.and(MASK_13).lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_13).lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 228);
+ oV = iV.and(MASK_13).lanewise(VectorOperators.LSHL, 5).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 232);
+ oV = iV.and(MASK_13).lanewise(VectorOperators.LSHL, 18).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 236);
+ oV = iV.and(MASK_13).lanewise(VectorOperators.LSHL, 31).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_13).lanewise(VectorOperators.LSHR, 1);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 240);
+ oV = iV.and(MASK_13).lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 244);
+ oV = iV.and(MASK_13).lanewise(VectorOperators.LSHL, 25).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_13).lanewise(VectorOperators.LSHR, 7);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 248);
+ oV = iV.and(MASK_13).lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 252);
+ oV = iV.and(MASK_13).lanewise(VectorOperators.LSHL, 19).or(oV);
+
+ oV.intoArray(out, outpos);
+ }
+
+ private static void fastpackNoMask13(final int[] in, int inpos, final int[] out, int outpos) { // Packs the 64 vectors loaded from in[inpos + 0 .. inpos + 252] at 13 bits per value into 26 vectors stored from out[outpos]; inputs are NOT masked.
+ var iV = IntVector.fromArray(SPECIES_128, in, inpos);
+ var oV = iV; // value 0 is used as-is: any bit above bit 12 would overlap the fields OR-ed in below (presumably callers guarantee 13-bit inputs - TODO confirm)
+ // Layout is lane-wise: each lane builds its own bit stream from the same lane of successive input vectors. Assumes SPECIES_128 has 4 int lanes and VLEN_128 == 4 (matches the +4 input stride) - TODO confirm against the declarations.
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 4);
+ oV = iV.lanewise(VectorOperators.LSHL, 13).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 8);
+ oV = iV.lanewise(VectorOperators.LSHL, 26).or(oV); // only bits 0-5 of this value fit in the current word; the rest is carried after the store
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 6); // carry: bits 6-12 of the split value start the next word
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 12);
+ oV = iV.lanewise(VectorOperators.LSHL, 7).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 16);
+ oV = iV.lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 12); // carry: bit 12 of the split value starts the next word
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 20);
+ oV = iV.lanewise(VectorOperators.LSHL, 1).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 24);
+ oV = iV.lanewise(VectorOperators.LSHL, 14).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 28);
+ oV = iV.lanewise(VectorOperators.LSHL, 27).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 5); // carry: bits 5-12 of the split value start the next word
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 32);
+ oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 36);
+ oV = iV.lanewise(VectorOperators.LSHL, 21).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 11); // carry: bits 11-12 of the split value start the next word
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 40);
+ oV = iV.lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 44);
+ oV = iV.lanewise(VectorOperators.LSHL, 15).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 48);
+ oV = iV.lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 4); // carry: bits 4-12 of the split value start the next word
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 52);
+ oV = iV.lanewise(VectorOperators.LSHL, 9).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 56);
+ oV = iV.lanewise(VectorOperators.LSHL, 22).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 10); // carry: bits 10-12 of the split value start the next word
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 60);
+ oV = iV.lanewise(VectorOperators.LSHL, 3).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 64);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 68);
+ oV = iV.lanewise(VectorOperators.LSHL, 29).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 3); // carry: bits 3-12 of the split value start the next word
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 72);
+ oV = iV.lanewise(VectorOperators.LSHL, 10).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 76);
+ oV = iV.lanewise(VectorOperators.LSHL, 23).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 9); // carry: bits 9-12 of the split value start the next word
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 80);
+ oV = iV.lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 84);
+ oV = iV.lanewise(VectorOperators.LSHL, 17).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 88);
+ oV = iV.lanewise(VectorOperators.LSHL, 30).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 2); // carry: bits 2-12 of the split value start the next word
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 92);
+ oV = iV.lanewise(VectorOperators.LSHL, 11).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 96);
+ oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 8); // carry: bits 8-12 of the split value start the next word
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 100);
+ oV = iV.lanewise(VectorOperators.LSHL, 5).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 104);
+ oV = iV.lanewise(VectorOperators.LSHL, 18).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 108);
+ oV = iV.lanewise(VectorOperators.LSHL, 31).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 1); // carry: bits 1-12 of the split value start the next word
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 112);
+ oV = iV.lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 116);
+ oV = iV.lanewise(VectorOperators.LSHL, 25).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 7); // carry: bits 7-12 of the split value start the next word
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 120);
+ oV = iV.lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 124);
+ oV = iV.lanewise(VectorOperators.LSHL, 19).or(oV); // 19 + 13 == 32: this value ends exactly on the word boundary, so nothing is carried
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+ // 32 values x 13 bits filled 13 output words exactly; the second round repeats the same shift schedule for the input vectors at inpos + 128 .. inpos + 252.
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 128);
+ oV = iV;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 132);
+ oV = iV.lanewise(VectorOperators.LSHL, 13).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 136);
+ oV = iV.lanewise(VectorOperators.LSHL, 26).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 6); // carry: bits 6-12 of the split value start the next word
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 140);
+ oV = iV.lanewise(VectorOperators.LSHL, 7).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 144);
+ oV = iV.lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 12); // carry: bit 12 of the split value starts the next word
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 148);
+ oV = iV.lanewise(VectorOperators.LSHL, 1).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 152);
+ oV = iV.lanewise(VectorOperators.LSHL, 14).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 156);
+ oV = iV.lanewise(VectorOperators.LSHL, 27).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 5); // carry: bits 5-12 of the split value start the next word
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 160);
+ oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 164);
+ oV = iV.lanewise(VectorOperators.LSHL, 21).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 11); // carry: bits 11-12 of the split value start the next word
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 168);
+ oV = iV.lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 172);
+ oV = iV.lanewise(VectorOperators.LSHL, 15).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 176);
+ oV = iV.lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 4); // carry: bits 4-12 of the split value start the next word
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 180);
+ oV = iV.lanewise(VectorOperators.LSHL, 9).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 184);
+ oV = iV.lanewise(VectorOperators.LSHL, 22).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 10); // carry: bits 10-12 of the split value start the next word
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 188);
+ oV = iV.lanewise(VectorOperators.LSHL, 3).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 192);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 196);
+ oV = iV.lanewise(VectorOperators.LSHL, 29).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 3); // carry: bits 3-12 of the split value start the next word
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 200);
+ oV = iV.lanewise(VectorOperators.LSHL, 10).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 204);
+ oV = iV.lanewise(VectorOperators.LSHL, 23).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 9); // carry: bits 9-12 of the split value start the next word
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 208);
+ oV = iV.lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 212);
+ oV = iV.lanewise(VectorOperators.LSHL, 17).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 216);
+ oV = iV.lanewise(VectorOperators.LSHL, 30).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 2); // carry: bits 2-12 of the split value start the next word
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 220);
+ oV = iV.lanewise(VectorOperators.LSHL, 11).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 224);
+ oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 8); // carry: bits 8-12 of the split value start the next word
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 228);
+ oV = iV.lanewise(VectorOperators.LSHL, 5).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 232);
+ oV = iV.lanewise(VectorOperators.LSHL, 18).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 236);
+ oV = iV.lanewise(VectorOperators.LSHL, 31).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 1); // carry: bits 1-12 of the split value start the next word
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 240);
+ oV = iV.lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 244);
+ oV = iV.lanewise(VectorOperators.LSHL, 25).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 7); // carry: bits 7-12 of the split value start the next word
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 248);
+ oV = iV.lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 252);
+ oV = iV.lanewise(VectorOperators.LSHL, 19).or(oV);
+
+ oV.intoArray(out, outpos); // 26th and final store; outpos (a local copy) is not advanced afterwards
+ }
+
+ private static void fastunpack13(final int[] in, int inpos, final int[] out, int outpos) { // Unpacks the 26 vectors loaded from in[inpos + 0 .. inpos + 100] into 64 vectors of 13-bit values stored from out[outpos].
+ var iV = IntVector.fromArray(SPECIES_128, in, inpos);
+ iV.and(MASK_13).intoArray(out, outpos); // value 0: bits 0-12 of the first packed word
+ outpos += VLEN_128;
+ // Bit positions mirror the schedule written by fastpackNoMask13 (0, 13, 26, then a split value). MASK_13 is declared outside this view; presumably a broadcast of (1 << 13) - 1 - TODO confirm.
+ iV.lanewise(VectorOperators.LSHR, 13).and(MASK_13).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ var oV = iV.lanewise(VectorOperators.LSHR, 26).and(MASK_13); // bits 0-5 of the split value: the last 6 bits of this word
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 4);
+ oV = iV.and(127).lanewise(VectorOperators.LSHL, 6).or(oV); // bits 6-12 of the split value: low 7 bits of the next word
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 7).and(MASK_13).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 20).and(MASK_13);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 8);
+ oV = iV.and(1).lanewise(VectorOperators.LSHL, 12).or(oV); // bit 12 of the split value: lowest bit of the next word
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 1).and(MASK_13).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 14).and(MASK_13).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 27).and(MASK_13);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 12);
+ oV = iV.and(255).lanewise(VectorOperators.LSHL, 5).or(oV); // bits 5-12 of the split value: low 8 bits of the next word
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 8).and(MASK_13).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 21).and(MASK_13);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 16);
+ oV = iV.and(3).lanewise(VectorOperators.LSHL, 11).or(oV); // bits 11-12 of the split value: low 2 bits of the next word
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 2).and(MASK_13).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 15).and(MASK_13).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 28).and(MASK_13);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 20);
+ oV = iV.and(511).lanewise(VectorOperators.LSHL, 4).or(oV); // bits 4-12 of the split value: low 9 bits of the next word
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 9).and(MASK_13).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 22).and(MASK_13);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 24);
+ oV = iV.and(7).lanewise(VectorOperators.LSHL, 10).or(oV); // bits 10-12 of the split value: low 3 bits of the next word
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 3).and(MASK_13).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 16).and(MASK_13).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 29).and(MASK_13);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 28);
+ oV = iV.and(1023).lanewise(VectorOperators.LSHL, 3).or(oV); // bits 3-12 of the split value: low 10 bits of the next word
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 10).and(MASK_13).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 23).and(MASK_13);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 32);
+ oV = iV.and(15).lanewise(VectorOperators.LSHL, 9).or(oV); // bits 9-12 of the split value: low 4 bits of the next word
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 4).and(MASK_13).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 17).and(MASK_13).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 30).and(MASK_13);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 36);
+ oV = iV.and(2047).lanewise(VectorOperators.LSHL, 2).or(oV); // bits 2-12 of the split value: low 11 bits of the next word
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 11).and(MASK_13).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 24).and(MASK_13);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 40);
+ oV = iV.and(31).lanewise(VectorOperators.LSHL, 8).or(oV); // bits 8-12 of the split value: low 5 bits of the next word
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 5).and(MASK_13).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 18).and(MASK_13).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 31).and(MASK_13);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 44);
+ oV = iV.and(4095).lanewise(VectorOperators.LSHL, 1).or(oV); // bits 1-12 of the split value: low 12 bits of the next word
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 12).and(MASK_13).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 25).and(MASK_13);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 48);
+ oV = iV.and(63).lanewise(VectorOperators.LSHL, 7).or(oV); // bits 7-12 of the split value: low 6 bits of the next word
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 6).and(MASK_13).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 19).and(MASK_13).intoArray(out, outpos); // 19 + 13 == 32: this value ends exactly on the word boundary
+ outpos += VLEN_128;
+ // 13 packed words yielded 32 values exactly; the second round repeats the same schedule on the input vectors at inpos + 52 .. inpos + 100.
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 52);
+ iV.and(MASK_13).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 13).and(MASK_13).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 26).and(MASK_13);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 56);
+ oV = iV.and(127).lanewise(VectorOperators.LSHL, 6).or(oV); // bits 6-12 of the split value: low 7 bits of the next word
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 7).and(MASK_13).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 20).and(MASK_13);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 60);
+ oV = iV.and(1).lanewise(VectorOperators.LSHL, 12).or(oV); // bit 12 of the split value: lowest bit of the next word
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 1).and(MASK_13).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 14).and(MASK_13).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 27).and(MASK_13);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 64);
+ oV = iV.and(255).lanewise(VectorOperators.LSHL, 5).or(oV); // bits 5-12 of the split value: low 8 bits of the next word
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 8).and(MASK_13).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 21).and(MASK_13);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 68);
+ oV = iV.and(3).lanewise(VectorOperators.LSHL, 11).or(oV); // bits 11-12 of the split value: low 2 bits of the next word
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 2).and(MASK_13).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 15).and(MASK_13).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 28).and(MASK_13);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 72);
+ oV = iV.and(511).lanewise(VectorOperators.LSHL, 4).or(oV); // bits 4-12 of the split value: low 9 bits of the next word
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 9).and(MASK_13).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 22).and(MASK_13);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 76);
+ oV = iV.and(7).lanewise(VectorOperators.LSHL, 10).or(oV); // bits 10-12 of the split value: low 3 bits of the next word
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 3).and(MASK_13).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 16).and(MASK_13).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 29).and(MASK_13);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 80);
+ oV = iV.and(1023).lanewise(VectorOperators.LSHL, 3).or(oV); // bits 3-12 of the split value: low 10 bits of the next word
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 10).and(MASK_13).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 23).and(MASK_13);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 84);
+ oV = iV.and(15).lanewise(VectorOperators.LSHL, 9).or(oV); // bits 9-12 of the split value: low 4 bits of the next word
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 4).and(MASK_13).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 17).and(MASK_13).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 30).and(MASK_13);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 88);
+ oV = iV.and(2047).lanewise(VectorOperators.LSHL, 2).or(oV); // bits 2-12 of the split value: low 11 bits of the next word
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 11).and(MASK_13).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 24).and(MASK_13);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 92);
+ oV = iV.and(31).lanewise(VectorOperators.LSHL, 8).or(oV); // bits 8-12 of the split value: low 5 bits of the next word
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 5).and(MASK_13).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 18).and(MASK_13).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 31).and(MASK_13);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 96);
+ oV = iV.and(4095).lanewise(VectorOperators.LSHL, 1).or(oV); // bits 1-12 of the split value: low 12 bits of the next word
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 12).and(MASK_13).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 25).and(MASK_13);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 100);
+ oV = iV.and(63).lanewise(VectorOperators.LSHL, 7).or(oV); // bits 7-12 of the split value: low 6 bits of the next word
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 6).and(MASK_13).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 19).and(MASK_13).intoArray(out, outpos); // 64th and final store; outpos (a local copy) is not advanced afterwards
+ }
+
+ private static void fastpack14(final int[] in, int inpos, final int[] out, int outpos) {
+ var iV = IntVector.fromArray(SPECIES_128, in, inpos);
+ var oV = iV.and(MASK_14);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 4);
+ oV = iV.and(MASK_14).lanewise(VectorOperators.LSHL, 14).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 8);
+ oV = iV.and(MASK_14).lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_14).lanewise(VectorOperators.LSHR, 4);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 12);
+ oV = iV.and(MASK_14).lanewise(VectorOperators.LSHL, 10).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 16);
+ oV = iV.and(MASK_14).lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_14).lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 20);
+ oV = iV.and(MASK_14).lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 24);
+ oV = iV.and(MASK_14).lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_14).lanewise(VectorOperators.LSHR, 12);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 28);
+ oV = iV.and(MASK_14).lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 32);
+ oV = iV.and(MASK_14).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 36);
+ oV = iV.and(MASK_14).lanewise(VectorOperators.LSHL, 30).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_14).lanewise(VectorOperators.LSHR, 2);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 40);
+ oV = iV.and(MASK_14).lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 44);
+ oV = iV.and(MASK_14).lanewise(VectorOperators.LSHL, 26).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_14).lanewise(VectorOperators.LSHR, 6);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 48);
+ oV = iV.and(MASK_14).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 52);
+ oV = iV.and(MASK_14).lanewise(VectorOperators.LSHL, 22).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_14).lanewise(VectorOperators.LSHR, 10);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 56);
+ oV = iV.and(MASK_14).lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 60);
+ oV = iV.and(MASK_14).lanewise(VectorOperators.LSHL, 18).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 64);
+ oV = iV.and(MASK_14);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 68);
+ oV = iV.and(MASK_14).lanewise(VectorOperators.LSHL, 14).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 72);
+ oV = iV.and(MASK_14).lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_14).lanewise(VectorOperators.LSHR, 4);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 76);
+ oV = iV.and(MASK_14).lanewise(VectorOperators.LSHL, 10).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 80);
+ oV = iV.and(MASK_14).lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_14).lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 84);
+ oV = iV.and(MASK_14).lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 88);
+ oV = iV.and(MASK_14).lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_14).lanewise(VectorOperators.LSHR, 12);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 92);
+ oV = iV.and(MASK_14).lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 96);
+ oV = iV.and(MASK_14).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 100);
+ oV = iV.and(MASK_14).lanewise(VectorOperators.LSHL, 30).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_14).lanewise(VectorOperators.LSHR, 2);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 104);
+ oV = iV.and(MASK_14).lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 108);
+ oV = iV.and(MASK_14).lanewise(VectorOperators.LSHL, 26).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_14).lanewise(VectorOperators.LSHR, 6);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 112);
+ oV = iV.and(MASK_14).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 116);
+ oV = iV.and(MASK_14).lanewise(VectorOperators.LSHL, 22).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_14).lanewise(VectorOperators.LSHR, 10);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 120);
+ oV = iV.and(MASK_14).lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 124);
+ oV = iV.and(MASK_14).lanewise(VectorOperators.LSHL, 18).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 128);
+ oV = iV.and(MASK_14);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 132);
+ oV = iV.and(MASK_14).lanewise(VectorOperators.LSHL, 14).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 136);
+ oV = iV.and(MASK_14).lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_14).lanewise(VectorOperators.LSHR, 4);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 140);
+ oV = iV.and(MASK_14).lanewise(VectorOperators.LSHL, 10).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 144);
+ oV = iV.and(MASK_14).lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_14).lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 148);
+ oV = iV.and(MASK_14).lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 152);
+ oV = iV.and(MASK_14).lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_14).lanewise(VectorOperators.LSHR, 12);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 156);
+ oV = iV.and(MASK_14).lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 160);
+ oV = iV.and(MASK_14).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 164);
+ oV = iV.and(MASK_14).lanewise(VectorOperators.LSHL, 30).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_14).lanewise(VectorOperators.LSHR, 2);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 168);
+ oV = iV.and(MASK_14).lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 172);
+ oV = iV.and(MASK_14).lanewise(VectorOperators.LSHL, 26).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_14).lanewise(VectorOperators.LSHR, 6);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 176);
+ oV = iV.and(MASK_14).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 180);
+ oV = iV.and(MASK_14).lanewise(VectorOperators.LSHL, 22).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_14).lanewise(VectorOperators.LSHR, 10);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 184);
+ oV = iV.and(MASK_14).lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 188);
+ oV = iV.and(MASK_14).lanewise(VectorOperators.LSHL, 18).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 192);
+ oV = iV.and(MASK_14);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 196);
+ oV = iV.and(MASK_14).lanewise(VectorOperators.LSHL, 14).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 200);
+ oV = iV.and(MASK_14).lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_14).lanewise(VectorOperators.LSHR, 4);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 204);
+ oV = iV.and(MASK_14).lanewise(VectorOperators.LSHL, 10).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 208);
+ oV = iV.and(MASK_14).lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_14).lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 212);
+ oV = iV.and(MASK_14).lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 216);
+ oV = iV.and(MASK_14).lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_14).lanewise(VectorOperators.LSHR, 12);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 220);
+ oV = iV.and(MASK_14).lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 224);
+ oV = iV.and(MASK_14).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 228);
+ oV = iV.and(MASK_14).lanewise(VectorOperators.LSHL, 30).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_14).lanewise(VectorOperators.LSHR, 2);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 232);
+ oV = iV.and(MASK_14).lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 236);
+ oV = iV.and(MASK_14).lanewise(VectorOperators.LSHL, 26).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_14).lanewise(VectorOperators.LSHR, 6);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 240);
+ oV = iV.and(MASK_14).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 244);
+ oV = iV.and(MASK_14).lanewise(VectorOperators.LSHL, 22).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_14).lanewise(VectorOperators.LSHR, 10);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 248);
+ oV = iV.and(MASK_14).lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 252);
+ oV = iV.and(MASK_14).lanewise(VectorOperators.LSHL, 18).or(oV);
+
+ oV.intoArray(out, outpos);
+ }
+
+ /**
+  * Bit-packs 64 input vectors from {@code in} into 28 output vectors in {@code out},
+  * giving every input value a 14-bit field and applying no mask to the inputs.
+  *
+  * <p>Packing is lane-wise: input vector {@code k} is loaded from {@code in[inpos + 4 * k]}
+  * and each lane builds its own bit stream, so values are interleaved across lanes rather
+  * than stored back to back. Because nothing is masked, any bit above the low 14 of an
+  * input is OR-ed into the neighbouring field.
+  *
+  * <p>NOTE(review): the index ranges below assume {@code SPECIES_128} has 4 int lanes and
+  * {@code VLEN_128 == 4}; neither definition is visible in this part of the file.
+  *
+  * @param in     source array; read from {@code inpos} through {@code inpos + 255}
+  * @param inpos  index of the first value to pack
+  * @param out    destination array; written from {@code outpos} through
+  *               {@code outpos + 28 * VLEN_128 - 1}
+  * @param outpos index of the first packed word to write
+  */
+ private static void fastpackNoMask14(final int[] in, int inpos, final int[] out, int outpos) {
+     // 16 values * 14 bits = 7 * 32 bits, so the shift pattern repeats every
+     // 16 input vectors / 7 output vectors; the full block is four such rounds.
+     for (int round = 0; round < 4; round++) {
+         // word 0: v0 | v1 << 14 | low 4 bits of v2
+         IntVector cur = IntVector.fromArray(SPECIES_128, in, inpos);
+         IntVector word = cur;
+         cur = IntVector.fromArray(SPECIES_128, in, inpos + 4);
+         word = word.or(cur.lanewise(VectorOperators.LSHL, 14));
+         cur = IntVector.fromArray(SPECIES_128, in, inpos + 8);
+         word = word.or(cur.lanewise(VectorOperators.LSHL, 28));
+         word.intoArray(out, outpos);
+         outpos += VLEN_128;
+
+         // word 1: rest of v2 | v3 << 10 | low 8 bits of v4
+         word = cur.lanewise(VectorOperators.LSHR, 4);
+         cur = IntVector.fromArray(SPECIES_128, in, inpos + 12);
+         word = word.or(cur.lanewise(VectorOperators.LSHL, 10));
+         cur = IntVector.fromArray(SPECIES_128, in, inpos + 16);
+         word = word.or(cur.lanewise(VectorOperators.LSHL, 24));
+         word.intoArray(out, outpos);
+         outpos += VLEN_128;
+
+         // word 2: rest of v4 | v5 << 6 | low 12 bits of v6
+         word = cur.lanewise(VectorOperators.LSHR, 8);
+         cur = IntVector.fromArray(SPECIES_128, in, inpos + 20);
+         word = word.or(cur.lanewise(VectorOperators.LSHL, 6));
+         cur = IntVector.fromArray(SPECIES_128, in, inpos + 24);
+         word = word.or(cur.lanewise(VectorOperators.LSHL, 20));
+         word.intoArray(out, outpos);
+         outpos += VLEN_128;
+
+         // word 3: rest of v6 | v7 << 2 | v8 << 16 | low 2 bits of v9
+         word = cur.lanewise(VectorOperators.LSHR, 12);
+         cur = IntVector.fromArray(SPECIES_128, in, inpos + 28);
+         word = word.or(cur.lanewise(VectorOperators.LSHL, 2));
+         cur = IntVector.fromArray(SPECIES_128, in, inpos + 32);
+         word = word.or(cur.lanewise(VectorOperators.LSHL, 16));
+         cur = IntVector.fromArray(SPECIES_128, in, inpos + 36);
+         word = word.or(cur.lanewise(VectorOperators.LSHL, 30));
+         word.intoArray(out, outpos);
+         outpos += VLEN_128;
+
+         // word 4: rest of v9 | v10 << 12 | low 6 bits of v11
+         word = cur.lanewise(VectorOperators.LSHR, 2);
+         cur = IntVector.fromArray(SPECIES_128, in, inpos + 40);
+         word = word.or(cur.lanewise(VectorOperators.LSHL, 12));
+         cur = IntVector.fromArray(SPECIES_128, in, inpos + 44);
+         word = word.or(cur.lanewise(VectorOperators.LSHL, 26));
+         word.intoArray(out, outpos);
+         outpos += VLEN_128;
+
+         // word 5: rest of v11 | v12 << 8 | low 10 bits of v13
+         word = cur.lanewise(VectorOperators.LSHR, 6);
+         cur = IntVector.fromArray(SPECIES_128, in, inpos + 48);
+         word = word.or(cur.lanewise(VectorOperators.LSHL, 8));
+         cur = IntVector.fromArray(SPECIES_128, in, inpos + 52);
+         word = word.or(cur.lanewise(VectorOperators.LSHL, 22));
+         word.intoArray(out, outpos);
+         outpos += VLEN_128;
+
+         // word 6: rest of v13 | v14 << 4 | v15 << 18 (ends exactly on a word boundary)
+         word = cur.lanewise(VectorOperators.LSHR, 10);
+         cur = IntVector.fromArray(SPECIES_128, in, inpos + 56);
+         word = word.or(cur.lanewise(VectorOperators.LSHL, 4));
+         cur = IntVector.fromArray(SPECIES_128, in, inpos + 60);
+         word = word.or(cur.lanewise(VectorOperators.LSHL, 18));
+         word.intoArray(out, outpos);
+         outpos += VLEN_128;
+
+         // Step to the next group of 16 input vectors.
+         inpos += 64;
+     }
+ }
+
+ /**
+  * Unpacks 28 packed vectors from {@code in} into 64 output vectors in {@code out},
+  * extracting one 14-bit field per output value.
+  *
+  * <p>Unpacking is lane-wise: packed vector {@code k} is loaded from
+  * {@code in[inpos + 4 * k]} and each lane is decoded as an independent bit stream.
+  * A field that straddles two packed words is rebuilt from the top bits of the first
+  * word and the low bits of the next.
+  *
+  * <p>NOTE(review): the index ranges below assume {@code SPECIES_128} has 4 int lanes,
+  * {@code VLEN_128 == 4}, and {@code MASK_14} is the broadcast 14-bit mask; none of those
+  * definitions is visible in this part of the file.
+  *
+  * @param in     packed source array; read from {@code inpos} through {@code inpos + 111}
+  * @param inpos  index of the first packed word to read
+  * @param out    destination array; written from {@code outpos} through
+  *               {@code outpos + 64 * VLEN_128 - 1}
+  * @param outpos index of the first unpacked value to write
+  */
+ private static void fastunpack14(final int[] in, int inpos, final int[] out, int outpos) {
+     // 7 packed words hold exactly 16 fields of 14 bits, so the extraction pattern
+     // repeats every 7 input vectors / 16 output vectors; the block is four rounds.
+     for (int round = 0; round < 4; round++) {
+         // packed word 0: v0, v1, low 4 bits of v2
+         IntVector packed = IntVector.fromArray(SPECIES_128, in, inpos);
+         packed.and(MASK_14).intoArray(out, outpos);
+         outpos += VLEN_128;
+         packed.lanewise(VectorOperators.LSHR, 14).and(MASK_14).intoArray(out, outpos);
+         outpos += VLEN_128;
+         IntVector value = packed.lanewise(VectorOperators.LSHR, 28).and(MASK_14);
+
+         // packed word 1: high 10 bits of v2, v3, low 8 bits of v4
+         packed = IntVector.fromArray(SPECIES_128, in, inpos + 4);
+         value = value.or(packed.and(0x3FF).lanewise(VectorOperators.LSHL, 4));
+         value.intoArray(out, outpos);
+         outpos += VLEN_128;
+         packed.lanewise(VectorOperators.LSHR, 10).and(MASK_14).intoArray(out, outpos);
+         outpos += VLEN_128;
+         value = packed.lanewise(VectorOperators.LSHR, 24).and(MASK_14);
+
+         // packed word 2: high 6 bits of v4, v5, low 12 bits of v6
+         packed = IntVector.fromArray(SPECIES_128, in, inpos + 8);
+         value = value.or(packed.and(0x3F).lanewise(VectorOperators.LSHL, 8));
+         value.intoArray(out, outpos);
+         outpos += VLEN_128;
+         packed.lanewise(VectorOperators.LSHR, 6).and(MASK_14).intoArray(out, outpos);
+         outpos += VLEN_128;
+         value = packed.lanewise(VectorOperators.LSHR, 20).and(MASK_14);
+
+         // packed word 3: high 2 bits of v6, v7, v8, low 2 bits of v9
+         packed = IntVector.fromArray(SPECIES_128, in, inpos + 12);
+         value = value.or(packed.and(0x3).lanewise(VectorOperators.LSHL, 12));
+         value.intoArray(out, outpos);
+         outpos += VLEN_128;
+         packed.lanewise(VectorOperators.LSHR, 2).and(MASK_14).intoArray(out, outpos);
+         outpos += VLEN_128;
+         packed.lanewise(VectorOperators.LSHR, 16).and(MASK_14).intoArray(out, outpos);
+         outpos += VLEN_128;
+         value = packed.lanewise(VectorOperators.LSHR, 30).and(MASK_14);
+
+         // packed word 4: high 12 bits of v9, v10, low 6 bits of v11
+         packed = IntVector.fromArray(SPECIES_128, in, inpos + 16);
+         value = value.or(packed.and(0xFFF).lanewise(VectorOperators.LSHL, 2));
+         value.intoArray(out, outpos);
+         outpos += VLEN_128;
+         packed.lanewise(VectorOperators.LSHR, 12).and(MASK_14).intoArray(out, outpos);
+         outpos += VLEN_128;
+         value = packed.lanewise(VectorOperators.LSHR, 26).and(MASK_14);
+
+         // packed word 5: high 8 bits of v11, v12, low 10 bits of v13
+         packed = IntVector.fromArray(SPECIES_128, in, inpos + 20);
+         value = value.or(packed.and(0xFF).lanewise(VectorOperators.LSHL, 6));
+         value.intoArray(out, outpos);
+         outpos += VLEN_128;
+         packed.lanewise(VectorOperators.LSHR, 8).and(MASK_14).intoArray(out, outpos);
+         outpos += VLEN_128;
+         value = packed.lanewise(VectorOperators.LSHR, 22).and(MASK_14);
+
+         // packed word 6: high 4 bits of v13, v14, v15 (ends exactly on a word boundary)
+         packed = IntVector.fromArray(SPECIES_128, in, inpos + 24);
+         value = value.or(packed.and(0xF).lanewise(VectorOperators.LSHL, 10));
+         value.intoArray(out, outpos);
+         outpos += VLEN_128;
+         packed.lanewise(VectorOperators.LSHR, 4).and(MASK_14).intoArray(out, outpos);
+         outpos += VLEN_128;
+         packed.lanewise(VectorOperators.LSHR, 18).and(MASK_14).intoArray(out, outpos);
+         outpos += VLEN_128;
+
+         // Step to the next group of 7 packed vectors.
+         inpos += 28;
+     }
+ }
+
+ private static void fastpack15(final int[] in, int inpos, final int[] out, int outpos) {
+ var iV = IntVector.fromArray(SPECIES_128, in, inpos);
+ var oV = iV.and(MASK_15);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 4);
+ oV = iV.and(MASK_15).lanewise(VectorOperators.LSHL, 15).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 8);
+ oV = iV.and(MASK_15).lanewise(VectorOperators.LSHL, 30).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_15).lanewise(VectorOperators.LSHR, 2);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 12);
+ oV = iV.and(MASK_15).lanewise(VectorOperators.LSHL, 13).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 16);
+ oV = iV.and(MASK_15).lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_15).lanewise(VectorOperators.LSHR, 4);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 20);
+ oV = iV.and(MASK_15).lanewise(VectorOperators.LSHL, 11).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 24);
+ oV = iV.and(MASK_15).lanewise(VectorOperators.LSHL, 26).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_15).lanewise(VectorOperators.LSHR, 6);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 28);
+ oV = iV.and(MASK_15).lanewise(VectorOperators.LSHL, 9).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 32);
+ oV = iV.and(MASK_15).lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_15).lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 36);
+ oV = iV.and(MASK_15).lanewise(VectorOperators.LSHL, 7).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 40);
+ oV = iV.and(MASK_15).lanewise(VectorOperators.LSHL, 22).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_15).lanewise(VectorOperators.LSHR, 10);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 44);
+ oV = iV.and(MASK_15).lanewise(VectorOperators.LSHL, 5).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 48);
+ oV = iV.and(MASK_15).lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_15).lanewise(VectorOperators.LSHR, 12);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 52);
+ oV = iV.and(MASK_15).lanewise(VectorOperators.LSHL, 3).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 56);
+ oV = iV.and(MASK_15).lanewise(VectorOperators.LSHL, 18).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_15).lanewise(VectorOperators.LSHR, 14);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 60);
+ oV = iV.and(MASK_15).lanewise(VectorOperators.LSHL, 1).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 64);
+ oV = iV.and(MASK_15).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 68);
+ oV = iV.and(MASK_15).lanewise(VectorOperators.LSHL, 31).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_15).lanewise(VectorOperators.LSHR, 1);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 72);
+ oV = iV.and(MASK_15).lanewise(VectorOperators.LSHL, 14).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 76);
+ oV = iV.and(MASK_15).lanewise(VectorOperators.LSHL, 29).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_15).lanewise(VectorOperators.LSHR, 3);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 80);
+ oV = iV.and(MASK_15).lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 84);
+ oV = iV.and(MASK_15).lanewise(VectorOperators.LSHL, 27).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_15).lanewise(VectorOperators.LSHR, 5);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 88);
+ oV = iV.and(MASK_15).lanewise(VectorOperators.LSHL, 10).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 92);
+ oV = iV.and(MASK_15).lanewise(VectorOperators.LSHL, 25).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_15).lanewise(VectorOperators.LSHR, 7);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 96);
+ oV = iV.and(MASK_15).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 100);
+ oV = iV.and(MASK_15).lanewise(VectorOperators.LSHL, 23).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_15).lanewise(VectorOperators.LSHR, 9);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 104);
+ oV = iV.and(MASK_15).lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 108);
+ oV = iV.and(MASK_15).lanewise(VectorOperators.LSHL, 21).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_15).lanewise(VectorOperators.LSHR, 11);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 112);
+ oV = iV.and(MASK_15).lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 116);
+ oV = iV.and(MASK_15).lanewise(VectorOperators.LSHL, 19).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_15).lanewise(VectorOperators.LSHR, 13);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 120);
+ oV = iV.and(MASK_15).lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 124);
+ oV = iV.and(MASK_15).lanewise(VectorOperators.LSHL, 17).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 128);
+ oV = iV.and(MASK_15);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 132);
+ oV = iV.and(MASK_15).lanewise(VectorOperators.LSHL, 15).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 136);
+ oV = iV.and(MASK_15).lanewise(VectorOperators.LSHL, 30).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_15).lanewise(VectorOperators.LSHR, 2);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 140);
+ oV = iV.and(MASK_15).lanewise(VectorOperators.LSHL, 13).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 144);
+ oV = iV.and(MASK_15).lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_15).lanewise(VectorOperators.LSHR, 4);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 148);
+ oV = iV.and(MASK_15).lanewise(VectorOperators.LSHL, 11).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 152);
+ oV = iV.and(MASK_15).lanewise(VectorOperators.LSHL, 26).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_15).lanewise(VectorOperators.LSHR, 6);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 156);
+ oV = iV.and(MASK_15).lanewise(VectorOperators.LSHL, 9).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 160);
+ oV = iV.and(MASK_15).lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_15).lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 164);
+ oV = iV.and(MASK_15).lanewise(VectorOperators.LSHL, 7).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 168);
+ oV = iV.and(MASK_15).lanewise(VectorOperators.LSHL, 22).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_15).lanewise(VectorOperators.LSHR, 10);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 172);
+ oV = iV.and(MASK_15).lanewise(VectorOperators.LSHL, 5).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 176);
+ oV = iV.and(MASK_15).lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_15).lanewise(VectorOperators.LSHR, 12);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 180);
+ oV = iV.and(MASK_15).lanewise(VectorOperators.LSHL, 3).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 184);
+ oV = iV.and(MASK_15).lanewise(VectorOperators.LSHL, 18).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_15).lanewise(VectorOperators.LSHR, 14);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 188);
+ oV = iV.and(MASK_15).lanewise(VectorOperators.LSHL, 1).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 192);
+ oV = iV.and(MASK_15).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 196);
+ oV = iV.and(MASK_15).lanewise(VectorOperators.LSHL, 31).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_15).lanewise(VectorOperators.LSHR, 1);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 200);
+ oV = iV.and(MASK_15).lanewise(VectorOperators.LSHL, 14).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 204);
+ oV = iV.and(MASK_15).lanewise(VectorOperators.LSHL, 29).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_15).lanewise(VectorOperators.LSHR, 3);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 208);
+ oV = iV.and(MASK_15).lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 212);
+ oV = iV.and(MASK_15).lanewise(VectorOperators.LSHL, 27).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_15).lanewise(VectorOperators.LSHR, 5);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 216);
+ oV = iV.and(MASK_15).lanewise(VectorOperators.LSHL, 10).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 220);
+ oV = iV.and(MASK_15).lanewise(VectorOperators.LSHL, 25).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_15).lanewise(VectorOperators.LSHR, 7);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 224);
+ oV = iV.and(MASK_15).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 228);
+ oV = iV.and(MASK_15).lanewise(VectorOperators.LSHL, 23).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_15).lanewise(VectorOperators.LSHR, 9);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 232);
+ oV = iV.and(MASK_15).lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 236);
+ oV = iV.and(MASK_15).lanewise(VectorOperators.LSHL, 21).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_15).lanewise(VectorOperators.LSHR, 11);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 240);
+ oV = iV.and(MASK_15).lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 244);
+ oV = iV.and(MASK_15).lanewise(VectorOperators.LSHL, 19).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_15).lanewise(VectorOperators.LSHR, 13);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 248);
+ oV = iV.and(MASK_15).lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 252);
+ oV = iV.and(MASK_15).lanewise(VectorOperators.LSHL, 17).or(oV);
+
+ oV.intoArray(out, outpos);
+ }
+
+ private static void fastpackNoMask15(final int[] in, int inpos, final int[] out, int outpos) {
+ var iV = IntVector.fromArray(SPECIES_128, in, inpos);
+ var oV = iV;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 4);
+ oV = iV.lanewise(VectorOperators.LSHL, 15).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 8);
+ oV = iV.lanewise(VectorOperators.LSHL, 30).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 2);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 12);
+ oV = iV.lanewise(VectorOperators.LSHL, 13).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 16);
+ oV = iV.lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 4);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 20);
+ oV = iV.lanewise(VectorOperators.LSHL, 11).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 24);
+ oV = iV.lanewise(VectorOperators.LSHL, 26).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 6);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 28);
+ oV = iV.lanewise(VectorOperators.LSHL, 9).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 32);
+ oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 36);
+ oV = iV.lanewise(VectorOperators.LSHL, 7).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 40);
+ oV = iV.lanewise(VectorOperators.LSHL, 22).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 10);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 44);
+ oV = iV.lanewise(VectorOperators.LSHL, 5).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 48);
+ oV = iV.lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 12);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 52);
+ oV = iV.lanewise(VectorOperators.LSHL, 3).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 56);
+ oV = iV.lanewise(VectorOperators.LSHL, 18).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 14);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 60);
+ oV = iV.lanewise(VectorOperators.LSHL, 1).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 64);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 68);
+ oV = iV.lanewise(VectorOperators.LSHL, 31).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 1);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 72);
+ oV = iV.lanewise(VectorOperators.LSHL, 14).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 76);
+ oV = iV.lanewise(VectorOperators.LSHL, 29).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 3);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 80);
+ oV = iV.lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 84);
+ oV = iV.lanewise(VectorOperators.LSHL, 27).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 5);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 88);
+ oV = iV.lanewise(VectorOperators.LSHL, 10).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 92);
+ oV = iV.lanewise(VectorOperators.LSHL, 25).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 7);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 96);
+ oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 100);
+ oV = iV.lanewise(VectorOperators.LSHL, 23).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 9);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 104);
+ oV = iV.lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 108);
+ oV = iV.lanewise(VectorOperators.LSHL, 21).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 11);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 112);
+ oV = iV.lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 116);
+ oV = iV.lanewise(VectorOperators.LSHL, 19).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 13);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 120);
+ oV = iV.lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 124);
+ oV = iV.lanewise(VectorOperators.LSHL, 17).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 128);
+ oV = iV;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 132);
+ oV = iV.lanewise(VectorOperators.LSHL, 15).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 136);
+ oV = iV.lanewise(VectorOperators.LSHL, 30).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 2);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 140);
+ oV = iV.lanewise(VectorOperators.LSHL, 13).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 144);
+ oV = iV.lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 4);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 148);
+ oV = iV.lanewise(VectorOperators.LSHL, 11).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 152);
+ oV = iV.lanewise(VectorOperators.LSHL, 26).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 6);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 156);
+ oV = iV.lanewise(VectorOperators.LSHL, 9).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 160);
+ oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 164);
+ oV = iV.lanewise(VectorOperators.LSHL, 7).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 168);
+ oV = iV.lanewise(VectorOperators.LSHL, 22).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 10);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 172);
+ oV = iV.lanewise(VectorOperators.LSHL, 5).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 176);
+ oV = iV.lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 12);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 180);
+ oV = iV.lanewise(VectorOperators.LSHL, 3).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 184);
+ oV = iV.lanewise(VectorOperators.LSHL, 18).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 14);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 188);
+ oV = iV.lanewise(VectorOperators.LSHL, 1).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 192);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 196);
+ oV = iV.lanewise(VectorOperators.LSHL, 31).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 1);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 200);
+ oV = iV.lanewise(VectorOperators.LSHL, 14).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 204);
+ oV = iV.lanewise(VectorOperators.LSHL, 29).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 3);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 208);
+ oV = iV.lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 212);
+ oV = iV.lanewise(VectorOperators.LSHL, 27).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 5);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 216);
+ oV = iV.lanewise(VectorOperators.LSHL, 10).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 220);
+ oV = iV.lanewise(VectorOperators.LSHL, 25).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 7);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 224);
+ oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 228);
+ oV = iV.lanewise(VectorOperators.LSHL, 23).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 9);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 232);
+ oV = iV.lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 236);
+ oV = iV.lanewise(VectorOperators.LSHL, 21).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 11);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 240);
+ oV = iV.lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 244);
+ oV = iV.lanewise(VectorOperators.LSHL, 19).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 13);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 248);
+ oV = iV.lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 252);
+ oV = iV.lanewise(VectorOperators.LSHL, 17).or(oV);
+
+ oV.intoArray(out, outpos);
+ }
+
+ private static void fastunpack15(final int[] in, int inpos, final int[] out, int outpos) {
+ var iV = IntVector.fromArray(SPECIES_128, in, inpos);
+ iV.and(MASK_15).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 15).and(MASK_15).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ var oV = iV.lanewise(VectorOperators.LSHR, 30).and(MASK_15);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 4);
+ oV = iV.and(8191).lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 13).and(MASK_15).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 28).and(MASK_15);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 8);
+ oV = iV.and(2047).lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 11).and(MASK_15).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 26).and(MASK_15);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 12);
+ oV = iV.and(511).lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 9).and(MASK_15).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 24).and(MASK_15);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 16);
+ oV = iV.and(127).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 7).and(MASK_15).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 22).and(MASK_15);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 20);
+ oV = iV.and(31).lanewise(VectorOperators.LSHL, 10).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 5).and(MASK_15).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 20).and(MASK_15);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 24);
+ oV = iV.and(7).lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 3).and(MASK_15).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 18).and(MASK_15);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 28);
+ oV = iV.and(1).lanewise(VectorOperators.LSHL, 14).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 1).and(MASK_15).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 16).and(MASK_15).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 31).and(MASK_15);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 32);
+ oV = iV.and(16383).lanewise(VectorOperators.LSHL, 1).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 14).and(MASK_15).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 29).and(MASK_15);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 36);
+ oV = iV.and(4095).lanewise(VectorOperators.LSHL, 3).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 12).and(MASK_15).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 27).and(MASK_15);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 40);
+ oV = iV.and(1023).lanewise(VectorOperators.LSHL, 5).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 10).and(MASK_15).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 25).and(MASK_15);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 44);
+ oV = iV.and(255).lanewise(VectorOperators.LSHL, 7).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 8).and(MASK_15).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 23).and(MASK_15);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 48);
+ oV = iV.and(63).lanewise(VectorOperators.LSHL, 9).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 6).and(MASK_15).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 21).and(MASK_15);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 52);
+ oV = iV.and(15).lanewise(VectorOperators.LSHL, 11).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 4).and(MASK_15).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 19).and(MASK_15);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 56);
+ oV = iV.and(3).lanewise(VectorOperators.LSHL, 13).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 2).and(MASK_15).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 17).and(MASK_15).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 60);
+ iV.and(MASK_15).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 15).and(MASK_15).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 30).and(MASK_15);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 64);
+ oV = iV.and(8191).lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 13).and(MASK_15).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 28).and(MASK_15);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 68);
+ oV = iV.and(2047).lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 11).and(MASK_15).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 26).and(MASK_15);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 72);
+ oV = iV.and(511).lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 9).and(MASK_15).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 24).and(MASK_15);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 76);
+ oV = iV.and(127).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 7).and(MASK_15).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 22).and(MASK_15);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 80);
+ oV = iV.and(31).lanewise(VectorOperators.LSHL, 10).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 5).and(MASK_15).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 20).and(MASK_15);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 84);
+ oV = iV.and(7).lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 3).and(MASK_15).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 18).and(MASK_15);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 88);
+ oV = iV.and(1).lanewise(VectorOperators.LSHL, 14).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 1).and(MASK_15).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 16).and(MASK_15).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 31).and(MASK_15);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 92);
+ oV = iV.and(16383).lanewise(VectorOperators.LSHL, 1).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 14).and(MASK_15).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 29).and(MASK_15);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 96);
+ oV = iV.and(4095).lanewise(VectorOperators.LSHL, 3).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 12).and(MASK_15).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 27).and(MASK_15);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 100);
+ oV = iV.and(1023).lanewise(VectorOperators.LSHL, 5).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 10).and(MASK_15).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 25).and(MASK_15);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 104);
+ oV = iV.and(255).lanewise(VectorOperators.LSHL, 7).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 8).and(MASK_15).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 23).and(MASK_15);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 108);
+ oV = iV.and(63).lanewise(VectorOperators.LSHL, 9).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 6).and(MASK_15).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 21).and(MASK_15);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 112);
+ oV = iV.and(15).lanewise(VectorOperators.LSHL, 11).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 4).and(MASK_15).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 19).and(MASK_15);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 116);
+ oV = iV.and(3).lanewise(VectorOperators.LSHL, 13).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 2).and(MASK_15).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 17).and(MASK_15).intoArray(out, outpos);
+ }
+
+ /**
+  * Packs 64 input vectors (read from {@code in} at {@code inpos},
+  * {@code inpos + 4}, ... {@code inpos + 252}) into 32 output vectors written
+  * to {@code out} starting at {@code outpos}. Each input is masked with
+  * {@code MASK_16}; consecutive pairs are merged, the first vector of a pair
+  * unshifted and the second shifted left by 16 bits.
+  *
+  * @param in     source array
+  * @param inpos  index of the first input element
+  * @param out    destination array
+  * @param outpos index of the first output element
+  */
+ private static void fastpack16(final int[] in, int inpos, final int[] out, int outpos) {
+     for (int pair = 0; pair < 32; pair++) {
+         var lowHalf = IntVector.fromArray(SPECIES_128, in, inpos).and(MASK_16);
+         var highHalf = IntVector.fromArray(SPECIES_128, in, inpos + 4)
+                 .and(MASK_16)
+                 .lanewise(VectorOperators.LSHL, 16);
+
+         highHalf.or(lowHalf).intoArray(out, outpos);
+
+         inpos += 8; // two input vectors consumed
+         outpos += VLEN_128; // one output vector produced
+     }
+ }
+
+ private static void fastpackNoMask16(final int[] in, int inpos, final int[] out, int outpos) {
+ var iV = IntVector.fromArray(SPECIES_128, in, inpos);
+ var oV = iV;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 4);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 8);
+ oV = iV;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 12);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 16);
+ oV = iV;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 20);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 24);
+ oV = iV;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 28);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 32);
+ oV = iV;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 36);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 40);
+ oV = iV;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 44);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 48);
+ oV = iV;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 52);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 56);
+ oV = iV;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 60);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 64);
+ oV = iV;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 68);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 72);
+ oV = iV;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 76);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 80);
+ oV = iV;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 84);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 88);
+ oV = iV;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 92);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 96);
+ oV = iV;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 100);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 104);
+ oV = iV;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 108);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 112);
+ oV = iV;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 116);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 120);
+ oV = iV;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 124);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 128);
+ oV = iV;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 132);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 136);
+ oV = iV;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 140);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 144);
+ oV = iV;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 148);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 152);
+ oV = iV;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 156);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 160);
+ oV = iV;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 164);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 168);
+ oV = iV;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 172);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 176);
+ oV = iV;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 180);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 184);
+ oV = iV;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 188);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 192);
+ oV = iV;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 196);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 200);
+ oV = iV;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 204);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 208);
+ oV = iV;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 212);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 216);
+ oV = iV;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 220);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 224);
+ oV = iV;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 228);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 232);
+ oV = iV;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 236);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 240);
+ oV = iV;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 244);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 248);
+ oV = iV;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 252);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ }
+
+ private static void fastunpack16(final int[] in, int inpos, final int[] out, int outpos) {
+ var iV = IntVector.fromArray(SPECIES_128, in, inpos);
+ iV.and(MASK_16).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 16).and(MASK_16).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 4);
+ iV.and(MASK_16).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 16).and(MASK_16).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 8);
+ iV.and(MASK_16).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 16).and(MASK_16).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 12);
+ iV.and(MASK_16).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 16).and(MASK_16).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 16);
+ iV.and(MASK_16).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 16).and(MASK_16).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 20);
+ iV.and(MASK_16).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 16).and(MASK_16).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 24);
+ iV.and(MASK_16).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 16).and(MASK_16).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 28);
+ iV.and(MASK_16).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 16).and(MASK_16).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 32);
+ iV.and(MASK_16).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 16).and(MASK_16).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 36);
+ iV.and(MASK_16).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 16).and(MASK_16).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 40);
+ iV.and(MASK_16).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 16).and(MASK_16).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 44);
+ iV.and(MASK_16).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 16).and(MASK_16).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 48);
+ iV.and(MASK_16).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 16).and(MASK_16).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 52);
+ iV.and(MASK_16).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 16).and(MASK_16).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 56);
+ iV.and(MASK_16).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 16).and(MASK_16).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 60);
+ iV.and(MASK_16).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 16).and(MASK_16).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 64);
+ iV.and(MASK_16).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 16).and(MASK_16).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 68);
+ iV.and(MASK_16).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 16).and(MASK_16).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 72);
+ iV.and(MASK_16).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 16).and(MASK_16).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 76);
+ iV.and(MASK_16).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 16).and(MASK_16).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 80);
+ iV.and(MASK_16).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 16).and(MASK_16).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 84);
+ iV.and(MASK_16).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 16).and(MASK_16).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 88);
+ iV.and(MASK_16).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 16).and(MASK_16).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 92);
+ iV.and(MASK_16).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 16).and(MASK_16).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 96);
+ iV.and(MASK_16).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 16).and(MASK_16).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 100);
+ iV.and(MASK_16).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 16).and(MASK_16).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 104);
+ iV.and(MASK_16).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 16).and(MASK_16).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 108);
+ iV.and(MASK_16).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 16).and(MASK_16).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 112);
+ iV.and(MASK_16).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 16).and(MASK_16).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 116);
+ iV.and(MASK_16).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 16).and(MASK_16).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 120);
+ iV.and(MASK_16).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 16).and(MASK_16).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 124);
+ iV.and(MASK_16).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 16).and(MASK_16).intoArray(out, outpos);
+ }
+
+ /**
+ * Packs 64 input vectors, each AND-ed with MASK_17, into 34 output vectors at 17 bits
+ * per value.
+ *
+ * Input vectors are loaded from {@code in} at offsets {@code inpos, inpos + 4, ...,
+ * inpos + 252}. Each masked value is shifted left to its bit position and OR-ed into
+ * the current output word. When a value straddles the 32-bit lane boundary, the word
+ * is stored and the next word is seeded with that value's remaining high bits via a
+ * logical right shift. After 32 inputs (17 stored words) the bit position is back at
+ * zero, and the same schedule repeats for the second half starting at
+ * {@code inpos + 128}.
+ *
+ * NOTE(review): MASK_17, SPECIES_128 and VLEN_128 are declared outside this method;
+ * presumably MASK_17 is the low-17-bit mask and VLEN_128 == 4 - confirm.
+ *
+ * @param in source array of values to pack
+ * @param inpos index in {@code in} of the first value
+ * @param out destination array for the packed words
+ * @param outpos index in {@code out} of the first packed word
+ */
+ private static void fastpack17(final int[] in, int inpos, final int[] out, int outpos) {
+ // First half: inputs at inpos .. inpos + 124.
+ var cur = IntVector.fromArray(SPECIES_128, in, inpos).and(MASK_17);
+ var word = cur;
+ cur = IntVector.fromArray(SPECIES_128, in, inpos + 4).and(MASK_17);
+ word = word.or(cur.lanewise(VectorOperators.LSHL, 17));
+ word.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ word = cur.lanewise(VectorOperators.LSHR, 15);
+ cur = IntVector.fromArray(SPECIES_128, in, inpos + 8).and(MASK_17);
+ word = word.or(cur.lanewise(VectorOperators.LSHL, 2));
+ cur = IntVector.fromArray(SPECIES_128, in, inpos + 12).and(MASK_17);
+ word = word.or(cur.lanewise(VectorOperators.LSHL, 19));
+ word.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ word = cur.lanewise(VectorOperators.LSHR, 13);
+ cur = IntVector.fromArray(SPECIES_128, in, inpos + 16).and(MASK_17);
+ word = word.or(cur.lanewise(VectorOperators.LSHL, 4));
+ cur = IntVector.fromArray(SPECIES_128, in, inpos + 20).and(MASK_17);
+ word = word.or(cur.lanewise(VectorOperators.LSHL, 21));
+ word.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ word = cur.lanewise(VectorOperators.LSHR, 11);
+ cur = IntVector.fromArray(SPECIES_128, in, inpos + 24).and(MASK_17);
+ word = word.or(cur.lanewise(VectorOperators.LSHL, 6));
+ cur = IntVector.fromArray(SPECIES_128, in, inpos + 28).and(MASK_17);
+ word = word.or(cur.lanewise(VectorOperators.LSHL, 23));
+ word.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ word = cur.lanewise(VectorOperators.LSHR, 9);
+ cur = IntVector.fromArray(SPECIES_128, in, inpos + 32).and(MASK_17);
+ word = word.or(cur.lanewise(VectorOperators.LSHL, 8));
+ cur = IntVector.fromArray(SPECIES_128, in, inpos + 36).and(MASK_17);
+ word = word.or(cur.lanewise(VectorOperators.LSHL, 25));
+ word.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ word = cur.lanewise(VectorOperators.LSHR, 7);
+ cur = IntVector.fromArray(SPECIES_128, in, inpos + 40).and(MASK_17);
+ word = word.or(cur.lanewise(VectorOperators.LSHL, 10));
+ cur = IntVector.fromArray(SPECIES_128, in, inpos + 44).and(MASK_17);
+ word = word.or(cur.lanewise(VectorOperators.LSHL, 27));
+ word.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ word = cur.lanewise(VectorOperators.LSHR, 5);
+ cur = IntVector.fromArray(SPECIES_128, in, inpos + 48).and(MASK_17);
+ word = word.or(cur.lanewise(VectorOperators.LSHL, 12));
+ cur = IntVector.fromArray(SPECIES_128, in, inpos + 52).and(MASK_17);
+ word = word.or(cur.lanewise(VectorOperators.LSHL, 29));
+ word.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ word = cur.lanewise(VectorOperators.LSHR, 3);
+ cur = IntVector.fromArray(SPECIES_128, in, inpos + 56).and(MASK_17);
+ word = word.or(cur.lanewise(VectorOperators.LSHL, 14));
+ cur = IntVector.fromArray(SPECIES_128, in, inpos + 60).and(MASK_17);
+ word = word.or(cur.lanewise(VectorOperators.LSHL, 31));
+ word.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ word = cur.lanewise(VectorOperators.LSHR, 1);
+ cur = IntVector.fromArray(SPECIES_128, in, inpos + 64).and(MASK_17);
+ word = word.or(cur.lanewise(VectorOperators.LSHL, 16));
+ word.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ word = cur.lanewise(VectorOperators.LSHR, 16);
+ cur = IntVector.fromArray(SPECIES_128, in, inpos + 68).and(MASK_17);
+ word = word.or(cur.lanewise(VectorOperators.LSHL, 1));
+ cur = IntVector.fromArray(SPECIES_128, in, inpos + 72).and(MASK_17);
+ word = word.or(cur.lanewise(VectorOperators.LSHL, 18));
+ word.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ word = cur.lanewise(VectorOperators.LSHR, 14);
+ cur = IntVector.fromArray(SPECIES_128, in, inpos + 76).and(MASK_17);
+ word = word.or(cur.lanewise(VectorOperators.LSHL, 3));
+ cur = IntVector.fromArray(SPECIES_128, in, inpos + 80).and(MASK_17);
+ word = word.or(cur.lanewise(VectorOperators.LSHL, 20));
+ word.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ word = cur.lanewise(VectorOperators.LSHR, 12);
+ cur = IntVector.fromArray(SPECIES_128, in, inpos + 84).and(MASK_17);
+ word = word.or(cur.lanewise(VectorOperators.LSHL, 5));
+ cur = IntVector.fromArray(SPECIES_128, in, inpos + 88).and(MASK_17);
+ word = word.or(cur.lanewise(VectorOperators.LSHL, 22));
+ word.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ word = cur.lanewise(VectorOperators.LSHR, 10);
+ cur = IntVector.fromArray(SPECIES_128, in, inpos + 92).and(MASK_17);
+ word = word.or(cur.lanewise(VectorOperators.LSHL, 7));
+ cur = IntVector.fromArray(SPECIES_128, in, inpos + 96).and(MASK_17);
+ word = word.or(cur.lanewise(VectorOperators.LSHL, 24));
+ word.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ word = cur.lanewise(VectorOperators.LSHR, 8);
+ cur = IntVector.fromArray(SPECIES_128, in, inpos + 100).and(MASK_17);
+ word = word.or(cur.lanewise(VectorOperators.LSHL, 9));
+ cur = IntVector.fromArray(SPECIES_128, in, inpos + 104).and(MASK_17);
+ word = word.or(cur.lanewise(VectorOperators.LSHL, 26));
+ word.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ word = cur.lanewise(VectorOperators.LSHR, 6);
+ cur = IntVector.fromArray(SPECIES_128, in, inpos + 108).and(MASK_17);
+ word = word.or(cur.lanewise(VectorOperators.LSHL, 11));
+ cur = IntVector.fromArray(SPECIES_128, in, inpos + 112).and(MASK_17);
+ word = word.or(cur.lanewise(VectorOperators.LSHL, 28));
+ word.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ word = cur.lanewise(VectorOperators.LSHR, 4);
+ cur = IntVector.fromArray(SPECIES_128, in, inpos + 116).and(MASK_17);
+ word = word.or(cur.lanewise(VectorOperators.LSHL, 13));
+ cur = IntVector.fromArray(SPECIES_128, in, inpos + 120).and(MASK_17);
+ word = word.or(cur.lanewise(VectorOperators.LSHL, 30));
+ word.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ word = cur.lanewise(VectorOperators.LSHR, 2);
+ cur = IntVector.fromArray(SPECIES_128, in, inpos + 124).and(MASK_17);
+ word = word.or(cur.lanewise(VectorOperators.LSHL, 15));
+ word.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ // Second half: the bit position is back at zero, so the schedule restarts
+ // for the inputs at inpos + 128 .. inpos + 252.
+ cur = IntVector.fromArray(SPECIES_128, in, inpos + 128).and(MASK_17);
+ word = cur;
+ cur = IntVector.fromArray(SPECIES_128, in, inpos + 132).and(MASK_17);
+ word = word.or(cur.lanewise(VectorOperators.LSHL, 17));
+ word.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ word = cur.lanewise(VectorOperators.LSHR, 15);
+ cur = IntVector.fromArray(SPECIES_128, in, inpos + 136).and(MASK_17);
+ word = word.or(cur.lanewise(VectorOperators.LSHL, 2));
+ cur = IntVector.fromArray(SPECIES_128, in, inpos + 140).and(MASK_17);
+ word = word.or(cur.lanewise(VectorOperators.LSHL, 19));
+ word.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ word = cur.lanewise(VectorOperators.LSHR, 13);
+ cur = IntVector.fromArray(SPECIES_128, in, inpos + 144).and(MASK_17);
+ word = word.or(cur.lanewise(VectorOperators.LSHL, 4));
+ cur = IntVector.fromArray(SPECIES_128, in, inpos + 148).and(MASK_17);
+ word = word.or(cur.lanewise(VectorOperators.LSHL, 21));
+ word.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ word = cur.lanewise(VectorOperators.LSHR, 11);
+ cur = IntVector.fromArray(SPECIES_128, in, inpos + 152).and(MASK_17);
+ word = word.or(cur.lanewise(VectorOperators.LSHL, 6));
+ cur = IntVector.fromArray(SPECIES_128, in, inpos + 156).and(MASK_17);
+ word = word.or(cur.lanewise(VectorOperators.LSHL, 23));
+ word.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ word = cur.lanewise(VectorOperators.LSHR, 9);
+ cur = IntVector.fromArray(SPECIES_128, in, inpos + 160).and(MASK_17);
+ word = word.or(cur.lanewise(VectorOperators.LSHL, 8));
+ cur = IntVector.fromArray(SPECIES_128, in, inpos + 164).and(MASK_17);
+ word = word.or(cur.lanewise(VectorOperators.LSHL, 25));
+ word.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ word = cur.lanewise(VectorOperators.LSHR, 7);
+ cur = IntVector.fromArray(SPECIES_128, in, inpos + 168).and(MASK_17);
+ word = word.or(cur.lanewise(VectorOperators.LSHL, 10));
+ cur = IntVector.fromArray(SPECIES_128, in, inpos + 172).and(MASK_17);
+ word = word.or(cur.lanewise(VectorOperators.LSHL, 27));
+ word.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ word = cur.lanewise(VectorOperators.LSHR, 5);
+ cur = IntVector.fromArray(SPECIES_128, in, inpos + 176).and(MASK_17);
+ word = word.or(cur.lanewise(VectorOperators.LSHL, 12));
+ cur = IntVector.fromArray(SPECIES_128, in, inpos + 180).and(MASK_17);
+ word = word.or(cur.lanewise(VectorOperators.LSHL, 29));
+ word.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ word = cur.lanewise(VectorOperators.LSHR, 3);
+ cur = IntVector.fromArray(SPECIES_128, in, inpos + 184).and(MASK_17);
+ word = word.or(cur.lanewise(VectorOperators.LSHL, 14));
+ cur = IntVector.fromArray(SPECIES_128, in, inpos + 188).and(MASK_17);
+ word = word.or(cur.lanewise(VectorOperators.LSHL, 31));
+ word.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ word = cur.lanewise(VectorOperators.LSHR, 1);
+ cur = IntVector.fromArray(SPECIES_128, in, inpos + 192).and(MASK_17);
+ word = word.or(cur.lanewise(VectorOperators.LSHL, 16));
+ word.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ word = cur.lanewise(VectorOperators.LSHR, 16);
+ cur = IntVector.fromArray(SPECIES_128, in, inpos + 196).and(MASK_17);
+ word = word.or(cur.lanewise(VectorOperators.LSHL, 1));
+ cur = IntVector.fromArray(SPECIES_128, in, inpos + 200).and(MASK_17);
+ word = word.or(cur.lanewise(VectorOperators.LSHL, 18));
+ word.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ word = cur.lanewise(VectorOperators.LSHR, 14);
+ cur = IntVector.fromArray(SPECIES_128, in, inpos + 204).and(MASK_17);
+ word = word.or(cur.lanewise(VectorOperators.LSHL, 3));
+ cur = IntVector.fromArray(SPECIES_128, in, inpos + 208).and(MASK_17);
+ word = word.or(cur.lanewise(VectorOperators.LSHL, 20));
+ word.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ word = cur.lanewise(VectorOperators.LSHR, 12);
+ cur = IntVector.fromArray(SPECIES_128, in, inpos + 212).and(MASK_17);
+ word = word.or(cur.lanewise(VectorOperators.LSHL, 5));
+ cur = IntVector.fromArray(SPECIES_128, in, inpos + 216).and(MASK_17);
+ word = word.or(cur.lanewise(VectorOperators.LSHL, 22));
+ word.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ word = cur.lanewise(VectorOperators.LSHR, 10);
+ cur = IntVector.fromArray(SPECIES_128, in, inpos + 220).and(MASK_17);
+ word = word.or(cur.lanewise(VectorOperators.LSHL, 7));
+ cur = IntVector.fromArray(SPECIES_128, in, inpos + 224).and(MASK_17);
+ word = word.or(cur.lanewise(VectorOperators.LSHL, 24));
+ word.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ word = cur.lanewise(VectorOperators.LSHR, 8);
+ cur = IntVector.fromArray(SPECIES_128, in, inpos + 228).and(MASK_17);
+ word = word.or(cur.lanewise(VectorOperators.LSHL, 9));
+ cur = IntVector.fromArray(SPECIES_128, in, inpos + 232).and(MASK_17);
+ word = word.or(cur.lanewise(VectorOperators.LSHL, 26));
+ word.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ word = cur.lanewise(VectorOperators.LSHR, 6);
+ cur = IntVector.fromArray(SPECIES_128, in, inpos + 236).and(MASK_17);
+ word = word.or(cur.lanewise(VectorOperators.LSHL, 11));
+ cur = IntVector.fromArray(SPECIES_128, in, inpos + 240).and(MASK_17);
+ word = word.or(cur.lanewise(VectorOperators.LSHL, 28));
+ word.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ word = cur.lanewise(VectorOperators.LSHR, 4);
+ cur = IntVector.fromArray(SPECIES_128, in, inpos + 244).and(MASK_17);
+ word = word.or(cur.lanewise(VectorOperators.LSHL, 13));
+ cur = IntVector.fromArray(SPECIES_128, in, inpos + 248).and(MASK_17);
+ word = word.or(cur.lanewise(VectorOperators.LSHL, 30));
+ word.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ // Final word: no further stores follow, so outpos is not advanced.
+ word = cur.lanewise(VectorOperators.LSHR, 2);
+ cur = IntVector.fromArray(SPECIES_128, in, inpos + 252).and(MASK_17);
+ word = word.or(cur.lanewise(VectorOperators.LSHL, 15));
+ word.intoArray(out, outpos);
+ }
+
+ private static void fastpackNoMask17(final int[] in, int inpos, final int[] out, int outpos) {
+ var iV = IntVector.fromArray(SPECIES_128, in, inpos);
+ var oV = iV;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 4);
+ oV = iV.lanewise(VectorOperators.LSHL, 17).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 15);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 8);
+ oV = iV.lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 12);
+ oV = iV.lanewise(VectorOperators.LSHL, 19).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 13);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 16);
+ oV = iV.lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 20);
+ oV = iV.lanewise(VectorOperators.LSHL, 21).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 11);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 24);
+ oV = iV.lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 28);
+ oV = iV.lanewise(VectorOperators.LSHL, 23).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 9);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 32);
+ oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 36);
+ oV = iV.lanewise(VectorOperators.LSHL, 25).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 7);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 40);
+ oV = iV.lanewise(VectorOperators.LSHL, 10).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 44);
+ oV = iV.lanewise(VectorOperators.LSHL, 27).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 5);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 48);
+ oV = iV.lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 52);
+ oV = iV.lanewise(VectorOperators.LSHL, 29).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 3);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 56);
+ oV = iV.lanewise(VectorOperators.LSHL, 14).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 60);
+ oV = iV.lanewise(VectorOperators.LSHL, 31).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 1);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 64);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 16);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 68);
+ oV = iV.lanewise(VectorOperators.LSHL, 1).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 72);
+ oV = iV.lanewise(VectorOperators.LSHL, 18).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 14);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 76);
+ oV = iV.lanewise(VectorOperators.LSHL, 3).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 80);
+ oV = iV.lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 12);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 84);
+ oV = iV.lanewise(VectorOperators.LSHL, 5).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 88);
+ oV = iV.lanewise(VectorOperators.LSHL, 22).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 10);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 92);
+ oV = iV.lanewise(VectorOperators.LSHL, 7).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 96);
+ oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 100);
+ oV = iV.lanewise(VectorOperators.LSHL, 9).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 104);
+ oV = iV.lanewise(VectorOperators.LSHL, 26).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 6);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 108);
+ oV = iV.lanewise(VectorOperators.LSHL, 11).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 112);
+ oV = iV.lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 4);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 116);
+ oV = iV.lanewise(VectorOperators.LSHL, 13).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 120);
+ oV = iV.lanewise(VectorOperators.LSHL, 30).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 2);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 124);
+ oV = iV.lanewise(VectorOperators.LSHL, 15).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 128);
+ oV = iV;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 132);
+ oV = iV.lanewise(VectorOperators.LSHL, 17).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 15);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 136);
+ oV = iV.lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 140);
+ oV = iV.lanewise(VectorOperators.LSHL, 19).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 13);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 144);
+ oV = iV.lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 148);
+ oV = iV.lanewise(VectorOperators.LSHL, 21).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 11);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 152);
+ oV = iV.lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 156);
+ oV = iV.lanewise(VectorOperators.LSHL, 23).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 9);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 160);
+ oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 164);
+ oV = iV.lanewise(VectorOperators.LSHL, 25).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 7);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 168);
+ oV = iV.lanewise(VectorOperators.LSHL, 10).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 172);
+ oV = iV.lanewise(VectorOperators.LSHL, 27).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 5);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 176);
+ oV = iV.lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 180);
+ oV = iV.lanewise(VectorOperators.LSHL, 29).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 3);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 184);
+ oV = iV.lanewise(VectorOperators.LSHL, 14).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 188);
+ oV = iV.lanewise(VectorOperators.LSHL, 31).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 1);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 192);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 16);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 196);
+ oV = iV.lanewise(VectorOperators.LSHL, 1).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 200);
+ oV = iV.lanewise(VectorOperators.LSHL, 18).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 14);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 204);
+ oV = iV.lanewise(VectorOperators.LSHL, 3).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 208);
+ oV = iV.lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 12);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 212);
+ oV = iV.lanewise(VectorOperators.LSHL, 5).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 216);
+ oV = iV.lanewise(VectorOperators.LSHL, 22).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 10);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 220);
+ oV = iV.lanewise(VectorOperators.LSHL, 7).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 224);
+ oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 228);
+ oV = iV.lanewise(VectorOperators.LSHL, 9).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 232);
+ oV = iV.lanewise(VectorOperators.LSHL, 26).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 6);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 236);
+ oV = iV.lanewise(VectorOperators.LSHL, 11).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 240);
+ oV = iV.lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 4);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 244);
+ oV = iV.lanewise(VectorOperators.LSHL, 13).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 248);
+ oV = iV.lanewise(VectorOperators.LSHL, 30).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 2);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 252);
+ oV = iV.lanewise(VectorOperators.LSHL, 15).or(oV);
+
+ oV.intoArray(out, outpos);
+ }
+
+ private static void fastunpack17(final int[] in, int inpos, final int[] out, int outpos) { // Unpacks 17-bit fields: 34 vector loads from in (inpos + 0, 4, ..., 132), 64 vector stores to out.
+ var iV = IntVector.fromArray(SPECIES_128, in, inpos); // packed word 0 of every lane
+ iV.and(MASK_17).intoArray(out, outpos); // field 0 = bits 0..16 (assumes MASK_17 keeps the low 17 bits; its definition is not shown here)
+ outpos += VLEN_128;
+ // Each lane is its own bit stream. Field 1 straddles two words: 15 bits remain in word 0 ...
+ var oV = iV.lanewise(VectorOperators.LSHR, 17).and(MASK_17); // NOTE(review): mask looks redundant after >>> 17 if MASK_17 is 17 bits wide
+ // ... and the low 2 bits of word 1 are moved up to bit 15 to complete it.
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 4);
+ oV = iV.and(3).lanewise(VectorOperators.LSHL, 15).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+ // Field 2 lies wholly inside word 1, at bit offset 2.
+ iV.lanewise(VectorOperators.LSHR, 2).and(MASK_17).intoArray(out, outpos);
+ outpos += VLEN_128;
+ // The same steps repeat below; the bit offset inside the word advances by 17 (mod 32) per field.
+ oV = iV.lanewise(VectorOperators.LSHR, 19).and(MASK_17);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 8);
+ oV = iV.and(15).lanewise(VectorOperators.LSHL, 13).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 4).and(MASK_17).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 21).and(MASK_17);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 12);
+ oV = iV.and(63).lanewise(VectorOperators.LSHL, 11).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 6).and(MASK_17).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 23).and(MASK_17);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 16);
+ oV = iV.and(255).lanewise(VectorOperators.LSHL, 9).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 8).and(MASK_17).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 25).and(MASK_17);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 20);
+ oV = iV.and(1023).lanewise(VectorOperators.LSHL, 7).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 10).and(MASK_17).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 27).and(MASK_17);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 24);
+ oV = iV.and(4095).lanewise(VectorOperators.LSHL, 5).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 12).and(MASK_17).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 29).and(MASK_17);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 28);
+ oV = iV.and(16383).lanewise(VectorOperators.LSHL, 3).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 14).and(MASK_17).intoArray(out, outpos);
+ outpos += VLEN_128;
+ // Only 1 bit of the next field is left in this word; the other 16 come from the following word.
+ oV = iV.lanewise(VectorOperators.LSHR, 31).and(MASK_17);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 32);
+ oV = iV.and(65535).lanewise(VectorOperators.LSHL, 1).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+ // Just 16 bits remain in this word, so no whole field fits and another straddling field follows directly.
+ oV = iV.lanewise(VectorOperators.LSHR, 16).and(MASK_17);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 36);
+ oV = iV.and(1).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 1).and(MASK_17).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 18).and(MASK_17);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 40);
+ oV = iV.and(7).lanewise(VectorOperators.LSHL, 14).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 3).and(MASK_17).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 20).and(MASK_17);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 44);
+ oV = iV.and(31).lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 5).and(MASK_17).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 22).and(MASK_17);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 48);
+ oV = iV.and(127).lanewise(VectorOperators.LSHL, 10).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 7).and(MASK_17).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 24).and(MASK_17);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 52);
+ oV = iV.and(511).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 9).and(MASK_17).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 26).and(MASK_17);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 56);
+ oV = iV.and(2047).lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 11).and(MASK_17).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 28).and(MASK_17);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 60);
+ oV = iV.and(8191).lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 13).and(MASK_17).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 30).and(MASK_17);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 64);
+ oV = iV.and(32767).lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 15).and(MASK_17).intoArray(out, outpos);
+ outpos += VLEN_128;
+ // 32 fields used exactly 17 words per lane (32 * 17 = 17 * 32 bits); the second half restarts on a word boundary.
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 68);
+ iV.and(MASK_17).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 17).and(MASK_17);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 72);
+ oV = iV.and(3).lanewise(VectorOperators.LSHL, 15).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 2).and(MASK_17).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 19).and(MASK_17);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 76);
+ oV = iV.and(15).lanewise(VectorOperators.LSHL, 13).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 4).and(MASK_17).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 21).and(MASK_17);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 80);
+ oV = iV.and(63).lanewise(VectorOperators.LSHL, 11).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 6).and(MASK_17).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 23).and(MASK_17);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 84);
+ oV = iV.and(255).lanewise(VectorOperators.LSHL, 9).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 8).and(MASK_17).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 25).and(MASK_17);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 88);
+ oV = iV.and(1023).lanewise(VectorOperators.LSHL, 7).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 10).and(MASK_17).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 27).and(MASK_17);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 92);
+ oV = iV.and(4095).lanewise(VectorOperators.LSHL, 5).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 12).and(MASK_17).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 29).and(MASK_17);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 96);
+ oV = iV.and(16383).lanewise(VectorOperators.LSHL, 3).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 14).and(MASK_17).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 31).and(MASK_17);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 100);
+ oV = iV.and(65535).lanewise(VectorOperators.LSHL, 1).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 16).and(MASK_17);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 104);
+ oV = iV.and(1).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 1).and(MASK_17).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 18).and(MASK_17);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 108);
+ oV = iV.and(7).lanewise(VectorOperators.LSHL, 14).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 3).and(MASK_17).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 20).and(MASK_17);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 112);
+ oV = iV.and(31).lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 5).and(MASK_17).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 22).and(MASK_17);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 116);
+ oV = iV.and(127).lanewise(VectorOperators.LSHL, 10).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 7).and(MASK_17).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 24).and(MASK_17);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 120);
+ oV = iV.and(511).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 9).and(MASK_17).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 26).and(MASK_17);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 124);
+ oV = iV.and(2047).lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 11).and(MASK_17).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 28).and(MASK_17);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 128);
+ oV = iV.and(8191).lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 13).and(MASK_17).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 30).and(MASK_17);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 132);
+ oV = iV.and(32767).lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 15).and(MASK_17).intoArray(out, outpos); // 64th and final store; outpos is not advanced afterwards
+ }
+
+ private static void fastpack18(final int[] in, int inpos, final int[] out, int outpos) { // Packs 18-bit values: 64 vector loads from in (inpos + 0, 4, ..., 252), 36 vector stores to out.
+ var iV = IntVector.fromArray(SPECIES_128, in, inpos);
+ var oV = iV.and(MASK_18); // value 0 -> bits 0..17 (assumes MASK_18 keeps the low 18 bits; its definition is not shown here)
+ // Every input is masked before it is shifted, so bits above the field width cannot leak into neighbouring fields.
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 4);
+ oV = iV.and(MASK_18).lanewise(VectorOperators.LSHL, 18).or(oV); // value 1 starts at bit 18; only its low 14 bits fit in this word
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+ // The 4 high bits of value 1 that were shifted out above start the next output word.
+ oV = iV.and(MASK_18).lanewise(VectorOperators.LSHR, 14);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 8);
+ oV = iV.and(MASK_18).lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 12);
+ oV = iV.and(MASK_18).lanewise(VectorOperators.LSHL, 22).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+ // Same pattern from here on: carry the spilled high bits, OR in the next values, store when the word is full.
+ oV = iV.and(MASK_18).lanewise(VectorOperators.LSHR, 10);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 16);
+ oV = iV.and(MASK_18).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 20);
+ oV = iV.and(MASK_18).lanewise(VectorOperators.LSHL, 26).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_18).lanewise(VectorOperators.LSHR, 6);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 24);
+ oV = iV.and(MASK_18).lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 28);
+ oV = iV.and(MASK_18).lanewise(VectorOperators.LSHL, 30).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_18).lanewise(VectorOperators.LSHR, 2);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 32);
+ oV = iV.and(MASK_18).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_18).lanewise(VectorOperators.LSHR, 16);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 36);
+ oV = iV.and(MASK_18).lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 40);
+ oV = iV.and(MASK_18).lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_18).lanewise(VectorOperators.LSHR, 12);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 44);
+ oV = iV.and(MASK_18).lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 48);
+ oV = iV.and(MASK_18).lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_18).lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 52);
+ oV = iV.and(MASK_18).lanewise(VectorOperators.LSHL, 10).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 56);
+ oV = iV.and(MASK_18).lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_18).lanewise(VectorOperators.LSHR, 4);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 60);
+ oV = iV.and(MASK_18).lanewise(VectorOperators.LSHL, 14).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+ // 16 values filled exactly 9 output words per lane (16 * 18 = 9 * 32 bits); the second group starts on a fresh word.
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 64);
+ oV = iV.and(MASK_18);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 68);
+ oV = iV.and(MASK_18).lanewise(VectorOperators.LSHL, 18).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_18).lanewise(VectorOperators.LSHR, 14);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 72);
+ oV = iV.and(MASK_18).lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 76);
+ oV = iV.and(MASK_18).lanewise(VectorOperators.LSHL, 22).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_18).lanewise(VectorOperators.LSHR, 10);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 80);
+ oV = iV.and(MASK_18).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 84);
+ oV = iV.and(MASK_18).lanewise(VectorOperators.LSHL, 26).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_18).lanewise(VectorOperators.LSHR, 6);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 88);
+ oV = iV.and(MASK_18).lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 92);
+ oV = iV.and(MASK_18).lanewise(VectorOperators.LSHL, 30).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_18).lanewise(VectorOperators.LSHR, 2);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 96);
+ oV = iV.and(MASK_18).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_18).lanewise(VectorOperators.LSHR, 16);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 100);
+ oV = iV.and(MASK_18).lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 104);
+ oV = iV.and(MASK_18).lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_18).lanewise(VectorOperators.LSHR, 12);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 108);
+ oV = iV.and(MASK_18).lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 112);
+ oV = iV.and(MASK_18).lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_18).lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 116);
+ oV = iV.and(MASK_18).lanewise(VectorOperators.LSHL, 10).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 120);
+ oV = iV.and(MASK_18).lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_18).lanewise(VectorOperators.LSHR, 4);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 124);
+ oV = iV.and(MASK_18).lanewise(VectorOperators.LSHL, 14).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+ // Third group of 16 input vectors, again starting on a fresh output word.
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 128);
+ oV = iV.and(MASK_18);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 132);
+ oV = iV.and(MASK_18).lanewise(VectorOperators.LSHL, 18).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_18).lanewise(VectorOperators.LSHR, 14);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 136);
+ oV = iV.and(MASK_18).lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 140);
+ oV = iV.and(MASK_18).lanewise(VectorOperators.LSHL, 22).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_18).lanewise(VectorOperators.LSHR, 10);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 144);
+ oV = iV.and(MASK_18).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 148);
+ oV = iV.and(MASK_18).lanewise(VectorOperators.LSHL, 26).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_18).lanewise(VectorOperators.LSHR, 6);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 152);
+ oV = iV.and(MASK_18).lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 156);
+ oV = iV.and(MASK_18).lanewise(VectorOperators.LSHL, 30).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_18).lanewise(VectorOperators.LSHR, 2);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 160);
+ oV = iV.and(MASK_18).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_18).lanewise(VectorOperators.LSHR, 16);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 164);
+ oV = iV.and(MASK_18).lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 168);
+ oV = iV.and(MASK_18).lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_18).lanewise(VectorOperators.LSHR, 12);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 172);
+ oV = iV.and(MASK_18).lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 176);
+ oV = iV.and(MASK_18).lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_18).lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 180);
+ oV = iV.and(MASK_18).lanewise(VectorOperators.LSHL, 10).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 184);
+ oV = iV.and(MASK_18).lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_18).lanewise(VectorOperators.LSHR, 4);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 188);
+ oV = iV.and(MASK_18).lanewise(VectorOperators.LSHL, 14).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+ // Fourth and last group of 16 input vectors.
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 192);
+ oV = iV.and(MASK_18);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 196);
+ oV = iV.and(MASK_18).lanewise(VectorOperators.LSHL, 18).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_18).lanewise(VectorOperators.LSHR, 14);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 200);
+ oV = iV.and(MASK_18).lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 204);
+ oV = iV.and(MASK_18).lanewise(VectorOperators.LSHL, 22).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_18).lanewise(VectorOperators.LSHR, 10);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 208);
+ oV = iV.and(MASK_18).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 212);
+ oV = iV.and(MASK_18).lanewise(VectorOperators.LSHL, 26).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_18).lanewise(VectorOperators.LSHR, 6);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 216);
+ oV = iV.and(MASK_18).lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 220);
+ oV = iV.and(MASK_18).lanewise(VectorOperators.LSHL, 30).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_18).lanewise(VectorOperators.LSHR, 2);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 224);
+ oV = iV.and(MASK_18).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_18).lanewise(VectorOperators.LSHR, 16);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 228);
+ oV = iV.and(MASK_18).lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 232);
+ oV = iV.and(MASK_18).lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_18).lanewise(VectorOperators.LSHR, 12);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 236);
+ oV = iV.and(MASK_18).lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 240);
+ oV = iV.and(MASK_18).lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_18).lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 244);
+ oV = iV.and(MASK_18).lanewise(VectorOperators.LSHL, 10).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 248);
+ oV = iV.and(MASK_18).lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_18).lanewise(VectorOperators.LSHR, 4);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 252);
+ oV = iV.and(MASK_18).lanewise(VectorOperators.LSHL, 14).or(oV);
+
+ oV.intoArray(out, outpos); // 36th and final store; outpos is not advanced afterwards
+ }
+
+ private static void fastpackNoMask18(final int[] in, int inpos, final int[] out, int outpos) {
+ var iV = IntVector.fromArray(SPECIES_128, in, inpos);
+ var oV = iV;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 4);
+ oV = iV.lanewise(VectorOperators.LSHL, 18).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 14);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 8);
+ oV = iV.lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 12);
+ oV = iV.lanewise(VectorOperators.LSHL, 22).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 10);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 16);
+ oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 20);
+ oV = iV.lanewise(VectorOperators.LSHL, 26).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 6);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 24);
+ oV = iV.lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 28);
+ oV = iV.lanewise(VectorOperators.LSHL, 30).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 2);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 32);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 16);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 36);
+ oV = iV.lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 40);
+ oV = iV.lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 12);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 44);
+ oV = iV.lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 48);
+ oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 52);
+ oV = iV.lanewise(VectorOperators.LSHL, 10).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 56);
+ oV = iV.lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 4);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 60);
+ oV = iV.lanewise(VectorOperators.LSHL, 14).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 64);
+ oV = iV;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 68);
+ oV = iV.lanewise(VectorOperators.LSHL, 18).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 14);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 72);
+ oV = iV.lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 76);
+ oV = iV.lanewise(VectorOperators.LSHL, 22).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 10);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 80);
+ oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 84);
+ oV = iV.lanewise(VectorOperators.LSHL, 26).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 6);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 88);
+ oV = iV.lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 92);
+ oV = iV.lanewise(VectorOperators.LSHL, 30).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 2);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 96);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 16);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 100);
+ oV = iV.lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 104);
+ oV = iV.lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 12);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 108);
+ oV = iV.lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 112);
+ oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 116);
+ oV = iV.lanewise(VectorOperators.LSHL, 10).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 120);
+ oV = iV.lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 4);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 124);
+ oV = iV.lanewise(VectorOperators.LSHL, 14).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 128);
+ oV = iV;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 132);
+ oV = iV.lanewise(VectorOperators.LSHL, 18).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 14);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 136);
+ oV = iV.lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 140);
+ oV = iV.lanewise(VectorOperators.LSHL, 22).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 10);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 144);
+ oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 148);
+ oV = iV.lanewise(VectorOperators.LSHL, 26).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 6);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 152);
+ oV = iV.lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 156);
+ oV = iV.lanewise(VectorOperators.LSHL, 30).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 2);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 160);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 16);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 164);
+ oV = iV.lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 168);
+ oV = iV.lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 12);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 172);
+ oV = iV.lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 176);
+ oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 180);
+ oV = iV.lanewise(VectorOperators.LSHL, 10).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 184);
+ oV = iV.lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 4);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 188);
+ oV = iV.lanewise(VectorOperators.LSHL, 14).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 192);
+ oV = iV;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 196);
+ oV = iV.lanewise(VectorOperators.LSHL, 18).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 14);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 200);
+ oV = iV.lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 204);
+ oV = iV.lanewise(VectorOperators.LSHL, 22).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 10);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 208);
+ oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 212);
+ oV = iV.lanewise(VectorOperators.LSHL, 26).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 6);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 216);
+ oV = iV.lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 220);
+ oV = iV.lanewise(VectorOperators.LSHL, 30).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 2);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 224);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 16);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 228);
+ oV = iV.lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 232);
+ oV = iV.lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 12);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 236);
+ oV = iV.lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 240);
+ oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 244);
+ oV = iV.lanewise(VectorOperators.LSHL, 10).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 248);
+ oV = iV.lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 4);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 252);
+ oV = iV.lanewise(VectorOperators.LSHL, 14).or(oV);
+
+ oV.intoArray(out, outpos);
+ }
+
+ private static void fastunpack18(final int[] in, int inpos, final int[] out, int outpos) {
+ var iV = IntVector.fromArray(SPECIES_128, in, inpos);
+ iV.and(MASK_18).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ var oV = iV.lanewise(VectorOperators.LSHR, 18).and(MASK_18);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 4);
+ oV = iV.and(15).lanewise(VectorOperators.LSHL, 14).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 4).and(MASK_18).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 22).and(MASK_18);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 8);
+ oV = iV.and(255).lanewise(VectorOperators.LSHL, 10).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 8).and(MASK_18).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 26).and(MASK_18);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 12);
+ oV = iV.and(4095).lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 12).and(MASK_18).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 30).and(MASK_18);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 16);
+ oV = iV.and(65535).lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 16).and(MASK_18);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 20);
+ oV = iV.and(3).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 2).and(MASK_18).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 20).and(MASK_18);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 24);
+ oV = iV.and(63).lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 6).and(MASK_18).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 24).and(MASK_18);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 28);
+ oV = iV.and(1023).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 10).and(MASK_18).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 28).and(MASK_18);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 32);
+ oV = iV.and(16383).lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 14).and(MASK_18).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 36);
+ iV.and(MASK_18).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 18).and(MASK_18);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 40);
+ oV = iV.and(15).lanewise(VectorOperators.LSHL, 14).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 4).and(MASK_18).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 22).and(MASK_18);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 44);
+ oV = iV.and(255).lanewise(VectorOperators.LSHL, 10).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 8).and(MASK_18).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 26).and(MASK_18);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 48);
+ oV = iV.and(4095).lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 12).and(MASK_18).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 30).and(MASK_18);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 52);
+ oV = iV.and(65535).lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 16).and(MASK_18);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 56);
+ oV = iV.and(3).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 2).and(MASK_18).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 20).and(MASK_18);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 60);
+ oV = iV.and(63).lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 6).and(MASK_18).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 24).and(MASK_18);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 64);
+ oV = iV.and(1023).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 10).and(MASK_18).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 28).and(MASK_18);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 68);
+ oV = iV.and(16383).lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 14).and(MASK_18).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 72);
+ iV.and(MASK_18).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 18).and(MASK_18);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 76);
+ oV = iV.and(15).lanewise(VectorOperators.LSHL, 14).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 4).and(MASK_18).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 22).and(MASK_18);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 80);
+ oV = iV.and(255).lanewise(VectorOperators.LSHL, 10).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 8).and(MASK_18).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 26).and(MASK_18);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 84);
+ oV = iV.and(4095).lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 12).and(MASK_18).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 30).and(MASK_18);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 88);
+ oV = iV.and(65535).lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 16).and(MASK_18);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 92);
+ oV = iV.and(3).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 2).and(MASK_18).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 20).and(MASK_18);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 96);
+ oV = iV.and(63).lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 6).and(MASK_18).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 24).and(MASK_18);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 100);
+ oV = iV.and(1023).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 10).and(MASK_18).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 28).and(MASK_18);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 104);
+ oV = iV.and(16383).lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 14).and(MASK_18).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 108);
+ iV.and(MASK_18).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 18).and(MASK_18);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 112);
+ oV = iV.and(15).lanewise(VectorOperators.LSHL, 14).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 4).and(MASK_18).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 22).and(MASK_18);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 116);
+ oV = iV.and(255).lanewise(VectorOperators.LSHL, 10).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 8).and(MASK_18).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 26).and(MASK_18);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 120);
+ oV = iV.and(4095).lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 12).and(MASK_18).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 30).and(MASK_18);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 124);
+ oV = iV.and(65535).lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 16).and(MASK_18);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 128);
+ oV = iV.and(3).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 2).and(MASK_18).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 20).and(MASK_18);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 132);
+ oV = iV.and(63).lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 6).and(MASK_18).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 24).and(MASK_18);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 136);
+ oV = iV.and(1023).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 10).and(MASK_18).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 28).and(MASK_18);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 140);
+ oV = iV.and(16383).lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 14).and(MASK_18).intoArray(out, outpos);
+ }
+
+ private static void fastpack19(final int[] in, int inpos, final int[] out, int outpos) {
+ var iV = IntVector.fromArray(SPECIES_128, in, inpos);
+ var oV = iV.and(MASK_19);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 4);
+ oV = iV.and(MASK_19).lanewise(VectorOperators.LSHL, 19).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_19).lanewise(VectorOperators.LSHR, 13);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 8);
+ oV = iV.and(MASK_19).lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 12);
+ oV = iV.and(MASK_19).lanewise(VectorOperators.LSHL, 25).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_19).lanewise(VectorOperators.LSHR, 7);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 16);
+ oV = iV.and(MASK_19).lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 20);
+ oV = iV.and(MASK_19).lanewise(VectorOperators.LSHL, 31).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_19).lanewise(VectorOperators.LSHR, 1);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 24);
+ oV = iV.and(MASK_19).lanewise(VectorOperators.LSHL, 18).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_19).lanewise(VectorOperators.LSHR, 14);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 28);
+ oV = iV.and(MASK_19).lanewise(VectorOperators.LSHL, 5).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 32);
+ oV = iV.and(MASK_19).lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_19).lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 36);
+ oV = iV.and(MASK_19).lanewise(VectorOperators.LSHL, 11).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 40);
+ oV = iV.and(MASK_19).lanewise(VectorOperators.LSHL, 30).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_19).lanewise(VectorOperators.LSHR, 2);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 44);
+ oV = iV.and(MASK_19).lanewise(VectorOperators.LSHL, 17).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_19).lanewise(VectorOperators.LSHR, 15);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 48);
+ oV = iV.and(MASK_19).lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 52);
+ oV = iV.and(MASK_19).lanewise(VectorOperators.LSHL, 23).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_19).lanewise(VectorOperators.LSHR, 9);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 56);
+ oV = iV.and(MASK_19).lanewise(VectorOperators.LSHL, 10).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 60);
+ oV = iV.and(MASK_19).lanewise(VectorOperators.LSHL, 29).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_19).lanewise(VectorOperators.LSHR, 3);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 64);
+ oV = iV.and(MASK_19).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_19).lanewise(VectorOperators.LSHR, 16);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 68);
+ oV = iV.and(MASK_19).lanewise(VectorOperators.LSHL, 3).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 72);
+ oV = iV.and(MASK_19).lanewise(VectorOperators.LSHL, 22).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_19).lanewise(VectorOperators.LSHR, 10);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 76);
+ oV = iV.and(MASK_19).lanewise(VectorOperators.LSHL, 9).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 80);
+ oV = iV.and(MASK_19).lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_19).lanewise(VectorOperators.LSHR, 4);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 84);
+ oV = iV.and(MASK_19).lanewise(VectorOperators.LSHL, 15).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_19).lanewise(VectorOperators.LSHR, 17);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 88);
+ oV = iV.and(MASK_19).lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 92);
+ oV = iV.and(MASK_19).lanewise(VectorOperators.LSHL, 21).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_19).lanewise(VectorOperators.LSHR, 11);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 96);
+ oV = iV.and(MASK_19).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 100);
+ oV = iV.and(MASK_19).lanewise(VectorOperators.LSHL, 27).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_19).lanewise(VectorOperators.LSHR, 5);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 104);
+ oV = iV.and(MASK_19).lanewise(VectorOperators.LSHL, 14).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_19).lanewise(VectorOperators.LSHR, 18);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 108);
+ oV = iV.and(MASK_19).lanewise(VectorOperators.LSHL, 1).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 112);
+ oV = iV.and(MASK_19).lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_19).lanewise(VectorOperators.LSHR, 12);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 116);
+ oV = iV.and(MASK_19).lanewise(VectorOperators.LSHL, 7).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 120);
+ oV = iV.and(MASK_19).lanewise(VectorOperators.LSHL, 26).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_19).lanewise(VectorOperators.LSHR, 6);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 124);
+ oV = iV.and(MASK_19).lanewise(VectorOperators.LSHL, 13).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 128);
+ oV = iV.and(MASK_19);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 132);
+ oV = iV.and(MASK_19).lanewise(VectorOperators.LSHL, 19).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_19).lanewise(VectorOperators.LSHR, 13);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 136);
+ oV = iV.and(MASK_19).lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 140);
+ oV = iV.and(MASK_19).lanewise(VectorOperators.LSHL, 25).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_19).lanewise(VectorOperators.LSHR, 7);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 144);
+ oV = iV.and(MASK_19).lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 148);
+ oV = iV.and(MASK_19).lanewise(VectorOperators.LSHL, 31).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_19).lanewise(VectorOperators.LSHR, 1);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 152);
+ oV = iV.and(MASK_19).lanewise(VectorOperators.LSHL, 18).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_19).lanewise(VectorOperators.LSHR, 14);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 156);
+ oV = iV.and(MASK_19).lanewise(VectorOperators.LSHL, 5).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 160);
+ oV = iV.and(MASK_19).lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_19).lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 164);
+ oV = iV.and(MASK_19).lanewise(VectorOperators.LSHL, 11).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 168);
+ oV = iV.and(MASK_19).lanewise(VectorOperators.LSHL, 30).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_19).lanewise(VectorOperators.LSHR, 2);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 172);
+ oV = iV.and(MASK_19).lanewise(VectorOperators.LSHL, 17).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_19).lanewise(VectorOperators.LSHR, 15);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 176);
+ oV = iV.and(MASK_19).lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 180);
+ oV = iV.and(MASK_19).lanewise(VectorOperators.LSHL, 23).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_19).lanewise(VectorOperators.LSHR, 9);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 184);
+ oV = iV.and(MASK_19).lanewise(VectorOperators.LSHL, 10).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 188);
+ oV = iV.and(MASK_19).lanewise(VectorOperators.LSHL, 29).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_19).lanewise(VectorOperators.LSHR, 3);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 192);
+ oV = iV.and(MASK_19).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_19).lanewise(VectorOperators.LSHR, 16);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 196);
+ oV = iV.and(MASK_19).lanewise(VectorOperators.LSHL, 3).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 200);
+ oV = iV.and(MASK_19).lanewise(VectorOperators.LSHL, 22).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_19).lanewise(VectorOperators.LSHR, 10);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 204);
+ oV = iV.and(MASK_19).lanewise(VectorOperators.LSHL, 9).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 208);
+ oV = iV.and(MASK_19).lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_19).lanewise(VectorOperators.LSHR, 4);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 212);
+ oV = iV.and(MASK_19).lanewise(VectorOperators.LSHL, 15).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_19).lanewise(VectorOperators.LSHR, 17);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 216);
+ oV = iV.and(MASK_19).lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 220);
+ oV = iV.and(MASK_19).lanewise(VectorOperators.LSHL, 21).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_19).lanewise(VectorOperators.LSHR, 11);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 224);
+ oV = iV.and(MASK_19).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 228);
+ oV = iV.and(MASK_19).lanewise(VectorOperators.LSHL, 27).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_19).lanewise(VectorOperators.LSHR, 5);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 232);
+ oV = iV.and(MASK_19).lanewise(VectorOperators.LSHL, 14).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_19).lanewise(VectorOperators.LSHR, 18);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 236);
+ oV = iV.and(MASK_19).lanewise(VectorOperators.LSHL, 1).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 240);
+ oV = iV.and(MASK_19).lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_19).lanewise(VectorOperators.LSHR, 12);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 244);
+ oV = iV.and(MASK_19).lanewise(VectorOperators.LSHL, 7).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 248);
+ oV = iV.and(MASK_19).lanewise(VectorOperators.LSHL, 26).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_19).lanewise(VectorOperators.LSHR, 6);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 252);
+ oV = iV.and(MASK_19).lanewise(VectorOperators.LSHL, 13).or(oV);
+
+ oV.intoArray(out, outpos);
+ }
+
+ private static void fastpackNoMask19(final int[] in, int inpos, final int[] out, int outpos) {
+ var iV = IntVector.fromArray(SPECIES_128, in, inpos);
+ var oV = iV;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 4);
+ oV = iV.lanewise(VectorOperators.LSHL, 19).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 13);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 8);
+ oV = iV.lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 12);
+ oV = iV.lanewise(VectorOperators.LSHL, 25).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 7);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 16);
+ oV = iV.lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 20);
+ oV = iV.lanewise(VectorOperators.LSHL, 31).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 1);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 24);
+ oV = iV.lanewise(VectorOperators.LSHL, 18).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 14);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 28);
+ oV = iV.lanewise(VectorOperators.LSHL, 5).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 32);
+ oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 36);
+ oV = iV.lanewise(VectorOperators.LSHL, 11).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 40);
+ oV = iV.lanewise(VectorOperators.LSHL, 30).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 2);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 44);
+ oV = iV.lanewise(VectorOperators.LSHL, 17).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 15);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 48);
+ oV = iV.lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 52);
+ oV = iV.lanewise(VectorOperators.LSHL, 23).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 9);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 56);
+ oV = iV.lanewise(VectorOperators.LSHL, 10).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 60);
+ oV = iV.lanewise(VectorOperators.LSHL, 29).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 3);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 64);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 16);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 68);
+ oV = iV.lanewise(VectorOperators.LSHL, 3).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 72);
+ oV = iV.lanewise(VectorOperators.LSHL, 22).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 10);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 76);
+ oV = iV.lanewise(VectorOperators.LSHL, 9).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 80);
+ oV = iV.lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 4);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 84);
+ oV = iV.lanewise(VectorOperators.LSHL, 15).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 17);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 88);
+ oV = iV.lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 92);
+ oV = iV.lanewise(VectorOperators.LSHL, 21).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 11);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 96);
+ oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 100);
+ oV = iV.lanewise(VectorOperators.LSHL, 27).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 5);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 104);
+ oV = iV.lanewise(VectorOperators.LSHL, 14).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 18);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 108);
+ oV = iV.lanewise(VectorOperators.LSHL, 1).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 112);
+ oV = iV.lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 12);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 116);
+ oV = iV.lanewise(VectorOperators.LSHL, 7).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 120);
+ oV = iV.lanewise(VectorOperators.LSHL, 26).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 6);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 124);
+ oV = iV.lanewise(VectorOperators.LSHL, 13).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 128);
+ oV = iV;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 132);
+ oV = iV.lanewise(VectorOperators.LSHL, 19).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 13);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 136);
+ oV = iV.lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 140);
+ oV = iV.lanewise(VectorOperators.LSHL, 25).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 7);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 144);
+ oV = iV.lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 148);
+ oV = iV.lanewise(VectorOperators.LSHL, 31).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 1);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 152);
+ oV = iV.lanewise(VectorOperators.LSHL, 18).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 14);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 156);
+ oV = iV.lanewise(VectorOperators.LSHL, 5).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 160);
+ oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 164);
+ oV = iV.lanewise(VectorOperators.LSHL, 11).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 168);
+ oV = iV.lanewise(VectorOperators.LSHL, 30).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 2);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 172);
+ oV = iV.lanewise(VectorOperators.LSHL, 17).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 15);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 176);
+ oV = iV.lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 180);
+ oV = iV.lanewise(VectorOperators.LSHL, 23).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 9);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 184);
+ oV = iV.lanewise(VectorOperators.LSHL, 10).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 188);
+ oV = iV.lanewise(VectorOperators.LSHL, 29).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 3);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 192);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 16);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 196);
+ oV = iV.lanewise(VectorOperators.LSHL, 3).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 200);
+ oV = iV.lanewise(VectorOperators.LSHL, 22).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 10);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 204);
+ oV = iV.lanewise(VectorOperators.LSHL, 9).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 208);
+ oV = iV.lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 4);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 212);
+ oV = iV.lanewise(VectorOperators.LSHL, 15).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 17);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 216);
+ oV = iV.lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 220);
+ oV = iV.lanewise(VectorOperators.LSHL, 21).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 11);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 224);
+ oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 228);
+ oV = iV.lanewise(VectorOperators.LSHL, 27).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 5);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 232);
+ oV = iV.lanewise(VectorOperators.LSHL, 14).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 18);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 236);
+ oV = iV.lanewise(VectorOperators.LSHL, 1).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 240);
+ oV = iV.lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 12);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 244);
+ oV = iV.lanewise(VectorOperators.LSHL, 7).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 248);
+ oV = iV.lanewise(VectorOperators.LSHL, 26).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 6);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 252);
+ oV = iV.lanewise(VectorOperators.LSHL, 13).or(oV);
+
+ oV.intoArray(out, outpos);
+ }
+
+ private static void fastunpack19(final int[] in, int inpos, final int[] out, int outpos) {
+ var iV = IntVector.fromArray(SPECIES_128, in, inpos);
+ iV.and(MASK_19).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ var oV = iV.lanewise(VectorOperators.LSHR, 19).and(MASK_19);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 4);
+ oV = iV.and(63).lanewise(VectorOperators.LSHL, 13).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 6).and(MASK_19).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 25).and(MASK_19);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 8);
+ oV = iV.and(4095).lanewise(VectorOperators.LSHL, 7).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 12).and(MASK_19).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 31).and(MASK_19);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 12);
+ oV = iV.and(262143).lanewise(VectorOperators.LSHL, 1).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 18).and(MASK_19);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 16);
+ oV = iV.and(31).lanewise(VectorOperators.LSHL, 14).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 5).and(MASK_19).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 24).and(MASK_19);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 20);
+ oV = iV.and(2047).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 11).and(MASK_19).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 30).and(MASK_19);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 24);
+ oV = iV.and(131071).lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 17).and(MASK_19);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 28);
+ oV = iV.and(15).lanewise(VectorOperators.LSHL, 15).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 4).and(MASK_19).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 23).and(MASK_19);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 32);
+ oV = iV.and(1023).lanewise(VectorOperators.LSHL, 9).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 10).and(MASK_19).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 29).and(MASK_19);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 36);
+ oV = iV.and(65535).lanewise(VectorOperators.LSHL, 3).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 16).and(MASK_19);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 40);
+ oV = iV.and(7).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 3).and(MASK_19).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 22).and(MASK_19);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 44);
+ oV = iV.and(511).lanewise(VectorOperators.LSHL, 10).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 9).and(MASK_19).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 28).and(MASK_19);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 48);
+ oV = iV.and(32767).lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 15).and(MASK_19);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 52);
+ oV = iV.and(3).lanewise(VectorOperators.LSHL, 17).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 2).and(MASK_19).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 21).and(MASK_19);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 56);
+ oV = iV.and(255).lanewise(VectorOperators.LSHL, 11).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 8).and(MASK_19).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 27).and(MASK_19);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 60);
+ oV = iV.and(16383).lanewise(VectorOperators.LSHL, 5).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 14).and(MASK_19);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 64);
+ oV = iV.and(1).lanewise(VectorOperators.LSHL, 18).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 1).and(MASK_19).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 20).and(MASK_19);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 68);
+ oV = iV.and(127).lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 7).and(MASK_19).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 26).and(MASK_19);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 72);
+ oV = iV.and(8191).lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 13).and(MASK_19).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 76);
+ iV.and(MASK_19).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 19).and(MASK_19);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 80);
+ oV = iV.and(63).lanewise(VectorOperators.LSHL, 13).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 6).and(MASK_19).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 25).and(MASK_19);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 84);
+ oV = iV.and(4095).lanewise(VectorOperators.LSHL, 7).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 12).and(MASK_19).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 31).and(MASK_19);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 88);
+ oV = iV.and(262143).lanewise(VectorOperators.LSHL, 1).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 18).and(MASK_19);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 92);
+ oV = iV.and(31).lanewise(VectorOperators.LSHL, 14).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 5).and(MASK_19).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 24).and(MASK_19);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 96);
+ oV = iV.and(2047).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 11).and(MASK_19).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 30).and(MASK_19);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 100);
+ oV = iV.and(131071).lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 17).and(MASK_19);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 104);
+ oV = iV.and(15).lanewise(VectorOperators.LSHL, 15).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 4).and(MASK_19).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 23).and(MASK_19);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 108);
+ oV = iV.and(1023).lanewise(VectorOperators.LSHL, 9).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 10).and(MASK_19).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 29).and(MASK_19);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 112);
+ oV = iV.and(65535).lanewise(VectorOperators.LSHL, 3).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 16).and(MASK_19);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 116);
+ oV = iV.and(7).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 3).and(MASK_19).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 22).and(MASK_19);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 120);
+ oV = iV.and(511).lanewise(VectorOperators.LSHL, 10).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 9).and(MASK_19).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 28).and(MASK_19);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 124);
+ oV = iV.and(32767).lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 15).and(MASK_19);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 128);
+ oV = iV.and(3).lanewise(VectorOperators.LSHL, 17).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 2).and(MASK_19).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 21).and(MASK_19);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 132);
+ oV = iV.and(255).lanewise(VectorOperators.LSHL, 11).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 8).and(MASK_19).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 27).and(MASK_19);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 136);
+ oV = iV.and(16383).lanewise(VectorOperators.LSHL, 5).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 14).and(MASK_19);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 140);
+ oV = iV.and(1).lanewise(VectorOperators.LSHL, 18).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 1).and(MASK_19).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 20).and(MASK_19);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 144);
+ oV = iV.and(127).lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 7).and(MASK_19).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 26).and(MASK_19);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 148);
+ oV = iV.and(8191).lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 13).and(MASK_19).intoArray(out, outpos);
+ }
+
+ private static void fastpack20(final int[] in, int inpos, final int[] out, int outpos) {
+ var iV = IntVector.fromArray(SPECIES_128, in, inpos);
+ var oV = iV.and(MASK_20);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 4);
+ oV = iV.and(MASK_20).lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_20).lanewise(VectorOperators.LSHR, 12);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 8);
+ oV = iV.and(MASK_20).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 12);
+ oV = iV.and(MASK_20).lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_20).lanewise(VectorOperators.LSHR, 4);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 16);
+ oV = iV.and(MASK_20).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_20).lanewise(VectorOperators.LSHR, 16);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 20);
+ oV = iV.and(MASK_20).lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 24);
+ oV = iV.and(MASK_20).lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_20).lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 28);
+ oV = iV.and(MASK_20).lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 32);
+ oV = iV.and(MASK_20);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 36);
+ oV = iV.and(MASK_20).lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_20).lanewise(VectorOperators.LSHR, 12);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 40);
+ oV = iV.and(MASK_20).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 44);
+ oV = iV.and(MASK_20).lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_20).lanewise(VectorOperators.LSHR, 4);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 48);
+ oV = iV.and(MASK_20).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_20).lanewise(VectorOperators.LSHR, 16);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 52);
+ oV = iV.and(MASK_20).lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 56);
+ oV = iV.and(MASK_20).lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_20).lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 60);
+ oV = iV.and(MASK_20).lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 64);
+ oV = iV.and(MASK_20);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 68);
+ oV = iV.and(MASK_20).lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_20).lanewise(VectorOperators.LSHR, 12);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 72);
+ oV = iV.and(MASK_20).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 76);
+ oV = iV.and(MASK_20).lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_20).lanewise(VectorOperators.LSHR, 4);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 80);
+ oV = iV.and(MASK_20).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_20).lanewise(VectorOperators.LSHR, 16);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 84);
+ oV = iV.and(MASK_20).lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 88);
+ oV = iV.and(MASK_20).lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_20).lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 92);
+ oV = iV.and(MASK_20).lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 96);
+ oV = iV.and(MASK_20);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 100);
+ oV = iV.and(MASK_20).lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_20).lanewise(VectorOperators.LSHR, 12);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 104);
+ oV = iV.and(MASK_20).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 108);
+ oV = iV.and(MASK_20).lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_20).lanewise(VectorOperators.LSHR, 4);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 112);
+ oV = iV.and(MASK_20).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_20).lanewise(VectorOperators.LSHR, 16);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 116);
+ oV = iV.and(MASK_20).lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 120);
+ oV = iV.and(MASK_20).lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_20).lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 124);
+ oV = iV.and(MASK_20).lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 128);
+ oV = iV.and(MASK_20);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 132);
+ oV = iV.and(MASK_20).lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_20).lanewise(VectorOperators.LSHR, 12);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 136);
+ oV = iV.and(MASK_20).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 140);
+ oV = iV.and(MASK_20).lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_20).lanewise(VectorOperators.LSHR, 4);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 144);
+ oV = iV.and(MASK_20).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_20).lanewise(VectorOperators.LSHR, 16);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 148);
+ oV = iV.and(MASK_20).lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 152);
+ oV = iV.and(MASK_20).lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_20).lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 156);
+ oV = iV.and(MASK_20).lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 160);
+ oV = iV.and(MASK_20);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 164);
+ oV = iV.and(MASK_20).lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_20).lanewise(VectorOperators.LSHR, 12);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 168);
+ oV = iV.and(MASK_20).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 172);
+ oV = iV.and(MASK_20).lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_20).lanewise(VectorOperators.LSHR, 4);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 176);
+ oV = iV.and(MASK_20).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_20).lanewise(VectorOperators.LSHR, 16);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 180);
+ oV = iV.and(MASK_20).lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 184);
+ oV = iV.and(MASK_20).lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_20).lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 188);
+ oV = iV.and(MASK_20).lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 192);
+ oV = iV.and(MASK_20);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 196);
+ oV = iV.and(MASK_20).lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_20).lanewise(VectorOperators.LSHR, 12);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 200);
+ oV = iV.and(MASK_20).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 204);
+ oV = iV.and(MASK_20).lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_20).lanewise(VectorOperators.LSHR, 4);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 208);
+ oV = iV.and(MASK_20).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_20).lanewise(VectorOperators.LSHR, 16);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 212);
+ oV = iV.and(MASK_20).lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 216);
+ oV = iV.and(MASK_20).lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_20).lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 220);
+ oV = iV.and(MASK_20).lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 224);
+ oV = iV.and(MASK_20);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 228);
+ oV = iV.and(MASK_20).lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_20).lanewise(VectorOperators.LSHR, 12);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 232);
+ oV = iV.and(MASK_20).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 236);
+ oV = iV.and(MASK_20).lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_20).lanewise(VectorOperators.LSHR, 4);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 240);
+ oV = iV.and(MASK_20).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_20).lanewise(VectorOperators.LSHR, 16);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 244);
+ oV = iV.and(MASK_20).lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 248);
+ oV = iV.and(MASK_20).lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_20).lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 252);
+ oV = iV.and(MASK_20).lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ oV.intoArray(out, outpos);
+ }
+
+ private static void fastpackNoMask20(final int[] in, int inpos, final int[] out, int outpos) {
+ var iV = IntVector.fromArray(SPECIES_128, in, inpos);
+ var oV = iV;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 4);
+ oV = iV.lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 12);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 8);
+ oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 12);
+ oV = iV.lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 4);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 16);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 16);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 20);
+ oV = iV.lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 24);
+ oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 28);
+ oV = iV.lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 32);
+ oV = iV;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 36);
+ oV = iV.lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 12);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 40);
+ oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 44);
+ oV = iV.lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 4);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 48);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 16);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 52);
+ oV = iV.lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 56);
+ oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 60);
+ oV = iV.lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 64);
+ oV = iV;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 68);
+ oV = iV.lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 12);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 72);
+ oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 76);
+ oV = iV.lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 4);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 80);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 16);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 84);
+ oV = iV.lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 88);
+ oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 92);
+ oV = iV.lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 96);
+ oV = iV;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 100);
+ oV = iV.lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 12);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 104);
+ oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 108);
+ oV = iV.lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 4);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 112);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 16);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 116);
+ oV = iV.lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 120);
+ oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 124);
+ oV = iV.lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 128);
+ oV = iV;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 132);
+ oV = iV.lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 12);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 136);
+ oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 140);
+ oV = iV.lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 4);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 144);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 16);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 148);
+ oV = iV.lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 152);
+ oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 156);
+ oV = iV.lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 160);
+ oV = iV;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 164);
+ oV = iV.lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 12);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 168);
+ oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 172);
+ oV = iV.lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 4);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 176);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 16);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 180);
+ oV = iV.lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 184);
+ oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 188);
+ oV = iV.lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 192);
+ oV = iV;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 196);
+ oV = iV.lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 12);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 200);
+ oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 204);
+ oV = iV.lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 4);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 208);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 16);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 212);
+ oV = iV.lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 216);
+ oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 220);
+ oV = iV.lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 224);
+ oV = iV;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 228);
+ oV = iV.lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 12);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 232);
+ oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 236);
+ oV = iV.lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 4);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 240);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 16);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 244);
+ oV = iV.lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 248);
+ oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 252);
+ oV = iV.lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ oV.intoArray(out, outpos);
+ }
+
+ /**
+  * Unpacks 20-bit values from {@code in} into {@code out}; the inverse layout of the
+  * 20-bit pack routines in this class.
+  *
+  * <p>Input is consumed one {@code SPECIES_128} vector at a time, and every operation is
+  * lane-wise, so each lane is decoded as its own independent bit stream. Five packed
+  * vectors (20 ints) expand into eight output vectors, and that step is repeated eight
+  * times, for 40 vector loads and 64 vector stores in total (256 output ints, assuming
+  * {@code VLEN_128} is 4 - confirm against the constant's declaration).
+  *
+  * @param in packed source array
+  * @param inpos index of the first packed word to read
+  * @param out destination array for the unpacked values
+  * @param outpos index of the first unpacked value to write
+  */
+ private static void fastunpack20(final int[] in, int inpos, final int[] out, int outpos) {
+     for (int group = 0; group < 8; group++) {
+         // Value 0: bits 0-19 of word 0.
+         IntVector word = IntVector.fromArray(SPECIES_128, in, inpos);
+         word.and(MASK_20).intoArray(out, outpos);
+         outpos += VLEN_128;
+
+         // Value 1: top 12 bits of word 0 plus low 8 bits of word 1.
+         IntVector value = word.lanewise(VectorOperators.LSHR, 20).and(MASK_20);
+         word = IntVector.fromArray(SPECIES_128, in, inpos + 4);
+         value = word.and(255).lanewise(VectorOperators.LSHL, 12).or(value);
+         value.intoArray(out, outpos);
+         outpos += VLEN_128;
+
+         // Value 2: bits 8-27 of word 1.
+         word.lanewise(VectorOperators.LSHR, 8).and(MASK_20).intoArray(out, outpos);
+         outpos += VLEN_128;
+
+         // Value 3: top 4 bits of word 1 plus low 16 bits of word 2.
+         value = word.lanewise(VectorOperators.LSHR, 28).and(MASK_20);
+         word = IntVector.fromArray(SPECIES_128, in, inpos + 8);
+         value = word.and(65535).lanewise(VectorOperators.LSHL, 4).or(value);
+         value.intoArray(out, outpos);
+         outpos += VLEN_128;
+
+         // Value 4: top 16 bits of word 2 plus low 4 bits of word 3.
+         value = word.lanewise(VectorOperators.LSHR, 16).and(MASK_20);
+         word = IntVector.fromArray(SPECIES_128, in, inpos + 12);
+         value = word.and(15).lanewise(VectorOperators.LSHL, 16).or(value);
+         value.intoArray(out, outpos);
+         outpos += VLEN_128;
+
+         // Value 5: bits 4-23 of word 3.
+         word.lanewise(VectorOperators.LSHR, 4).and(MASK_20).intoArray(out, outpos);
+         outpos += VLEN_128;
+
+         // Value 6: top 8 bits of word 3 plus low 12 bits of word 4.
+         value = word.lanewise(VectorOperators.LSHR, 24).and(MASK_20);
+         word = IntVector.fromArray(SPECIES_128, in, inpos + 16);
+         value = word.and(4095).lanewise(VectorOperators.LSHL, 8).or(value);
+         value.intoArray(out, outpos);
+         outpos += VLEN_128;
+
+         // Value 7: bits 12-31 of word 4.
+         word.lanewise(VectorOperators.LSHR, 12).and(MASK_20).intoArray(out, outpos);
+         outpos += VLEN_128;
+
+         // Advance to the next run of five packed vectors.
+         inpos += 20;
+     }
+ }
+
+ private static void fastpack21(final int[] in, int inpos, final int[] out, int outpos) {
+ var iV = IntVector.fromArray(SPECIES_128, in, inpos);
+ var oV = iV.and(MASK_21);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 4);
+ oV = iV.and(MASK_21).lanewise(VectorOperators.LSHL, 21).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_21).lanewise(VectorOperators.LSHR, 11);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 8);
+ oV = iV.and(MASK_21).lanewise(VectorOperators.LSHL, 10).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 12);
+ oV = iV.and(MASK_21).lanewise(VectorOperators.LSHL, 31).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_21).lanewise(VectorOperators.LSHR, 1);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 16);
+ oV = iV.and(MASK_21).lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_21).lanewise(VectorOperators.LSHR, 12);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 20);
+ oV = iV.and(MASK_21).lanewise(VectorOperators.LSHL, 9).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 24);
+ oV = iV.and(MASK_21).lanewise(VectorOperators.LSHL, 30).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_21).lanewise(VectorOperators.LSHR, 2);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 28);
+ oV = iV.and(MASK_21).lanewise(VectorOperators.LSHL, 19).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_21).lanewise(VectorOperators.LSHR, 13);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 32);
+ oV = iV.and(MASK_21).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 36);
+ oV = iV.and(MASK_21).lanewise(VectorOperators.LSHL, 29).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_21).lanewise(VectorOperators.LSHR, 3);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 40);
+ oV = iV.and(MASK_21).lanewise(VectorOperators.LSHL, 18).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_21).lanewise(VectorOperators.LSHR, 14);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 44);
+ oV = iV.and(MASK_21).lanewise(VectorOperators.LSHL, 7).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 48);
+ oV = iV.and(MASK_21).lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_21).lanewise(VectorOperators.LSHR, 4);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 52);
+ oV = iV.and(MASK_21).lanewise(VectorOperators.LSHL, 17).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_21).lanewise(VectorOperators.LSHR, 15);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 56);
+ oV = iV.and(MASK_21).lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 60);
+ oV = iV.and(MASK_21).lanewise(VectorOperators.LSHL, 27).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_21).lanewise(VectorOperators.LSHR, 5);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 64);
+ oV = iV.and(MASK_21).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_21).lanewise(VectorOperators.LSHR, 16);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 68);
+ oV = iV.and(MASK_21).lanewise(VectorOperators.LSHL, 5).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 72);
+ oV = iV.and(MASK_21).lanewise(VectorOperators.LSHL, 26).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_21).lanewise(VectorOperators.LSHR, 6);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 76);
+ oV = iV.and(MASK_21).lanewise(VectorOperators.LSHL, 15).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_21).lanewise(VectorOperators.LSHR, 17);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 80);
+ oV = iV.and(MASK_21).lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 84);
+ oV = iV.and(MASK_21).lanewise(VectorOperators.LSHL, 25).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_21).lanewise(VectorOperators.LSHR, 7);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 88);
+ oV = iV.and(MASK_21).lanewise(VectorOperators.LSHL, 14).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_21).lanewise(VectorOperators.LSHR, 18);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 92);
+ oV = iV.and(MASK_21).lanewise(VectorOperators.LSHL, 3).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 96);
+ oV = iV.and(MASK_21).lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_21).lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 100);
+ oV = iV.and(MASK_21).lanewise(VectorOperators.LSHL, 13).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_21).lanewise(VectorOperators.LSHR, 19);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 104);
+ oV = iV.and(MASK_21).lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 108);
+ oV = iV.and(MASK_21).lanewise(VectorOperators.LSHL, 23).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_21).lanewise(VectorOperators.LSHR, 9);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 112);
+ oV = iV.and(MASK_21).lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_21).lanewise(VectorOperators.LSHR, 20);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 116);
+ oV = iV.and(MASK_21).lanewise(VectorOperators.LSHL, 1).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 120);
+ oV = iV.and(MASK_21).lanewise(VectorOperators.LSHL, 22).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_21).lanewise(VectorOperators.LSHR, 10);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 124);
+ oV = iV.and(MASK_21).lanewise(VectorOperators.LSHL, 11).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 128);
+ oV = iV.and(MASK_21);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 132);
+ oV = iV.and(MASK_21).lanewise(VectorOperators.LSHL, 21).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_21).lanewise(VectorOperators.LSHR, 11);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 136);
+ oV = iV.and(MASK_21).lanewise(VectorOperators.LSHL, 10).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 140);
+ oV = iV.and(MASK_21).lanewise(VectorOperators.LSHL, 31).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_21).lanewise(VectorOperators.LSHR, 1);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 144);
+ oV = iV.and(MASK_21).lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_21).lanewise(VectorOperators.LSHR, 12);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 148);
+ oV = iV.and(MASK_21).lanewise(VectorOperators.LSHL, 9).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 152);
+ oV = iV.and(MASK_21).lanewise(VectorOperators.LSHL, 30).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_21).lanewise(VectorOperators.LSHR, 2);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 156);
+ oV = iV.and(MASK_21).lanewise(VectorOperators.LSHL, 19).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_21).lanewise(VectorOperators.LSHR, 13);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 160);
+ oV = iV.and(MASK_21).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 164);
+ oV = iV.and(MASK_21).lanewise(VectorOperators.LSHL, 29).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_21).lanewise(VectorOperators.LSHR, 3);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 168);
+ oV = iV.and(MASK_21).lanewise(VectorOperators.LSHL, 18).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_21).lanewise(VectorOperators.LSHR, 14);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 172);
+ oV = iV.and(MASK_21).lanewise(VectorOperators.LSHL, 7).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 176);
+ oV = iV.and(MASK_21).lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_21).lanewise(VectorOperators.LSHR, 4);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 180);
+ oV = iV.and(MASK_21).lanewise(VectorOperators.LSHL, 17).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_21).lanewise(VectorOperators.LSHR, 15);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 184);
+ oV = iV.and(MASK_21).lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 188);
+ oV = iV.and(MASK_21).lanewise(VectorOperators.LSHL, 27).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_21).lanewise(VectorOperators.LSHR, 5);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 192);
+ oV = iV.and(MASK_21).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_21).lanewise(VectorOperators.LSHR, 16);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 196);
+ oV = iV.and(MASK_21).lanewise(VectorOperators.LSHL, 5).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 200);
+ oV = iV.and(MASK_21).lanewise(VectorOperators.LSHL, 26).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_21).lanewise(VectorOperators.LSHR, 6);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 204);
+ oV = iV.and(MASK_21).lanewise(VectorOperators.LSHL, 15).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_21).lanewise(VectorOperators.LSHR, 17);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 208);
+ oV = iV.and(MASK_21).lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 212);
+ oV = iV.and(MASK_21).lanewise(VectorOperators.LSHL, 25).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_21).lanewise(VectorOperators.LSHR, 7);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 216);
+ oV = iV.and(MASK_21).lanewise(VectorOperators.LSHL, 14).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_21).lanewise(VectorOperators.LSHR, 18);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 220);
+ oV = iV.and(MASK_21).lanewise(VectorOperators.LSHL, 3).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 224);
+ oV = iV.and(MASK_21).lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_21).lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 228);
+ oV = iV.and(MASK_21).lanewise(VectorOperators.LSHL, 13).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_21).lanewise(VectorOperators.LSHR, 19);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 232);
+ oV = iV.and(MASK_21).lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 236);
+ oV = iV.and(MASK_21).lanewise(VectorOperators.LSHL, 23).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_21).lanewise(VectorOperators.LSHR, 9);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 240);
+ oV = iV.and(MASK_21).lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_21).lanewise(VectorOperators.LSHR, 20);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 244);
+ oV = iV.and(MASK_21).lanewise(VectorOperators.LSHL, 1).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 248);
+ oV = iV.and(MASK_21).lanewise(VectorOperators.LSHL, 22).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_21).lanewise(VectorOperators.LSHR, 10);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 252);
+ oV = iV.and(MASK_21).lanewise(VectorOperators.LSHL, 11).or(oV);
+
+ oV.intoArray(out, outpos);
+ }
+
+ private static void fastpackNoMask21(final int[] in, int inpos, final int[] out, int outpos) { // Packs 64 input vectors (in[inpos + 0, 4, ..., 252]) into 42 output vectors at out[outpos...], 21 bits per value.
+ var iV = IntVector.fromArray(SPECIES_128, in, inpos); // input vector 0
+ var oV = iV; // "NoMask": inputs are OR-ed in as-is, so they presumably already fit in 21 bits; higher bits would overlap the next field
+ // Lanes are packed independently: every shift/OR below is lane-wise, so lane j of the output only holds bits of lane j of the inputs.
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 4); // input vector 1
+ oV = iV.lanewise(VectorOperators.LSHL, 21).or(oV); // vector 1 is placed at bit 21; only its low 11 bits fit in this word
+
+ oV.intoArray(out, outpos); // out vector 0
+ outpos += VLEN_128; // advance one vector (VLEN_128 is presumably 4, matching the input stride)
+
+ oV = iV.lanewise(VectorOperators.LSHR, 11); // carry: the part of vector 1 that did not fit (bits 11 and up) opens the next word
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 8);
+ oV = iV.lanewise(VectorOperators.LSHL, 10).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 12);
+ oV = iV.lanewise(VectorOperators.LSHL, 31).or(oV); // bit 31 receives only bit 0 of vector 3
+
+ oV.intoArray(out, outpos); // out vector 1
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 1); // carry the rest of vector 3; must run before iV is reloaded
+ // The rest of the method repeats these steps; only the shift amounts change as the 21-bit fields drift across 32-bit words.
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 16);
+ oV = iV.lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ oV.intoArray(out, outpos); // out vector 2
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 12);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 20);
+ oV = iV.lanewise(VectorOperators.LSHL, 9).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 24);
+ oV = iV.lanewise(VectorOperators.LSHL, 30).or(oV);
+
+ oV.intoArray(out, outpos); // out vector 3
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 2);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 28);
+ oV = iV.lanewise(VectorOperators.LSHL, 19).or(oV);
+
+ oV.intoArray(out, outpos); // out vector 4
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 13);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 32);
+ oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 36);
+ oV = iV.lanewise(VectorOperators.LSHL, 29).or(oV);
+
+ oV.intoArray(out, outpos); // out vector 5
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 3);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 40);
+ oV = iV.lanewise(VectorOperators.LSHL, 18).or(oV);
+
+ oV.intoArray(out, outpos); // out vector 6
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 14);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 44);
+ oV = iV.lanewise(VectorOperators.LSHL, 7).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 48);
+ oV = iV.lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ oV.intoArray(out, outpos); // out vector 7
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 4);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 52);
+ oV = iV.lanewise(VectorOperators.LSHL, 17).or(oV);
+
+ oV.intoArray(out, outpos); // out vector 8
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 15);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 56);
+ oV = iV.lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 60);
+ oV = iV.lanewise(VectorOperators.LSHL, 27).or(oV);
+
+ oV.intoArray(out, outpos); // out vector 9
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 5);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 64);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos); // out vector 10
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 16);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 68);
+ oV = iV.lanewise(VectorOperators.LSHL, 5).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 72);
+ oV = iV.lanewise(VectorOperators.LSHL, 26).or(oV);
+
+ oV.intoArray(out, outpos); // out vector 11
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 6);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 76);
+ oV = iV.lanewise(VectorOperators.LSHL, 15).or(oV);
+
+ oV.intoArray(out, outpos); // out vector 12
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 17);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 80);
+ oV = iV.lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 84);
+ oV = iV.lanewise(VectorOperators.LSHL, 25).or(oV);
+
+ oV.intoArray(out, outpos); // out vector 13
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 7);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 88);
+ oV = iV.lanewise(VectorOperators.LSHL, 14).or(oV);
+
+ oV.intoArray(out, outpos); // out vector 14
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 18);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 92);
+ oV = iV.lanewise(VectorOperators.LSHL, 3).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 96);
+ oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos); // out vector 15
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 100);
+ oV = iV.lanewise(VectorOperators.LSHL, 13).or(oV);
+
+ oV.intoArray(out, outpos); // out vector 16
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 19);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 104);
+ oV = iV.lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 108);
+ oV = iV.lanewise(VectorOperators.LSHL, 23).or(oV);
+
+ oV.intoArray(out, outpos); // out vector 17
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 9);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 112);
+ oV = iV.lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ oV.intoArray(out, outpos); // out vector 18
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 20);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 116);
+ oV = iV.lanewise(VectorOperators.LSHL, 1).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 120);
+ oV = iV.lanewise(VectorOperators.LSHL, 22).or(oV);
+
+ oV.intoArray(out, outpos); // out vector 19
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 10);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 124);
+ oV = iV.lanewise(VectorOperators.LSHL, 11).or(oV);
+
+ oV.intoArray(out, outpos); // out vector 20: 32 inputs x 21 bits end exactly on a word boundary here
+ outpos += VLEN_128;
+ // Second half: input vectors 32..63 (in offsets 128..252) repeat the same 21-word pattern with no carry from the first half.
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 128);
+ oV = iV;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 132);
+ oV = iV.lanewise(VectorOperators.LSHL, 21).or(oV);
+
+ oV.intoArray(out, outpos); // out vector 21
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 11);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 136);
+ oV = iV.lanewise(VectorOperators.LSHL, 10).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 140);
+ oV = iV.lanewise(VectorOperators.LSHL, 31).or(oV);
+
+ oV.intoArray(out, outpos); // out vector 22
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 1);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 144);
+ oV = iV.lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ oV.intoArray(out, outpos); // out vector 23
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 12);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 148);
+ oV = iV.lanewise(VectorOperators.LSHL, 9).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 152);
+ oV = iV.lanewise(VectorOperators.LSHL, 30).or(oV);
+
+ oV.intoArray(out, outpos); // out vector 24
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 2);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 156);
+ oV = iV.lanewise(VectorOperators.LSHL, 19).or(oV);
+
+ oV.intoArray(out, outpos); // out vector 25
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 13);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 160);
+ oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 164);
+ oV = iV.lanewise(VectorOperators.LSHL, 29).or(oV);
+
+ oV.intoArray(out, outpos); // out vector 26
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 3);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 168);
+ oV = iV.lanewise(VectorOperators.LSHL, 18).or(oV);
+
+ oV.intoArray(out, outpos); // out vector 27
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 14);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 172);
+ oV = iV.lanewise(VectorOperators.LSHL, 7).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 176);
+ oV = iV.lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ oV.intoArray(out, outpos); // out vector 28
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 4);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 180);
+ oV = iV.lanewise(VectorOperators.LSHL, 17).or(oV);
+
+ oV.intoArray(out, outpos); // out vector 29
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 15);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 184);
+ oV = iV.lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 188);
+ oV = iV.lanewise(VectorOperators.LSHL, 27).or(oV);
+
+ oV.intoArray(out, outpos); // out vector 30
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 5);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 192);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos); // out vector 31
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 16);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 196);
+ oV = iV.lanewise(VectorOperators.LSHL, 5).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 200);
+ oV = iV.lanewise(VectorOperators.LSHL, 26).or(oV);
+
+ oV.intoArray(out, outpos); // out vector 32
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 6);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 204);
+ oV = iV.lanewise(VectorOperators.LSHL, 15).or(oV);
+
+ oV.intoArray(out, outpos); // out vector 33
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 17);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 208);
+ oV = iV.lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 212);
+ oV = iV.lanewise(VectorOperators.LSHL, 25).or(oV);
+
+ oV.intoArray(out, outpos); // out vector 34
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 7);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 216);
+ oV = iV.lanewise(VectorOperators.LSHL, 14).or(oV);
+
+ oV.intoArray(out, outpos); // out vector 35
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 18);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 220);
+ oV = iV.lanewise(VectorOperators.LSHL, 3).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 224);
+ oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos); // out vector 36
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 228);
+ oV = iV.lanewise(VectorOperators.LSHL, 13).or(oV);
+
+ oV.intoArray(out, outpos); // out vector 37
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 19);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 232);
+ oV = iV.lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 236);
+ oV = iV.lanewise(VectorOperators.LSHL, 23).or(oV);
+
+ oV.intoArray(out, outpos); // out vector 38
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 9);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 240);
+ oV = iV.lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ oV.intoArray(out, outpos); // out vector 39
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 20);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 244);
+ oV = iV.lanewise(VectorOperators.LSHL, 1).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 248);
+ oV = iV.lanewise(VectorOperators.LSHL, 22).or(oV);
+
+ oV.intoArray(out, outpos); // out vector 40
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 10);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 252);
+ oV = iV.lanewise(VectorOperators.LSHL, 11).or(oV);
+
+ oV.intoArray(out, outpos); // out vector 41 (last); outpos is a local copy, so no further increment is needed
+ }
+
+ private static void fastunpack21(final int[] in, int inpos, final int[] out, int outpos) { // Unpacks 42 packed vectors (in[inpos + 0, 4, ..., 164]) into 64 vectors of 21-bit values at out[outpos...]; mirrors the layout written by fastpackNoMask21.
+ var iV = IntVector.fromArray(SPECIES_128, in, inpos); // packed vector 0
+ iV.and(MASK_21).intoArray(out, outpos); // out vector 0: bits 0..20 (MASK_21 is presumably (1 << 21) - 1 in every lane)
+ outpos += VLEN_128; // advance one vector (VLEN_128 is presumably 4, matching the input stride)
+ // A value that straddles two packed vectors is rebuilt from the top bits of one and the low bits of the next.
+ var oV = iV.lanewise(VectorOperators.LSHR, 21).and(MASK_21); // bits 21..31 of packed vector 0 are the low 11 bits of value 1
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 4); // packed vector 1
+ oV = iV.and(1023).lanewise(VectorOperators.LSHL, 11).or(oV); // 1023 = 2^10 - 1: the low 10 bits of packed vector 1 are the high 10 bits of value 1
+
+ oV.intoArray(out, outpos); // out vector 1
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 10).and(MASK_21).intoArray(out, outpos); // out vector 2: lies wholly inside packed vector 1, starting at bit 10
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 31).and(MASK_21); // bit 31 is bit 0 of value 3
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 8);
+ oV = iV.and(1048575).lanewise(VectorOperators.LSHL, 1).or(oV); // 1048575 = 2^20 - 1: the remaining 20 bits of value 3
+
+ oV.intoArray(out, outpos); // out vector 3
+ outpos += VLEN_128;
+ // The rest of the method repeats these step shapes; only the shift amounts and scalar masks change.
+ oV = iV.lanewise(VectorOperators.LSHR, 20).and(MASK_21);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 12);
+ oV = iV.and(511).lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ oV.intoArray(out, outpos); // out vector 4
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 9).and(MASK_21).intoArray(out, outpos); // out vector 5
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 30).and(MASK_21);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 16);
+ oV = iV.and(524287).lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ oV.intoArray(out, outpos); // out vector 6
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 19).and(MASK_21);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 20);
+ oV = iV.and(255).lanewise(VectorOperators.LSHL, 13).or(oV);
+
+ oV.intoArray(out, outpos); // out vector 7
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 8).and(MASK_21).intoArray(out, outpos); // out vector 8
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 29).and(MASK_21);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 24);
+ oV = iV.and(262143).lanewise(VectorOperators.LSHL, 3).or(oV);
+
+ oV.intoArray(out, outpos); // out vector 9
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 18).and(MASK_21);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 28);
+ oV = iV.and(127).lanewise(VectorOperators.LSHL, 14).or(oV);
+
+ oV.intoArray(out, outpos); // out vector 10
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 7).and(MASK_21).intoArray(out, outpos); // out vector 11
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 28).and(MASK_21);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 32);
+ oV = iV.and(131071).lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ oV.intoArray(out, outpos); // out vector 12
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 17).and(MASK_21);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 36);
+ oV = iV.and(63).lanewise(VectorOperators.LSHL, 15).or(oV);
+
+ oV.intoArray(out, outpos); // out vector 13
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 6).and(MASK_21).intoArray(out, outpos); // out vector 14
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 27).and(MASK_21);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 40);
+ oV = iV.and(65535).lanewise(VectorOperators.LSHL, 5).or(oV);
+
+ oV.intoArray(out, outpos); // out vector 15
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 16).and(MASK_21);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 44);
+ oV = iV.and(31).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos); // out vector 16
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 5).and(MASK_21).intoArray(out, outpos); // out vector 17
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 26).and(MASK_21);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 48);
+ oV = iV.and(32767).lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ oV.intoArray(out, outpos); // out vector 18
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 15).and(MASK_21);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 52);
+ oV = iV.and(15).lanewise(VectorOperators.LSHL, 17).or(oV);
+
+ oV.intoArray(out, outpos); // out vector 19
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 4).and(MASK_21).intoArray(out, outpos); // out vector 20
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 25).and(MASK_21);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 56);
+ oV = iV.and(16383).lanewise(VectorOperators.LSHL, 7).or(oV);
+
+ oV.intoArray(out, outpos); // out vector 21
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 14).and(MASK_21);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 60);
+ oV = iV.and(7).lanewise(VectorOperators.LSHL, 18).or(oV);
+
+ oV.intoArray(out, outpos); // out vector 22
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 3).and(MASK_21).intoArray(out, outpos); // out vector 23
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 24).and(MASK_21);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 64);
+ oV = iV.and(8191).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ oV.intoArray(out, outpos); // out vector 24
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 13).and(MASK_21);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 68);
+ oV = iV.and(3).lanewise(VectorOperators.LSHL, 19).or(oV);
+
+ oV.intoArray(out, outpos); // out vector 25
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 2).and(MASK_21).intoArray(out, outpos); // out vector 26
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 23).and(MASK_21);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 72);
+ oV = iV.and(4095).lanewise(VectorOperators.LSHL, 9).or(oV);
+
+ oV.intoArray(out, outpos); // out vector 27
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 12).and(MASK_21);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 76);
+ oV = iV.and(1).lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ oV.intoArray(out, outpos); // out vector 28
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 1).and(MASK_21).intoArray(out, outpos); // out vector 29
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 22).and(MASK_21);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 80);
+ oV = iV.and(2047).lanewise(VectorOperators.LSHL, 10).or(oV);
+
+ oV.intoArray(out, outpos); // out vector 30
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 11).and(MASK_21).intoArray(out, outpos); // out vector 31: uses the top 21 bits, so the first 21 packed vectors are fully consumed
+ outpos += VLEN_128;
+ // Second half: packed vectors 21..41 (in offsets 84..164) decode out vectors 32..63 with the same pattern.
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 84);
+ iV.and(MASK_21).intoArray(out, outpos); // out vector 32
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 21).and(MASK_21);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 88);
+ oV = iV.and(1023).lanewise(VectorOperators.LSHL, 11).or(oV);
+
+ oV.intoArray(out, outpos); // out vector 33
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 10).and(MASK_21).intoArray(out, outpos); // out vector 34
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 31).and(MASK_21);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 92);
+ oV = iV.and(1048575).lanewise(VectorOperators.LSHL, 1).or(oV);
+
+ oV.intoArray(out, outpos); // out vector 35
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 20).and(MASK_21);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 96);
+ oV = iV.and(511).lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ oV.intoArray(out, outpos); // out vector 36
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 9).and(MASK_21).intoArray(out, outpos); // out vector 37
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 30).and(MASK_21);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 100);
+ oV = iV.and(524287).lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ oV.intoArray(out, outpos); // out vector 38
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 19).and(MASK_21);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 104);
+ oV = iV.and(255).lanewise(VectorOperators.LSHL, 13).or(oV);
+
+ oV.intoArray(out, outpos); // out vector 39
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 8).and(MASK_21).intoArray(out, outpos); // out vector 40
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 29).and(MASK_21);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 108);
+ oV = iV.and(262143).lanewise(VectorOperators.LSHL, 3).or(oV);
+
+ oV.intoArray(out, outpos); // out vector 41
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 18).and(MASK_21);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 112);
+ oV = iV.and(127).lanewise(VectorOperators.LSHL, 14).or(oV);
+
+ oV.intoArray(out, outpos); // out vector 42
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 7).and(MASK_21).intoArray(out, outpos); // out vector 43
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 28).and(MASK_21);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 116);
+ oV = iV.and(131071).lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ oV.intoArray(out, outpos); // out vector 44
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 17).and(MASK_21);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 120);
+ oV = iV.and(63).lanewise(VectorOperators.LSHL, 15).or(oV);
+
+ oV.intoArray(out, outpos); // out vector 45
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 6).and(MASK_21).intoArray(out, outpos); // out vector 46
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 27).and(MASK_21);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 124);
+ oV = iV.and(65535).lanewise(VectorOperators.LSHL, 5).or(oV);
+
+ oV.intoArray(out, outpos); // out vector 47
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 16).and(MASK_21);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 128);
+ oV = iV.and(31).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos); // out vector 48
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 5).and(MASK_21).intoArray(out, outpos); // out vector 49
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 26).and(MASK_21);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 132);
+ oV = iV.and(32767).lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ oV.intoArray(out, outpos); // out vector 50
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 15).and(MASK_21);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 136);
+ oV = iV.and(15).lanewise(VectorOperators.LSHL, 17).or(oV);
+
+ oV.intoArray(out, outpos); // out vector 51
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 4).and(MASK_21).intoArray(out, outpos); // out vector 52
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 25).and(MASK_21);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 140);
+ oV = iV.and(16383).lanewise(VectorOperators.LSHL, 7).or(oV);
+
+ oV.intoArray(out, outpos); // out vector 53
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 14).and(MASK_21);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 144);
+ oV = iV.and(7).lanewise(VectorOperators.LSHL, 18).or(oV);
+
+ oV.intoArray(out, outpos); // out vector 54
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 3).and(MASK_21).intoArray(out, outpos); // out vector 55
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 24).and(MASK_21);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 148);
+ oV = iV.and(8191).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ oV.intoArray(out, outpos); // out vector 56
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 13).and(MASK_21);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 152);
+ oV = iV.and(3).lanewise(VectorOperators.LSHL, 19).or(oV);
+
+ oV.intoArray(out, outpos); // out vector 57
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 2).and(MASK_21).intoArray(out, outpos); // out vector 58
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 23).and(MASK_21);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 156);
+ oV = iV.and(4095).lanewise(VectorOperators.LSHL, 9).or(oV);
+
+ oV.intoArray(out, outpos); // out vector 59
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 12).and(MASK_21);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 160);
+ oV = iV.and(1).lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ oV.intoArray(out, outpos); // out vector 60
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 1).and(MASK_21).intoArray(out, outpos); // out vector 61
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 22).and(MASK_21);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 164);
+ oV = iV.and(2047).lanewise(VectorOperators.LSHL, 10).or(oV);
+
+ oV.intoArray(out, outpos); // out vector 62
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 11).and(MASK_21).intoArray(out, outpos); // out vector 63 (last); outpos is a local copy, so no further increment is needed
+ }
+
+ private static void fastpack22(final int[] in, int inpos, final int[] out, int outpos) {
+ var iV = IntVector.fromArray(SPECIES_128, in, inpos);
+ var oV = iV.and(MASK_22);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 4);
+ oV = iV.and(MASK_22).lanewise(VectorOperators.LSHL, 22).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_22).lanewise(VectorOperators.LSHR, 10);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 8);
+ oV = iV.and(MASK_22).lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_22).lanewise(VectorOperators.LSHR, 20);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 12);
+ oV = iV.and(MASK_22).lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 16);
+ oV = iV.and(MASK_22).lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_22).lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 20);
+ oV = iV.and(MASK_22).lanewise(VectorOperators.LSHL, 14).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_22).lanewise(VectorOperators.LSHR, 18);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 24);
+ oV = iV.and(MASK_22).lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 28);
+ oV = iV.and(MASK_22).lanewise(VectorOperators.LSHL, 26).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_22).lanewise(VectorOperators.LSHR, 6);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 32);
+ oV = iV.and(MASK_22).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_22).lanewise(VectorOperators.LSHR, 16);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 36);
+ oV = iV.and(MASK_22).lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 40);
+ oV = iV.and(MASK_22).lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_22).lanewise(VectorOperators.LSHR, 4);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 44);
+ oV = iV.and(MASK_22).lanewise(VectorOperators.LSHL, 18).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_22).lanewise(VectorOperators.LSHR, 14);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 48);
+ oV = iV.and(MASK_22).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 52);
+ oV = iV.and(MASK_22).lanewise(VectorOperators.LSHL, 30).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_22).lanewise(VectorOperators.LSHR, 2);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 56);
+ oV = iV.and(MASK_22).lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_22).lanewise(VectorOperators.LSHR, 12);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 60);
+ oV = iV.and(MASK_22).lanewise(VectorOperators.LSHL, 10).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 64);
+ oV = iV.and(MASK_22);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 68);
+ oV = iV.and(MASK_22).lanewise(VectorOperators.LSHL, 22).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_22).lanewise(VectorOperators.LSHR, 10);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 72);
+ oV = iV.and(MASK_22).lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_22).lanewise(VectorOperators.LSHR, 20);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 76);
+ oV = iV.and(MASK_22).lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 80);
+ oV = iV.and(MASK_22).lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_22).lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 84);
+ oV = iV.and(MASK_22).lanewise(VectorOperators.LSHL, 14).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_22).lanewise(VectorOperators.LSHR, 18);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 88);
+ oV = iV.and(MASK_22).lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 92);
+ oV = iV.and(MASK_22).lanewise(VectorOperators.LSHL, 26).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_22).lanewise(VectorOperators.LSHR, 6);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 96);
+ oV = iV.and(MASK_22).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_22).lanewise(VectorOperators.LSHR, 16);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 100);
+ oV = iV.and(MASK_22).lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 104);
+ oV = iV.and(MASK_22).lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_22).lanewise(VectorOperators.LSHR, 4);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 108);
+ oV = iV.and(MASK_22).lanewise(VectorOperators.LSHL, 18).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_22).lanewise(VectorOperators.LSHR, 14);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 112);
+ oV = iV.and(MASK_22).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 116);
+ oV = iV.and(MASK_22).lanewise(VectorOperators.LSHL, 30).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_22).lanewise(VectorOperators.LSHR, 2);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 120);
+ oV = iV.and(MASK_22).lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_22).lanewise(VectorOperators.LSHR, 12);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 124);
+ oV = iV.and(MASK_22).lanewise(VectorOperators.LSHL, 10).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 128);
+ oV = iV.and(MASK_22);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 132);
+ oV = iV.and(MASK_22).lanewise(VectorOperators.LSHL, 22).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_22).lanewise(VectorOperators.LSHR, 10);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 136);
+ oV = iV.and(MASK_22).lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_22).lanewise(VectorOperators.LSHR, 20);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 140);
+ oV = iV.and(MASK_22).lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 144);
+ oV = iV.and(MASK_22).lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_22).lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 148);
+ oV = iV.and(MASK_22).lanewise(VectorOperators.LSHL, 14).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_22).lanewise(VectorOperators.LSHR, 18);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 152);
+ oV = iV.and(MASK_22).lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 156);
+ oV = iV.and(MASK_22).lanewise(VectorOperators.LSHL, 26).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_22).lanewise(VectorOperators.LSHR, 6);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 160);
+ oV = iV.and(MASK_22).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_22).lanewise(VectorOperators.LSHR, 16);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 164);
+ oV = iV.and(MASK_22).lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 168);
+ oV = iV.and(MASK_22).lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_22).lanewise(VectorOperators.LSHR, 4);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 172);
+ oV = iV.and(MASK_22).lanewise(VectorOperators.LSHL, 18).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_22).lanewise(VectorOperators.LSHR, 14);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 176);
+ oV = iV.and(MASK_22).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 180);
+ oV = iV.and(MASK_22).lanewise(VectorOperators.LSHL, 30).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_22).lanewise(VectorOperators.LSHR, 2);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 184);
+ oV = iV.and(MASK_22).lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_22).lanewise(VectorOperators.LSHR, 12);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 188);
+ oV = iV.and(MASK_22).lanewise(VectorOperators.LSHL, 10).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 192);
+ oV = iV.and(MASK_22);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 196);
+ oV = iV.and(MASK_22).lanewise(VectorOperators.LSHL, 22).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_22).lanewise(VectorOperators.LSHR, 10);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 200);
+ oV = iV.and(MASK_22).lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_22).lanewise(VectorOperators.LSHR, 20);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 204);
+ oV = iV.and(MASK_22).lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 208);
+ oV = iV.and(MASK_22).lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_22).lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 212);
+ oV = iV.and(MASK_22).lanewise(VectorOperators.LSHL, 14).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_22).lanewise(VectorOperators.LSHR, 18);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 216);
+ oV = iV.and(MASK_22).lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 220);
+ oV = iV.and(MASK_22).lanewise(VectorOperators.LSHL, 26).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_22).lanewise(VectorOperators.LSHR, 6);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 224);
+ oV = iV.and(MASK_22).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_22).lanewise(VectorOperators.LSHR, 16);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 228);
+ oV = iV.and(MASK_22).lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 232);
+ oV = iV.and(MASK_22).lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_22).lanewise(VectorOperators.LSHR, 4);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 236);
+ oV = iV.and(MASK_22).lanewise(VectorOperators.LSHL, 18).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_22).lanewise(VectorOperators.LSHR, 14);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 240);
+ oV = iV.and(MASK_22).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 244);
+ oV = iV.and(MASK_22).lanewise(VectorOperators.LSHL, 30).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_22).lanewise(VectorOperators.LSHR, 2);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 248);
+ oV = iV.and(MASK_22).lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_22).lanewise(VectorOperators.LSHR, 12);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 252);
+ oV = iV.and(MASK_22).lanewise(VectorOperators.LSHL, 10).or(oV);
+
+ oV.intoArray(out, outpos);
+ }
+
+ private static void fastpackNoMask22(final int[] in, int inpos, final int[] out, int outpos) { // Packs 64 vector loads (in[inpos + 0..252], step 4) into 44 vector stores at 22 bits per value, without masking the inputs.
+ var iV = IntVector.fromArray(SPECIES_128, in, inpos);
+ var oV = iV;
+ // Inputs are OR-ed in unmasked: this presumably relies on callers passing values that already fit in 22 bits (NOTE(review): confirm); stray high bits would spill into neighbouring fields.
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 4);
+ oV = iV.lanewise(VectorOperators.LSHL, 22).or(oV);
+ // All operations are lanewise, so each lane packs its own stream of values independently of the other lanes.
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+ // Only the low 10 bits of the previous value fitted above bit 22; its remaining upper bits are carried into the next output word.
+ oV = iV.lanewise(VectorOperators.LSHR, 10);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 8);
+ oV = iV.lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+ // Carry the bits of the previous value that did not fit (everything above its low 20 bits).
+ oV = iV.lanewise(VectorOperators.LSHR, 20);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 12);
+ oV = iV.lanewise(VectorOperators.LSHL, 2).or(oV);
+ // This word still has room above bit 24, so a third field is started in it before it is stored.
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 16);
+ oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 20);
+ oV = iV.lanewise(VectorOperators.LSHL, 14).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 18);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 24);
+ oV = iV.lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 28);
+ oV = iV.lanewise(VectorOperators.LSHL, 26).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 6);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 32);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 16);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 36);
+ oV = iV.lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 40);
+ oV = iV.lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 4);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 44);
+ oV = iV.lanewise(VectorOperators.LSHL, 18).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 14);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 48);
+ oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 52);
+ oV = iV.lanewise(VectorOperators.LSHL, 30).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 2);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 56);
+ oV = iV.lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 12);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 60);
+ oV = iV.lanewise(VectorOperators.LSHL, 10).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+ // 16 loads (16 * 22 = 352 bits per lane) have filled exactly 11 stored words, so there is no carry; the same schedule restarts at inpos + 64.
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 64);
+ oV = iV;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 68);
+ oV = iV.lanewise(VectorOperators.LSHL, 22).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 10);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 72);
+ oV = iV.lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 20);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 76);
+ oV = iV.lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 80);
+ oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 84);
+ oV = iV.lanewise(VectorOperators.LSHL, 14).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 18);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 88);
+ oV = iV.lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 92);
+ oV = iV.lanewise(VectorOperators.LSHL, 26).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 6);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 96);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 16);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 100);
+ oV = iV.lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 104);
+ oV = iV.lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 4);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 108);
+ oV = iV.lanewise(VectorOperators.LSHL, 18).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 14);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 112);
+ oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 116);
+ oV = iV.lanewise(VectorOperators.LSHL, 30).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 2);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 120);
+ oV = iV.lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 12);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 124);
+ oV = iV.lanewise(VectorOperators.LSHL, 10).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+ // Third repetition of the 16-load / 11-store schedule, starting at inpos + 128.
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 128);
+ oV = iV;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 132);
+ oV = iV.lanewise(VectorOperators.LSHL, 22).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 10);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 136);
+ oV = iV.lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 20);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 140);
+ oV = iV.lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 144);
+ oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 148);
+ oV = iV.lanewise(VectorOperators.LSHL, 14).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 18);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 152);
+ oV = iV.lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 156);
+ oV = iV.lanewise(VectorOperators.LSHL, 26).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 6);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 160);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 16);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 164);
+ oV = iV.lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 168);
+ oV = iV.lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 4);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 172);
+ oV = iV.lanewise(VectorOperators.LSHL, 18).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 14);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 176);
+ oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 180);
+ oV = iV.lanewise(VectorOperators.LSHL, 30).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 2);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 184);
+ oV = iV.lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 12);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 188);
+ oV = iV.lanewise(VectorOperators.LSHL, 10).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+ // Fourth and final repetition, starting at inpos + 192.
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 192);
+ oV = iV;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 196);
+ oV = iV.lanewise(VectorOperators.LSHL, 22).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 10);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 200);
+ oV = iV.lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 20);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 204);
+ oV = iV.lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 208);
+ oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 212);
+ oV = iV.lanewise(VectorOperators.LSHL, 14).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 18);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 216);
+ oV = iV.lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 220);
+ oV = iV.lanewise(VectorOperators.LSHL, 26).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 6);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 224);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 16);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 228);
+ oV = iV.lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 232);
+ oV = iV.lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 4);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 236);
+ oV = iV.lanewise(VectorOperators.LSHL, 18).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 14);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 240);
+ oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 244);
+ oV = iV.lanewise(VectorOperators.LSHL, 30).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 2);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 248);
+ oV = iV.lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 12);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 252);
+ oV = iV.lanewise(VectorOperators.LSHL, 10).or(oV);
+ // 44th and last store; outpos is a by-value int parameter, so it is simply not advanced again.
+ oV.intoArray(out, outpos);
+ }
+
+ private static void fastunpack22(final int[] in, int inpos, final int[] out, int outpos) { // Unpacks 44 vector loads (in[inpos + 0..172], step 4) into 64 vector stores of 22-bit values; reverses the bit layout written by fastpackNoMask22.
+ var iV = IntVector.fromArray(SPECIES_128, in, inpos);
+ iV.and(MASK_22).intoArray(out, outpos);
+ outpos += VLEN_128;
+ // The next value straddles two packed words: the bits left after the shift come from this word, the rest from the low bits of the next word.
+ var oV = iV.lanewise(VectorOperators.LSHR, 22).and(MASK_22);
+ // Integer literals such as 4095 are (1 << k) - 1 masks that keep the k low bits of the next word belonging to the straddling value.
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 4);
+ oV = iV.and(4095).lanewise(VectorOperators.LSHL, 10).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 12).and(MASK_22);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 8);
+ oV = iV.and(3).lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+ // This value is taken from the current word alone (shift, then MASK_22), so no further load is needed before storing it.
+ iV.lanewise(VectorOperators.LSHR, 2).and(MASK_22).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 24).and(MASK_22);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 12);
+ oV = iV.and(16383).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 14).and(MASK_22);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 16);
+ oV = iV.and(15).lanewise(VectorOperators.LSHL, 18).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 4).and(MASK_22).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 26).and(MASK_22);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 20);
+ oV = iV.and(65535).lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 16).and(MASK_22);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 24);
+ oV = iV.and(63).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 6).and(MASK_22).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 28).and(MASK_22);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 28);
+ oV = iV.and(262143).lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 18).and(MASK_22);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 32);
+ oV = iV.and(255).lanewise(VectorOperators.LSHL, 14).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 8).and(MASK_22).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 30).and(MASK_22);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 36);
+ oV = iV.and(1048575).lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 20).and(MASK_22);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 40);
+ oV = iV.and(1023).lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 10).and(MASK_22).intoArray(out, outpos);
+ outpos += VLEN_128;
+ // 11 packed words consumed and 16 values stored, ending on a word boundary; the same schedule restarts with the word at inpos + 44.
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 44);
+ iV.and(MASK_22).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 22).and(MASK_22);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 48);
+ oV = iV.and(4095).lanewise(VectorOperators.LSHL, 10).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 12).and(MASK_22);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 52);
+ oV = iV.and(3).lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 2).and(MASK_22).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 24).and(MASK_22);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 56);
+ oV = iV.and(16383).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 14).and(MASK_22);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 60);
+ oV = iV.and(15).lanewise(VectorOperators.LSHL, 18).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 4).and(MASK_22).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 26).and(MASK_22);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 64);
+ oV = iV.and(65535).lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 16).and(MASK_22);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 68);
+ oV = iV.and(63).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 6).and(MASK_22).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 28).and(MASK_22);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 72);
+ oV = iV.and(262143).lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 18).and(MASK_22);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 76);
+ oV = iV.and(255).lanewise(VectorOperators.LSHL, 14).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 8).and(MASK_22).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 30).and(MASK_22);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 80);
+ oV = iV.and(1048575).lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 20).and(MASK_22);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 84);
+ oV = iV.and(1023).lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 10).and(MASK_22).intoArray(out, outpos);
+ outpos += VLEN_128;
+ // Third repetition of the 11-load / 16-store schedule, starting with the word at inpos + 88.
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 88);
+ iV.and(MASK_22).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 22).and(MASK_22);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 92);
+ oV = iV.and(4095).lanewise(VectorOperators.LSHL, 10).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 12).and(MASK_22);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 96);
+ oV = iV.and(3).lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 2).and(MASK_22).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 24).and(MASK_22);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 100);
+ oV = iV.and(16383).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 14).and(MASK_22);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 104);
+ oV = iV.and(15).lanewise(VectorOperators.LSHL, 18).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 4).and(MASK_22).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 26).and(MASK_22);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 108);
+ oV = iV.and(65535).lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 16).and(MASK_22);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 112);
+ oV = iV.and(63).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 6).and(MASK_22).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 28).and(MASK_22);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 116);
+ oV = iV.and(262143).lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 18).and(MASK_22);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 120);
+ oV = iV.and(255).lanewise(VectorOperators.LSHL, 14).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 8).and(MASK_22).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 30).and(MASK_22);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 124);
+ oV = iV.and(1048575).lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 20).and(MASK_22);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 128);
+ oV = iV.and(1023).lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 10).and(MASK_22).intoArray(out, outpos);
+ outpos += VLEN_128;
+ // Fourth and final repetition, starting with the word at inpos + 132.
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 132);
+ iV.and(MASK_22).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 22).and(MASK_22);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 136);
+ oV = iV.and(4095).lanewise(VectorOperators.LSHL, 10).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 12).and(MASK_22);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 140);
+ oV = iV.and(3).lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 2).and(MASK_22).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 24).and(MASK_22);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 144);
+ oV = iV.and(16383).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 14).and(MASK_22);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 148);
+ oV = iV.and(15).lanewise(VectorOperators.LSHL, 18).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 4).and(MASK_22).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 26).and(MASK_22);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 152);
+ oV = iV.and(65535).lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 16).and(MASK_22);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 156);
+ oV = iV.and(63).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 6).and(MASK_22).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 28).and(MASK_22);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 160);
+ oV = iV.and(262143).lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 18).and(MASK_22);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 164);
+ oV = iV.and(255).lanewise(VectorOperators.LSHL, 14).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 8).and(MASK_22).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 30).and(MASK_22);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 168);
+ oV = iV.and(1048575).lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 20).and(MASK_22);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 172);
+ oV = iV.and(1023).lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+ // 64th and last store; outpos is a by-value int parameter, so it is simply not advanced again.
+ iV.lanewise(VectorOperators.LSHR, 10).and(MASK_22).intoArray(out, outpos);
+ }
+
+ private static void fastpack23(final int[] in, int inpos, final int[] out, int outpos) {
+ var iV = IntVector.fromArray(SPECIES_128, in, inpos);
+ var oV = iV.and(MASK_23);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 4);
+ oV = iV.and(MASK_23).lanewise(VectorOperators.LSHL, 23).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_23).lanewise(VectorOperators.LSHR, 9);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 8);
+ oV = iV.and(MASK_23).lanewise(VectorOperators.LSHL, 14).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_23).lanewise(VectorOperators.LSHR, 18);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 12);
+ oV = iV.and(MASK_23).lanewise(VectorOperators.LSHL, 5).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 16);
+ oV = iV.and(MASK_23).lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_23).lanewise(VectorOperators.LSHR, 4);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 20);
+ oV = iV.and(MASK_23).lanewise(VectorOperators.LSHL, 19).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_23).lanewise(VectorOperators.LSHR, 13);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 24);
+ oV = iV.and(MASK_23).lanewise(VectorOperators.LSHL, 10).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_23).lanewise(VectorOperators.LSHR, 22);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 28);
+ oV = iV.and(MASK_23).lanewise(VectorOperators.LSHL, 1).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 32);
+ oV = iV.and(MASK_23).lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_23).lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 36);
+ oV = iV.and(MASK_23).lanewise(VectorOperators.LSHL, 15).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_23).lanewise(VectorOperators.LSHR, 17);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 40);
+ oV = iV.and(MASK_23).lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 44);
+ oV = iV.and(MASK_23).lanewise(VectorOperators.LSHL, 29).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_23).lanewise(VectorOperators.LSHR, 3);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 48);
+ oV = iV.and(MASK_23).lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_23).lanewise(VectorOperators.LSHR, 12);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 52);
+ oV = iV.and(MASK_23).lanewise(VectorOperators.LSHL, 11).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_23).lanewise(VectorOperators.LSHR, 21);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 56);
+ oV = iV.and(MASK_23).lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 60);
+ oV = iV.and(MASK_23).lanewise(VectorOperators.LSHL, 25).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_23).lanewise(VectorOperators.LSHR, 7);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 64);
+ oV = iV.and(MASK_23).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_23).lanewise(VectorOperators.LSHR, 16);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 68);
+ oV = iV.and(MASK_23).lanewise(VectorOperators.LSHL, 7).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 72);
+ oV = iV.and(MASK_23).lanewise(VectorOperators.LSHL, 30).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_23).lanewise(VectorOperators.LSHR, 2);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 76);
+ oV = iV.and(MASK_23).lanewise(VectorOperators.LSHL, 21).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_23).lanewise(VectorOperators.LSHR, 11);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 80);
+ oV = iV.and(MASK_23).lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_23).lanewise(VectorOperators.LSHR, 20);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 84);
+ oV = iV.and(MASK_23).lanewise(VectorOperators.LSHL, 3).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 88);
+ oV = iV.and(MASK_23).lanewise(VectorOperators.LSHL, 26).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_23).lanewise(VectorOperators.LSHR, 6);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 92);
+ oV = iV.and(MASK_23).lanewise(VectorOperators.LSHL, 17).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_23).lanewise(VectorOperators.LSHR, 15);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 96);
+ oV = iV.and(MASK_23).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 100);
+ oV = iV.and(MASK_23).lanewise(VectorOperators.LSHL, 31).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_23).lanewise(VectorOperators.LSHR, 1);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 104);
+ oV = iV.and(MASK_23).lanewise(VectorOperators.LSHL, 22).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_23).lanewise(VectorOperators.LSHR, 10);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 108);
+ oV = iV.and(MASK_23).lanewise(VectorOperators.LSHL, 13).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_23).lanewise(VectorOperators.LSHR, 19);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 112);
+ oV = iV.and(MASK_23).lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 116);
+ oV = iV.and(MASK_23).lanewise(VectorOperators.LSHL, 27).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_23).lanewise(VectorOperators.LSHR, 5);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 120);
+ oV = iV.and(MASK_23).lanewise(VectorOperators.LSHL, 18).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_23).lanewise(VectorOperators.LSHR, 14);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 124);
+ oV = iV.and(MASK_23).lanewise(VectorOperators.LSHL, 9).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 128);
+ oV = iV.and(MASK_23);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 132);
+ oV = iV.and(MASK_23).lanewise(VectorOperators.LSHL, 23).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_23).lanewise(VectorOperators.LSHR, 9);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 136);
+ oV = iV.and(MASK_23).lanewise(VectorOperators.LSHL, 14).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_23).lanewise(VectorOperators.LSHR, 18);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 140);
+ oV = iV.and(MASK_23).lanewise(VectorOperators.LSHL, 5).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 144);
+ oV = iV.and(MASK_23).lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_23).lanewise(VectorOperators.LSHR, 4);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 148);
+ oV = iV.and(MASK_23).lanewise(VectorOperators.LSHL, 19).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_23).lanewise(VectorOperators.LSHR, 13);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 152);
+ oV = iV.and(MASK_23).lanewise(VectorOperators.LSHL, 10).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_23).lanewise(VectorOperators.LSHR, 22);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 156);
+ oV = iV.and(MASK_23).lanewise(VectorOperators.LSHL, 1).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 160);
+ oV = iV.and(MASK_23).lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_23).lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 164);
+ oV = iV.and(MASK_23).lanewise(VectorOperators.LSHL, 15).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_23).lanewise(VectorOperators.LSHR, 17);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 168);
+ oV = iV.and(MASK_23).lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 172);
+ oV = iV.and(MASK_23).lanewise(VectorOperators.LSHL, 29).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_23).lanewise(VectorOperators.LSHR, 3);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 176);
+ oV = iV.and(MASK_23).lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_23).lanewise(VectorOperators.LSHR, 12);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 180);
+ oV = iV.and(MASK_23).lanewise(VectorOperators.LSHL, 11).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_23).lanewise(VectorOperators.LSHR, 21);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 184);
+ oV = iV.and(MASK_23).lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 188);
+ oV = iV.and(MASK_23).lanewise(VectorOperators.LSHL, 25).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_23).lanewise(VectorOperators.LSHR, 7);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 192);
+ oV = iV.and(MASK_23).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_23).lanewise(VectorOperators.LSHR, 16);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 196);
+ oV = iV.and(MASK_23).lanewise(VectorOperators.LSHL, 7).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 200);
+ oV = iV.and(MASK_23).lanewise(VectorOperators.LSHL, 30).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_23).lanewise(VectorOperators.LSHR, 2);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 204);
+ oV = iV.and(MASK_23).lanewise(VectorOperators.LSHL, 21).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_23).lanewise(VectorOperators.LSHR, 11);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 208);
+ oV = iV.and(MASK_23).lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_23).lanewise(VectorOperators.LSHR, 20);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 212);
+ oV = iV.and(MASK_23).lanewise(VectorOperators.LSHL, 3).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 216);
+ oV = iV.and(MASK_23).lanewise(VectorOperators.LSHL, 26).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_23).lanewise(VectorOperators.LSHR, 6);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 220);
+ oV = iV.and(MASK_23).lanewise(VectorOperators.LSHL, 17).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_23).lanewise(VectorOperators.LSHR, 15);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 224);
+ oV = iV.and(MASK_23).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 228);
+ oV = iV.and(MASK_23).lanewise(VectorOperators.LSHL, 31).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_23).lanewise(VectorOperators.LSHR, 1);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 232);
+ oV = iV.and(MASK_23).lanewise(VectorOperators.LSHL, 22).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_23).lanewise(VectorOperators.LSHR, 10);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 236);
+ oV = iV.and(MASK_23).lanewise(VectorOperators.LSHL, 13).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_23).lanewise(VectorOperators.LSHR, 19);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 240);
+ oV = iV.and(MASK_23).lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 244);
+ oV = iV.and(MASK_23).lanewise(VectorOperators.LSHL, 27).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_23).lanewise(VectorOperators.LSHR, 5);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 248);
+ oV = iV.and(MASK_23).lanewise(VectorOperators.LSHL, 18).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_23).lanewise(VectorOperators.LSHR, 14);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 252);
+ oV = iV.and(MASK_23).lanewise(VectorOperators.LSHL, 9).or(oV);
+
+ oV.intoArray(out, outpos);
+ }
+
+ /**
+  * Bit-packs 64 input vectors into 46 output vectors using 23 bits per value,
+  * without masking the inputs first.
+  *
+  * <p>Input vectors are loaded from {@code in[inpos]}, {@code in[inpos + 4]}, ...,
+  * {@code in[inpos + 252]}; output vectors are stored starting at
+  * {@code out[outpos]}, advancing by {@code VLEN_128} after every store except
+  * the last. All operations are lane-wise, so each lane packs its own stream of
+  * 64 values independently of the other lanes.
+  *
+  * <p>Unlike the masked 23-bit kernel defined just before this one, no
+  * {@code and(MASK_23)} is applied. Any input bit above bit 22 is therefore
+  * OR-ed into the neighbouring packed field; presumably callers guarantee that
+  * every value fits in 23 bits (TODO confirm against the call sites).
+  *
+  * @param in     source array; read from {@code inpos} up to the last load at
+  *               {@code inpos + 252} (plus the lane count of SPECIES_128)
+  * @param inpos  index in {@code in} of the first value to pack
+  * @param out    destination array for the packed words
+  * @param outpos index in {@code out} of the first packed word to write
+  */
+ private static void fastpackNoMask23(final int[] in, int inpos, final int[] out, int outpos) {
+ // First value starts the first output word at bit offset 0.
+ var iV = IntVector.fromArray(SPECIES_128, in, inpos);
+ var oV = iV;
+
+ // Bits 23..31 of the word take the low 9 bits of the second value;
+ // its upper bits fall off the top of the 32-bit lane here.
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 4);
+ oV = iV.lanewise(VectorOperators.LSHL, 23).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ // Carry the 14 bits that did not fit into the low end of the next word.
+ oV = iV.lanewise(VectorOperators.LSHR, 9);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 8);
+ oV = iV.lanewise(VectorOperators.LSHL, 14).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 18);
+
+ // This value fits entirely inside the word (bits 5..27), so a second
+ // input is merged (at bit 28) before the word is stored.
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 12);
+ oV = iV.lanewise(VectorOperators.LSHL, 5).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 16);
+ oV = iV.lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 4);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 20);
+ oV = iV.lanewise(VectorOperators.LSHL, 19).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 13);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 24);
+ oV = iV.lanewise(VectorOperators.LSHL, 10).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 22);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 28);
+ oV = iV.lanewise(VectorOperators.LSHL, 1).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 32);
+ oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 36);
+ oV = iV.lanewise(VectorOperators.LSHL, 15).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 17);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 40);
+ oV = iV.lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 44);
+ oV = iV.lanewise(VectorOperators.LSHL, 29).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 3);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 48);
+ oV = iV.lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 12);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 52);
+ oV = iV.lanewise(VectorOperators.LSHL, 11).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 21);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 56);
+ oV = iV.lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 60);
+ oV = iV.lanewise(VectorOperators.LSHL, 25).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 7);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 64);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 16);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 68);
+ oV = iV.lanewise(VectorOperators.LSHL, 7).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 72);
+ oV = iV.lanewise(VectorOperators.LSHL, 30).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 2);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 76);
+ oV = iV.lanewise(VectorOperators.LSHL, 21).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 11);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 80);
+ oV = iV.lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 20);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 84);
+ oV = iV.lanewise(VectorOperators.LSHL, 3).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 88);
+ oV = iV.lanewise(VectorOperators.LSHL, 26).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 6);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 92);
+ oV = iV.lanewise(VectorOperators.LSHL, 17).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 15);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 96);
+ oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 100);
+ oV = iV.lanewise(VectorOperators.LSHL, 31).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 1);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 104);
+ oV = iV.lanewise(VectorOperators.LSHL, 22).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 10);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 108);
+ oV = iV.lanewise(VectorOperators.LSHL, 13).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 19);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 112);
+ oV = iV.lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 116);
+ oV = iV.lanewise(VectorOperators.LSHL, 27).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 5);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 120);
+ oV = iV.lanewise(VectorOperators.LSHL, 18).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 14);
+
+ // The 32nd value ends exactly on a word boundary (9 + 23 = 32), so
+ // nothing is carried over after this store.
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 124);
+ oV = iV.lanewise(VectorOperators.LSHL, 9).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ // Second half: the bit offset is back at 0, so the same shift sequence
+ // repeats for the inputs at inpos + 128 .. inpos + 252.
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 128);
+ oV = iV;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 132);
+ oV = iV.lanewise(VectorOperators.LSHL, 23).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 9);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 136);
+ oV = iV.lanewise(VectorOperators.LSHL, 14).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 18);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 140);
+ oV = iV.lanewise(VectorOperators.LSHL, 5).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 144);
+ oV = iV.lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 4);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 148);
+ oV = iV.lanewise(VectorOperators.LSHL, 19).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 13);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 152);
+ oV = iV.lanewise(VectorOperators.LSHL, 10).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 22);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 156);
+ oV = iV.lanewise(VectorOperators.LSHL, 1).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 160);
+ oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 164);
+ oV = iV.lanewise(VectorOperators.LSHL, 15).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 17);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 168);
+ oV = iV.lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 172);
+ oV = iV.lanewise(VectorOperators.LSHL, 29).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 3);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 176);
+ oV = iV.lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 12);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 180);
+ oV = iV.lanewise(VectorOperators.LSHL, 11).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 21);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 184);
+ oV = iV.lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 188);
+ oV = iV.lanewise(VectorOperators.LSHL, 25).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 7);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 192);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 16);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 196);
+ oV = iV.lanewise(VectorOperators.LSHL, 7).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 200);
+ oV = iV.lanewise(VectorOperators.LSHL, 30).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 2);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 204);
+ oV = iV.lanewise(VectorOperators.LSHL, 21).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 11);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 208);
+ oV = iV.lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 20);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 212);
+ oV = iV.lanewise(VectorOperators.LSHL, 3).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 216);
+ oV = iV.lanewise(VectorOperators.LSHL, 26).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 6);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 220);
+ oV = iV.lanewise(VectorOperators.LSHL, 17).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 15);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 224);
+ oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 228);
+ oV = iV.lanewise(VectorOperators.LSHL, 31).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 1);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 232);
+ oV = iV.lanewise(VectorOperators.LSHL, 22).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 10);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 236);
+ oV = iV.lanewise(VectorOperators.LSHL, 13).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 19);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 240);
+ oV = iV.lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 244);
+ oV = iV.lanewise(VectorOperators.LSHL, 27).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 5);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 248);
+ oV = iV.lanewise(VectorOperators.LSHL, 18).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 14);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 252);
+ oV = iV.lanewise(VectorOperators.LSHL, 9).or(oV);
+
+ // Final store: outpos is a by-value parameter, so no further advance is needed.
+ oV.intoArray(out, outpos);
+ }
+
+ private static void fastunpack23(final int[] in, int inpos, final int[] out, int outpos) {
+ var iV = IntVector.fromArray(SPECIES_128, in, inpos);
+ iV.and(MASK_23).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ var oV = iV.lanewise(VectorOperators.LSHR, 23).and(MASK_23);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 4);
+ oV = iV.and(16383).lanewise(VectorOperators.LSHL, 9).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 14).and(MASK_23);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 8);
+ oV = iV.and(31).lanewise(VectorOperators.LSHL, 18).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 5).and(MASK_23).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 28).and(MASK_23);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 12);
+ oV = iV.and(524287).lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 19).and(MASK_23);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 16);
+ oV = iV.and(1023).lanewise(VectorOperators.LSHL, 13).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 10).and(MASK_23);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 20);
+ oV = iV.and(1).lanewise(VectorOperators.LSHL, 22).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 1).and(MASK_23).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 24).and(MASK_23);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 24);
+ oV = iV.and(32767).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 15).and(MASK_23);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 28);
+ oV = iV.and(63).lanewise(VectorOperators.LSHL, 17).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 6).and(MASK_23).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 29).and(MASK_23);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 32);
+ oV = iV.and(1048575).lanewise(VectorOperators.LSHL, 3).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 20).and(MASK_23);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 36);
+ oV = iV.and(2047).lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 11).and(MASK_23);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 40);
+ oV = iV.and(3).lanewise(VectorOperators.LSHL, 21).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 2).and(MASK_23).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 25).and(MASK_23);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 44);
+ oV = iV.and(65535).lanewise(VectorOperators.LSHL, 7).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 16).and(MASK_23);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 48);
+ oV = iV.and(127).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 7).and(MASK_23).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 30).and(MASK_23);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 52);
+ oV = iV.and(2097151).lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 21).and(MASK_23);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 56);
+ oV = iV.and(4095).lanewise(VectorOperators.LSHL, 11).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 12).and(MASK_23);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 60);
+ oV = iV.and(7).lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 3).and(MASK_23).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 26).and(MASK_23);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 64);
+ oV = iV.and(131071).lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 17).and(MASK_23);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 68);
+ oV = iV.and(255).lanewise(VectorOperators.LSHL, 15).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 8).and(MASK_23).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 31).and(MASK_23);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 72);
+ oV = iV.and(4194303).lanewise(VectorOperators.LSHL, 1).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 22).and(MASK_23);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 76);
+ oV = iV.and(8191).lanewise(VectorOperators.LSHL, 10).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 13).and(MASK_23);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 80);
+ oV = iV.and(15).lanewise(VectorOperators.LSHL, 19).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 4).and(MASK_23).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 27).and(MASK_23);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 84);
+ oV = iV.and(262143).lanewise(VectorOperators.LSHL, 5).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 18).and(MASK_23);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 88);
+ oV = iV.and(511).lanewise(VectorOperators.LSHL, 14).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 9).and(MASK_23).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 92);
+ iV.and(MASK_23).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 23).and(MASK_23);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 96);
+ oV = iV.and(16383).lanewise(VectorOperators.LSHL, 9).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 14).and(MASK_23);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 100);
+ oV = iV.and(31).lanewise(VectorOperators.LSHL, 18).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 5).and(MASK_23).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 28).and(MASK_23);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 104);
+ oV = iV.and(524287).lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 19).and(MASK_23);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 108);
+ oV = iV.and(1023).lanewise(VectorOperators.LSHL, 13).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 10).and(MASK_23);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 112);
+ oV = iV.and(1).lanewise(VectorOperators.LSHL, 22).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 1).and(MASK_23).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 24).and(MASK_23);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 116);
+ oV = iV.and(32767).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 15).and(MASK_23);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 120);
+ oV = iV.and(63).lanewise(VectorOperators.LSHL, 17).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 6).and(MASK_23).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 29).and(MASK_23);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 124);
+ oV = iV.and(1048575).lanewise(VectorOperators.LSHL, 3).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 20).and(MASK_23);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 128);
+ oV = iV.and(2047).lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 11).and(MASK_23);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 132);
+ oV = iV.and(3).lanewise(VectorOperators.LSHL, 21).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 2).and(MASK_23).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 25).and(MASK_23);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 136);
+ oV = iV.and(65535).lanewise(VectorOperators.LSHL, 7).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 16).and(MASK_23);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 140);
+ oV = iV.and(127).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 7).and(MASK_23).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 30).and(MASK_23);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 144);
+ oV = iV.and(2097151).lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 21).and(MASK_23);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 148);
+ oV = iV.and(4095).lanewise(VectorOperators.LSHL, 11).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 12).and(MASK_23);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 152);
+ oV = iV.and(7).lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 3).and(MASK_23).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 26).and(MASK_23);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 156);
+ oV = iV.and(131071).lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 17).and(MASK_23);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 160);
+ oV = iV.and(255).lanewise(VectorOperators.LSHL, 15).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 8).and(MASK_23).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 31).and(MASK_23);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 164);
+ oV = iV.and(4194303).lanewise(VectorOperators.LSHL, 1).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 22).and(MASK_23);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 168);
+ oV = iV.and(8191).lanewise(VectorOperators.LSHL, 10).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 13).and(MASK_23);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 172);
+ oV = iV.and(15).lanewise(VectorOperators.LSHL, 19).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 4).and(MASK_23).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 27).and(MASK_23);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 176);
+ oV = iV.and(262143).lanewise(VectorOperators.LSHL, 5).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 18).and(MASK_23);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 180);
+ oV = iV.and(511).lanewise(VectorOperators.LSHL, 14).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 9).and(MASK_23).intoArray(out, outpos);
+ }
+
+ private static void fastpack24(final int[] in, int inpos, final int[] out, int outpos) {
+ var iV = IntVector.fromArray(SPECIES_128, in, inpos);
+ var oV = iV.and(MASK_24);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 4);
+ oV = iV.and(MASK_24).lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_24).lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 8);
+ oV = iV.and(MASK_24).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_24).lanewise(VectorOperators.LSHR, 16);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 12);
+ oV = iV.and(MASK_24).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 16);
+ oV = iV.and(MASK_24);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 20);
+ oV = iV.and(MASK_24).lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_24).lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 24);
+ oV = iV.and(MASK_24).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_24).lanewise(VectorOperators.LSHR, 16);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 28);
+ oV = iV.and(MASK_24).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 32);
+ oV = iV.and(MASK_24);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 36);
+ oV = iV.and(MASK_24).lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_24).lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 40);
+ oV = iV.and(MASK_24).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_24).lanewise(VectorOperators.LSHR, 16);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 44);
+ oV = iV.and(MASK_24).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 48);
+ oV = iV.and(MASK_24);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 52);
+ oV = iV.and(MASK_24).lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_24).lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 56);
+ oV = iV.and(MASK_24).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_24).lanewise(VectorOperators.LSHR, 16);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 60);
+ oV = iV.and(MASK_24).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 64);
+ oV = iV.and(MASK_24);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 68);
+ oV = iV.and(MASK_24).lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_24).lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 72);
+ oV = iV.and(MASK_24).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_24).lanewise(VectorOperators.LSHR, 16);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 76);
+ oV = iV.and(MASK_24).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 80);
+ oV = iV.and(MASK_24);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 84);
+ oV = iV.and(MASK_24).lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_24).lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 88);
+ oV = iV.and(MASK_24).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_24).lanewise(VectorOperators.LSHR, 16);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 92);
+ oV = iV.and(MASK_24).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 96);
+ oV = iV.and(MASK_24);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 100);
+ oV = iV.and(MASK_24).lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_24).lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 104);
+ oV = iV.and(MASK_24).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_24).lanewise(VectorOperators.LSHR, 16);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 108);
+ oV = iV.and(MASK_24).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 112);
+ oV = iV.and(MASK_24);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 116);
+ oV = iV.and(MASK_24).lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_24).lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 120);
+ oV = iV.and(MASK_24).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_24).lanewise(VectorOperators.LSHR, 16);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 124);
+ oV = iV.and(MASK_24).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 128);
+ oV = iV.and(MASK_24);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 132);
+ oV = iV.and(MASK_24).lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_24).lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 136);
+ oV = iV.and(MASK_24).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_24).lanewise(VectorOperators.LSHR, 16);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 140);
+ oV = iV.and(MASK_24).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 144);
+ oV = iV.and(MASK_24);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 148);
+ oV = iV.and(MASK_24).lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_24).lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 152);
+ oV = iV.and(MASK_24).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_24).lanewise(VectorOperators.LSHR, 16);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 156);
+ oV = iV.and(MASK_24).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 160);
+ oV = iV.and(MASK_24);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 164);
+ oV = iV.and(MASK_24).lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_24).lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 168);
+ oV = iV.and(MASK_24).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_24).lanewise(VectorOperators.LSHR, 16);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 172);
+ oV = iV.and(MASK_24).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 176);
+ oV = iV.and(MASK_24);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 180);
+ oV = iV.and(MASK_24).lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_24).lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 184);
+ oV = iV.and(MASK_24).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_24).lanewise(VectorOperators.LSHR, 16);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 188);
+ oV = iV.and(MASK_24).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 192);
+ oV = iV.and(MASK_24);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 196);
+ oV = iV.and(MASK_24).lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_24).lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 200);
+ oV = iV.and(MASK_24).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_24).lanewise(VectorOperators.LSHR, 16);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 204);
+ oV = iV.and(MASK_24).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 208);
+ oV = iV.and(MASK_24);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 212);
+ oV = iV.and(MASK_24).lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_24).lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 216);
+ oV = iV.and(MASK_24).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_24).lanewise(VectorOperators.LSHR, 16);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 220);
+ oV = iV.and(MASK_24).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 224);
+ oV = iV.and(MASK_24);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 228);
+ oV = iV.and(MASK_24).lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_24).lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 232);
+ oV = iV.and(MASK_24).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_24).lanewise(VectorOperators.LSHR, 16);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 236);
+ oV = iV.and(MASK_24).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 240);
+ oV = iV.and(MASK_24);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 244);
+ oV = iV.and(MASK_24).lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_24).lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 248);
+ oV = iV.and(MASK_24).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_24).lanewise(VectorOperators.LSHR, 16);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 252);
+ oV = iV.and(MASK_24).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ oV.intoArray(out, outpos);
+ }
+
+ private static void fastpackNoMask24(final int[] in, int inpos, final int[] out, int outpos) {
+ var iV = IntVector.fromArray(SPECIES_128, in, inpos);
+ var oV = iV;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 4);
+ oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 8);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 16);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 12);
+ oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 16);
+ oV = iV;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 20);
+ oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 24);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 16);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 28);
+ oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 32);
+ oV = iV;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 36);
+ oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 40);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 16);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 44);
+ oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 48);
+ oV = iV;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 52);
+ oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 56);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 16);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 60);
+ oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 64);
+ oV = iV;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 68);
+ oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 72);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 16);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 76);
+ oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 80);
+ oV = iV;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 84);
+ oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 88);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 16);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 92);
+ oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 96);
+ oV = iV;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 100);
+ oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 104);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 16);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 108);
+ oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 112);
+ oV = iV;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 116);
+ oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 120);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 16);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 124);
+ oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 128);
+ oV = iV;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 132);
+ oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 136);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 16);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 140);
+ oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 144);
+ oV = iV;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 148);
+ oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 152);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 16);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 156);
+ oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 160);
+ oV = iV;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 164);
+ oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 168);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 16);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 172);
+ oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 176);
+ oV = iV;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 180);
+ oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 184);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 16);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 188);
+ oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 192);
+ oV = iV;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 196);
+ oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 200);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 16);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 204);
+ oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 208);
+ oV = iV;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 212);
+ oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 216);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 16);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 220);
+ oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 224);
+ oV = iV;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 228);
+ oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 232);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 16);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 236);
+ oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 240);
+ oV = iV;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 244);
+ oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 248);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 16);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 252);
+ oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ oV.intoArray(out, outpos);
+ }
+
+ private static void fastunpack24(final int[] in, int inpos, final int[] out, int outpos) {
+ var iV = IntVector.fromArray(SPECIES_128, in, inpos);
+ iV.and(MASK_24).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ var oV = iV.lanewise(VectorOperators.LSHR, 24).and(MASK_24);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 4);
+ oV = iV.and(65535).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 16).and(MASK_24);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 8);
+ oV = iV.and(255).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 8).and(MASK_24).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 12);
+ iV.and(MASK_24).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 24).and(MASK_24);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 16);
+ oV = iV.and(65535).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 16).and(MASK_24);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 20);
+ oV = iV.and(255).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 8).and(MASK_24).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 24);
+ iV.and(MASK_24).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 24).and(MASK_24);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 28);
+ oV = iV.and(65535).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 16).and(MASK_24);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 32);
+ oV = iV.and(255).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 8).and(MASK_24).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 36);
+ iV.and(MASK_24).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 24).and(MASK_24);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 40);
+ oV = iV.and(65535).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 16).and(MASK_24);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 44);
+ oV = iV.and(255).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 8).and(MASK_24).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 48);
+ iV.and(MASK_24).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 24).and(MASK_24);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 52);
+ oV = iV.and(65535).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 16).and(MASK_24);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 56);
+ oV = iV.and(255).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 8).and(MASK_24).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 60);
+ iV.and(MASK_24).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 24).and(MASK_24);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 64);
+ oV = iV.and(65535).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 16).and(MASK_24);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 68);
+ oV = iV.and(255).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 8).and(MASK_24).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 72);
+ iV.and(MASK_24).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 24).and(MASK_24);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 76);
+ oV = iV.and(65535).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 16).and(MASK_24);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 80);
+ oV = iV.and(255).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 8).and(MASK_24).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 84);
+ iV.and(MASK_24).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 24).and(MASK_24);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 88);
+ oV = iV.and(65535).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 16).and(MASK_24);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 92);
+ oV = iV.and(255).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 8).and(MASK_24).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 96);
+ iV.and(MASK_24).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 24).and(MASK_24);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 100);
+ oV = iV.and(65535).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 16).and(MASK_24);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 104);
+ oV = iV.and(255).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 8).and(MASK_24).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 108);
+ iV.and(MASK_24).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 24).and(MASK_24);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 112);
+ oV = iV.and(65535).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 16).and(MASK_24);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 116);
+ oV = iV.and(255).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 8).and(MASK_24).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 120);
+ iV.and(MASK_24).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 24).and(MASK_24);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 124);
+ oV = iV.and(65535).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 16).and(MASK_24);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 128);
+ oV = iV.and(255).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 8).and(MASK_24).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 132);
+ iV.and(MASK_24).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 24).and(MASK_24);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 136);
+ oV = iV.and(65535).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 16).and(MASK_24);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 140);
+ oV = iV.and(255).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 8).and(MASK_24).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 144);
+ iV.and(MASK_24).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 24).and(MASK_24);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 148);
+ oV = iV.and(65535).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 16).and(MASK_24);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 152);
+ oV = iV.and(255).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 8).and(MASK_24).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 156);
+ iV.and(MASK_24).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 24).and(MASK_24);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 160);
+ oV = iV.and(65535).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 16).and(MASK_24);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 164);
+ oV = iV.and(255).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 8).and(MASK_24).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 168);
+ iV.and(MASK_24).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 24).and(MASK_24);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 172);
+ oV = iV.and(65535).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 16).and(MASK_24);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 176);
+ oV = iV.and(255).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 8).and(MASK_24).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 180);
+ iV.and(MASK_24).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 24).and(MASK_24);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 184);
+ oV = iV.and(65535).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 16).and(MASK_24);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 188);
+ oV = iV.and(255).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 8).and(MASK_24).intoArray(out, outpos);
+ }
+
+ private static void fastpack25(final int[] in, int inpos, final int[] out, int outpos) { // Lanewise 25-bit pack: 64 vector loads from in (offsets inpos + 0..252, step 4) become 50 vector stores to out.
+ var iV = IntVector.fromArray(SPECIES_128, in, inpos);
+ var oV = iV.and(MASK_25); // value 0 -> bits 0..24 of the first output word
+ // NOTE(review): SPECIES_128, MASK_25 and VLEN_128 are declared outside this view; presumably 4 int lanes, (1 << 25) - 1 and 4 - confirm.
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 4);
+ oV = iV.and(MASK_25).lanewise(VectorOperators.LSHL, 25).or(oV); // value 1 at bit 25: only its low 7 bits fit in this word
+
+ oV.intoArray(out, outpos); // first output word is full
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_25).lanewise(VectorOperators.LSHR, 7); // carry the upper 18 bits of value 1 into the next word
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 8);
+ oV = iV.and(MASK_25).lanewise(VectorOperators.LSHL, 18).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_25).lanewise(VectorOperators.LSHR, 14);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 12);
+ oV = iV.and(MASK_25).lanewise(VectorOperators.LSHL, 11).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_25).lanewise(VectorOperators.LSHR, 21);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 16);
+ oV = iV.and(MASK_25).lanewise(VectorOperators.LSHL, 4).or(oV); // value fits whole (bits 4..28), so no store before the next load
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 20);
+ oV = iV.and(MASK_25).lanewise(VectorOperators.LSHL, 29).or(oV); // bits 29..31 take the low 3 bits of the next value
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_25).lanewise(VectorOperators.LSHR, 3);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 24);
+ oV = iV.and(MASK_25).lanewise(VectorOperators.LSHL, 22).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_25).lanewise(VectorOperators.LSHR, 10);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 28);
+ oV = iV.and(MASK_25).lanewise(VectorOperators.LSHL, 15).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_25).lanewise(VectorOperators.LSHR, 17);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 32);
+ oV = iV.and(MASK_25).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_25).lanewise(VectorOperators.LSHR, 24);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 36);
+ oV = iV.and(MASK_25).lanewise(VectorOperators.LSHL, 1).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 40);
+ oV = iV.and(MASK_25).lanewise(VectorOperators.LSHL, 26).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_25).lanewise(VectorOperators.LSHR, 6);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 44);
+ oV = iV.and(MASK_25).lanewise(VectorOperators.LSHL, 19).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_25).lanewise(VectorOperators.LSHR, 13);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 48);
+ oV = iV.and(MASK_25).lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_25).lanewise(VectorOperators.LSHR, 20);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 52);
+ oV = iV.and(MASK_25).lanewise(VectorOperators.LSHL, 5).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 56);
+ oV = iV.and(MASK_25).lanewise(VectorOperators.LSHL, 30).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_25).lanewise(VectorOperators.LSHR, 2);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 60);
+ oV = iV.and(MASK_25).lanewise(VectorOperators.LSHL, 23).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_25).lanewise(VectorOperators.LSHR, 9);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 64);
+ oV = iV.and(MASK_25).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_25).lanewise(VectorOperators.LSHR, 16);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 68);
+ oV = iV.and(MASK_25).lanewise(VectorOperators.LSHL, 9).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_25).lanewise(VectorOperators.LSHR, 23);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 72);
+ oV = iV.and(MASK_25).lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 76);
+ oV = iV.and(MASK_25).lanewise(VectorOperators.LSHL, 27).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_25).lanewise(VectorOperators.LSHR, 5);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 80);
+ oV = iV.and(MASK_25).lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_25).lanewise(VectorOperators.LSHR, 12);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 84);
+ oV = iV.and(MASK_25).lanewise(VectorOperators.LSHL, 13).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_25).lanewise(VectorOperators.LSHR, 19);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 88);
+ oV = iV.and(MASK_25).lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 92);
+ oV = iV.and(MASK_25).lanewise(VectorOperators.LSHL, 31).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_25).lanewise(VectorOperators.LSHR, 1);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 96);
+ oV = iV.and(MASK_25).lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_25).lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 100);
+ oV = iV.and(MASK_25).lanewise(VectorOperators.LSHL, 17).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_25).lanewise(VectorOperators.LSHR, 15);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 104);
+ oV = iV.and(MASK_25).lanewise(VectorOperators.LSHL, 10).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_25).lanewise(VectorOperators.LSHR, 22);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 108);
+ oV = iV.and(MASK_25).lanewise(VectorOperators.LSHL, 3).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 112);
+ oV = iV.and(MASK_25).lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_25).lanewise(VectorOperators.LSHR, 4);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 116);
+ oV = iV.and(MASK_25).lanewise(VectorOperators.LSHL, 21).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_25).lanewise(VectorOperators.LSHR, 11);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 120);
+ oV = iV.and(MASK_25).lanewise(VectorOperators.LSHL, 14).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_25).lanewise(VectorOperators.LSHR, 18);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 124);
+ oV = iV.and(MASK_25).lanewise(VectorOperators.LSHL, 7).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 128); // 32 values * 25 bits = 25 full words: nothing to carry, the schedule restarts at bit 0
+ oV = iV.and(MASK_25);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 132);
+ oV = iV.and(MASK_25).lanewise(VectorOperators.LSHL, 25).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_25).lanewise(VectorOperators.LSHR, 7);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 136);
+ oV = iV.and(MASK_25).lanewise(VectorOperators.LSHL, 18).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_25).lanewise(VectorOperators.LSHR, 14);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 140);
+ oV = iV.and(MASK_25).lanewise(VectorOperators.LSHL, 11).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_25).lanewise(VectorOperators.LSHR, 21);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 144);
+ oV = iV.and(MASK_25).lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 148);
+ oV = iV.and(MASK_25).lanewise(VectorOperators.LSHL, 29).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_25).lanewise(VectorOperators.LSHR, 3);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 152);
+ oV = iV.and(MASK_25).lanewise(VectorOperators.LSHL, 22).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_25).lanewise(VectorOperators.LSHR, 10);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 156);
+ oV = iV.and(MASK_25).lanewise(VectorOperators.LSHL, 15).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_25).lanewise(VectorOperators.LSHR, 17);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 160);
+ oV = iV.and(MASK_25).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_25).lanewise(VectorOperators.LSHR, 24);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 164);
+ oV = iV.and(MASK_25).lanewise(VectorOperators.LSHL, 1).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 168);
+ oV = iV.and(MASK_25).lanewise(VectorOperators.LSHL, 26).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_25).lanewise(VectorOperators.LSHR, 6);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 172);
+ oV = iV.and(MASK_25).lanewise(VectorOperators.LSHL, 19).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_25).lanewise(VectorOperators.LSHR, 13);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 176);
+ oV = iV.and(MASK_25).lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_25).lanewise(VectorOperators.LSHR, 20);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 180);
+ oV = iV.and(MASK_25).lanewise(VectorOperators.LSHL, 5).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 184);
+ oV = iV.and(MASK_25).lanewise(VectorOperators.LSHL, 30).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_25).lanewise(VectorOperators.LSHR, 2);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 188);
+ oV = iV.and(MASK_25).lanewise(VectorOperators.LSHL, 23).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_25).lanewise(VectorOperators.LSHR, 9);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 192);
+ oV = iV.and(MASK_25).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_25).lanewise(VectorOperators.LSHR, 16);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 196);
+ oV = iV.and(MASK_25).lanewise(VectorOperators.LSHL, 9).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_25).lanewise(VectorOperators.LSHR, 23);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 200);
+ oV = iV.and(MASK_25).lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 204);
+ oV = iV.and(MASK_25).lanewise(VectorOperators.LSHL, 27).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_25).lanewise(VectorOperators.LSHR, 5);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 208);
+ oV = iV.and(MASK_25).lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_25).lanewise(VectorOperators.LSHR, 12);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 212);
+ oV = iV.and(MASK_25).lanewise(VectorOperators.LSHL, 13).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_25).lanewise(VectorOperators.LSHR, 19);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 216);
+ oV = iV.and(MASK_25).lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 220);
+ oV = iV.and(MASK_25).lanewise(VectorOperators.LSHL, 31).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_25).lanewise(VectorOperators.LSHR, 1);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 224);
+ oV = iV.and(MASK_25).lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_25).lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 228);
+ oV = iV.and(MASK_25).lanewise(VectorOperators.LSHL, 17).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_25).lanewise(VectorOperators.LSHR, 15);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 232);
+ oV = iV.and(MASK_25).lanewise(VectorOperators.LSHL, 10).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_25).lanewise(VectorOperators.LSHR, 22);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 236);
+ oV = iV.and(MASK_25).lanewise(VectorOperators.LSHL, 3).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 240);
+ oV = iV.and(MASK_25).lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_25).lanewise(VectorOperators.LSHR, 4);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 244);
+ oV = iV.and(MASK_25).lanewise(VectorOperators.LSHL, 21).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_25).lanewise(VectorOperators.LSHR, 11);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 248);
+ oV = iV.and(MASK_25).lanewise(VectorOperators.LSHL, 14).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_25).lanewise(VectorOperators.LSHR, 18);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 252);
+ oV = iV.and(MASK_25).lanewise(VectorOperators.LSHL, 7).or(oV);
+
+ oV.intoArray(out, outpos); // 50th and final store; outpos is not advanced afterwards
+ }
+
+ private static void fastpackNoMask25(final int[] in, int inpos, final int[] out, int outpos) { // Lanewise 25-bit pack without input masking: 64 vector loads from in (offsets inpos + 0..252, step 4) become 50 vector stores to out.
+ var iV = IntVector.fromArray(SPECIES_128, in, inpos);
+ var oV = iV; // no mask: any bits above bit 24 would be OR-ed into the neighbouring value's slot
+ // NOTE(review): presumably callers guarantee every input fits in 25 bits; SPECIES_128 and VLEN_128 are declared outside this view (assumed 4 int lanes and 4) - confirm.
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 4);
+ oV = iV.lanewise(VectorOperators.LSHL, 25).or(oV); // value 1 at bit 25: only its low 7 bits fit in this word
+
+ oV.intoArray(out, outpos); // first output word is full
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 7); // carry the remaining upper bits of value 1 into the next word
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 8);
+ oV = iV.lanewise(VectorOperators.LSHL, 18).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 14);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 12);
+ oV = iV.lanewise(VectorOperators.LSHL, 11).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 21);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 16);
+ oV = iV.lanewise(VectorOperators.LSHL, 4).or(oV); // 25-bit slot at bits 4..28 lies inside this word, so no store before the next load
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 20);
+ oV = iV.lanewise(VectorOperators.LSHL, 29).or(oV); // bits 29..31 take the low 3 bits of the next value
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 3);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 24);
+ oV = iV.lanewise(VectorOperators.LSHL, 22).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 10);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 28);
+ oV = iV.lanewise(VectorOperators.LSHL, 15).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 17);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 32);
+ oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 24);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 36);
+ oV = iV.lanewise(VectorOperators.LSHL, 1).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 40);
+ oV = iV.lanewise(VectorOperators.LSHL, 26).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 6);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 44);
+ oV = iV.lanewise(VectorOperators.LSHL, 19).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 13);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 48);
+ oV = iV.lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 20);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 52);
+ oV = iV.lanewise(VectorOperators.LSHL, 5).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 56);
+ oV = iV.lanewise(VectorOperators.LSHL, 30).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 2);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 60);
+ oV = iV.lanewise(VectorOperators.LSHL, 23).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 9);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 64);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 16);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 68);
+ oV = iV.lanewise(VectorOperators.LSHL, 9).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 23);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 72);
+ oV = iV.lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 76);
+ oV = iV.lanewise(VectorOperators.LSHL, 27).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 5);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 80);
+ oV = iV.lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 12);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 84);
+ oV = iV.lanewise(VectorOperators.LSHL, 13).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 19);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 88);
+ oV = iV.lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 92);
+ oV = iV.lanewise(VectorOperators.LSHL, 31).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 1);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 96);
+ oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 100);
+ oV = iV.lanewise(VectorOperators.LSHL, 17).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 15);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 104);
+ oV = iV.lanewise(VectorOperators.LSHL, 10).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 22);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 108);
+ oV = iV.lanewise(VectorOperators.LSHL, 3).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 112);
+ oV = iV.lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 4);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 116);
+ oV = iV.lanewise(VectorOperators.LSHL, 21).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 11);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 120);
+ oV = iV.lanewise(VectorOperators.LSHL, 14).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 18);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 124);
+ oV = iV.lanewise(VectorOperators.LSHL, 7).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 128); // 32 values * 25 bits = 25 full words: nothing to carry, the schedule restarts at bit 0
+ oV = iV;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 132);
+ oV = iV.lanewise(VectorOperators.LSHL, 25).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 7);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 136);
+ oV = iV.lanewise(VectorOperators.LSHL, 18).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 14);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 140);
+ oV = iV.lanewise(VectorOperators.LSHL, 11).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 21);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 144);
+ oV = iV.lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 148);
+ oV = iV.lanewise(VectorOperators.LSHL, 29).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 3);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 152);
+ oV = iV.lanewise(VectorOperators.LSHL, 22).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 10);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 156);
+ oV = iV.lanewise(VectorOperators.LSHL, 15).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 17);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 160);
+ oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 24);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 164);
+ oV = iV.lanewise(VectorOperators.LSHL, 1).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 168);
+ oV = iV.lanewise(VectorOperators.LSHL, 26).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 6);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 172);
+ oV = iV.lanewise(VectorOperators.LSHL, 19).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 13);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 176);
+ oV = iV.lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 20);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 180);
+ oV = iV.lanewise(VectorOperators.LSHL, 5).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 184);
+ oV = iV.lanewise(VectorOperators.LSHL, 30).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 2);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 188);
+ oV = iV.lanewise(VectorOperators.LSHL, 23).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 9);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 192);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 16);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 196);
+ oV = iV.lanewise(VectorOperators.LSHL, 9).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 23);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 200);
+ oV = iV.lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 204);
+ oV = iV.lanewise(VectorOperators.LSHL, 27).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 5);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 208);
+ oV = iV.lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 12);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 212);
+ oV = iV.lanewise(VectorOperators.LSHL, 13).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 19);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 216);
+ oV = iV.lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 220);
+ oV = iV.lanewise(VectorOperators.LSHL, 31).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 1);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 224);
+ oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 228);
+ oV = iV.lanewise(VectorOperators.LSHL, 17).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 15);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 232);
+ oV = iV.lanewise(VectorOperators.LSHL, 10).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 22);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 236);
+ oV = iV.lanewise(VectorOperators.LSHL, 3).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 240);
+ oV = iV.lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 4);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 244);
+ oV = iV.lanewise(VectorOperators.LSHL, 21).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 11);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 248);
+ oV = iV.lanewise(VectorOperators.LSHL, 14).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 18);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 252);
+ oV = iV.lanewise(VectorOperators.LSHL, 7).or(oV);
+
+ oV.intoArray(out, outpos); // 50th and final store; outpos is not advanced afterwards
+ }
+
+ private static void fastunpack25(final int[] in, int inpos, final int[] out, int outpos) { // Unpacks 25-bit fields lane by lane: 50 input vectors (loads at inpos + 0, 4, ..., 196) -> 64 output vectors.
+ var iV = IntVector.fromArray(SPECIES_128, in, inpos); // iV: current packed input vector; only lanewise ops are used, so each lane is an independent bit stream
+ iV.and(MASK_25).intoArray(out, outpos); // first value: bits selected by MASK_25 (defined outside this chunk; presumably (1 << 25) - 1)
+ outpos += VLEN_128;
+ // A value that straddles two input words is assembled in oV: first the bits left at the top of the current word ...
+ var oV = iV.lanewise(VectorOperators.LSHR, 25).and(MASK_25); // >>> 25 leaves the top 7 bits as the low part of the next value
+ // ... then the low 18 bits of the next word (262143 = 2^18 - 1), shifted up by 7 to sit above them: 7 + 18 = 25.
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 4);
+ oV = iV.and(262143).lanewise(VectorOperators.LSHL, 7).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+ // NOTE(review): after a logical right shift of 7 or more at most 25 bits remain, so and(MASK_25) looks redundant if MASK_25 == (1 << 25) - 1 - confirm.
+ oV = iV.lanewise(VectorOperators.LSHR, 18).and(MASK_25);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 8);
+ oV = iV.and(2047).lanewise(VectorOperators.LSHL, 14).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 11).and(MASK_25);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 12);
+ oV = iV.and(15).lanewise(VectorOperators.LSHL, 21).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+ // Bits 4..28 of the same word hold a complete value, so it is stored directly without a new load.
+ iV.lanewise(VectorOperators.LSHR, 4).and(MASK_25).intoArray(out, outpos);
+ outpos += VLEN_128;
+ // The top 3 bits (>>> 29) start the next straddling value.
+ oV = iV.lanewise(VectorOperators.LSHR, 29).and(MASK_25);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 16);
+ oV = iV.and(4194303).lanewise(VectorOperators.LSHL, 3).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 22).and(MASK_25);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 20);
+ oV = iV.and(32767).lanewise(VectorOperators.LSHL, 10).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 15).and(MASK_25);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 24);
+ oV = iV.and(255).lanewise(VectorOperators.LSHL, 17).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 8).and(MASK_25);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 28);
+ oV = iV.and(1).lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 1).and(MASK_25).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 26).and(MASK_25);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 32);
+ oV = iV.and(524287).lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 19).and(MASK_25);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 36);
+ oV = iV.and(4095).lanewise(VectorOperators.LSHL, 13).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 12).and(MASK_25);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 40);
+ oV = iV.and(31).lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 5).and(MASK_25).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 30).and(MASK_25);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 44);
+ oV = iV.and(8388607).lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 23).and(MASK_25);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 48);
+ oV = iV.and(65535).lanewise(VectorOperators.LSHL, 9).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 16).and(MASK_25);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 52);
+ oV = iV.and(511).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 9).and(MASK_25);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 56);
+ oV = iV.and(3).lanewise(VectorOperators.LSHL, 23).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 2).and(MASK_25).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 27).and(MASK_25);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 60);
+ oV = iV.and(1048575).lanewise(VectorOperators.LSHL, 5).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 20).and(MASK_25);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 64);
+ oV = iV.and(8191).lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 13).and(MASK_25);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 68);
+ oV = iV.and(63).lanewise(VectorOperators.LSHL, 19).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 6).and(MASK_25).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 31).and(MASK_25);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 72);
+ oV = iV.and(16777215).lanewise(VectorOperators.LSHL, 1).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 24).and(MASK_25);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 76);
+ oV = iV.and(131071).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 17).and(MASK_25);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 80);
+ oV = iV.and(1023).lanewise(VectorOperators.LSHL, 15).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 10).and(MASK_25);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 84);
+ oV = iV.and(7).lanewise(VectorOperators.LSHL, 22).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 3).and(MASK_25).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 28).and(MASK_25);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 88);
+ oV = iV.and(2097151).lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 21).and(MASK_25);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 92);
+ oV = iV.and(16383).lanewise(VectorOperators.LSHL, 11).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 14).and(MASK_25);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 96);
+ oV = iV.and(127).lanewise(VectorOperators.LSHL, 18).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 7).and(MASK_25).intoArray(out, outpos);
+ outpos += VLEN_128;
+ // Bits 7..31 above used up the 25th input vector (25 words * 32 bits = 32 values * 25 bits): the same schedule restarts at bit 0 with the vector at inpos + 100.
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 100);
+ iV.and(MASK_25).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 25).and(MASK_25);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 104);
+ oV = iV.and(262143).lanewise(VectorOperators.LSHL, 7).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 18).and(MASK_25);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 108);
+ oV = iV.and(2047).lanewise(VectorOperators.LSHL, 14).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 11).and(MASK_25);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 112);
+ oV = iV.and(15).lanewise(VectorOperators.LSHL, 21).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 4).and(MASK_25).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 29).and(MASK_25);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 116);
+ oV = iV.and(4194303).lanewise(VectorOperators.LSHL, 3).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 22).and(MASK_25);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 120);
+ oV = iV.and(32767).lanewise(VectorOperators.LSHL, 10).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 15).and(MASK_25);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 124);
+ oV = iV.and(255).lanewise(VectorOperators.LSHL, 17).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 8).and(MASK_25);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 128);
+ oV = iV.and(1).lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 1).and(MASK_25).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 26).and(MASK_25);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 132);
+ oV = iV.and(524287).lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 19).and(MASK_25);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 136);
+ oV = iV.and(4095).lanewise(VectorOperators.LSHL, 13).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 12).and(MASK_25);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 140);
+ oV = iV.and(31).lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 5).and(MASK_25).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 30).and(MASK_25);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 144);
+ oV = iV.and(8388607).lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 23).and(MASK_25);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 148);
+ oV = iV.and(65535).lanewise(VectorOperators.LSHL, 9).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 16).and(MASK_25);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 152);
+ oV = iV.and(511).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 9).and(MASK_25);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 156);
+ oV = iV.and(3).lanewise(VectorOperators.LSHL, 23).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 2).and(MASK_25).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 27).and(MASK_25);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 160);
+ oV = iV.and(1048575).lanewise(VectorOperators.LSHL, 5).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 20).and(MASK_25);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 164);
+ oV = iV.and(8191).lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 13).and(MASK_25);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 168);
+ oV = iV.and(63).lanewise(VectorOperators.LSHL, 19).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 6).and(MASK_25).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 31).and(MASK_25);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 172);
+ oV = iV.and(16777215).lanewise(VectorOperators.LSHL, 1).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 24).and(MASK_25);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 176);
+ oV = iV.and(131071).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 17).and(MASK_25);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 180);
+ oV = iV.and(1023).lanewise(VectorOperators.LSHL, 15).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 10).and(MASK_25);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 184);
+ oV = iV.and(7).lanewise(VectorOperators.LSHL, 22).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 3).and(MASK_25).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 28).and(MASK_25);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 188);
+ oV = iV.and(2097151).lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 21).and(MASK_25);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 192);
+ oV = iV.and(16383).lanewise(VectorOperators.LSHL, 11).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 14).and(MASK_25);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 196);
+ oV = iV.and(127).lanewise(VectorOperators.LSHL, 18).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 7).and(MASK_25).intoArray(out, outpos); // 64th and last store; outpos is a local copy, so it is not advanced again
+ }
+
+ private static void fastpack26(final int[] in, int inpos, final int[] out, int outpos) { // Packs 26-bit fields lane by lane: 64 input vectors (loads at inpos + 0, 4, ..., 252) -> 52 output vectors.
+ var iV = IntVector.fromArray(SPECIES_128, in, inpos); // iV: current input vector; only lanewise ops are used, so each lane is packed independently
+ var oV = iV.and(MASK_26); // oV: output word being filled; MASK_26 is defined outside this chunk (presumably (1 << 26) - 1)
+ // The second value starts at bit 26, so only its low 6 bits land in this word (<< 26 shifts the rest out).
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 4);
+ oV = iV.and(MASK_26).lanewise(VectorOperators.LSHL, 26).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+ // The remaining bits of that value (>>> 6) become the low bits of the next output word.
+ oV = iV.and(MASK_26).lanewise(VectorOperators.LSHR, 6);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 8);
+ oV = iV.and(MASK_26).lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_26).lanewise(VectorOperators.LSHR, 12);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 12);
+ oV = iV.and(MASK_26).lanewise(VectorOperators.LSHL, 14).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_26).lanewise(VectorOperators.LSHR, 18);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 16);
+ oV = iV.and(MASK_26).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_26).lanewise(VectorOperators.LSHR, 24);
+ // Only 2 bits were carried over (>>> 24), so a whole value fits at bits 2..27 before the next straddling value at bit 28.
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 20);
+ oV = iV.and(MASK_26).lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 24);
+ oV = iV.and(MASK_26).lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_26).lanewise(VectorOperators.LSHR, 4);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 28);
+ oV = iV.and(MASK_26).lanewise(VectorOperators.LSHL, 22).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_26).lanewise(VectorOperators.LSHR, 10);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 32);
+ oV = iV.and(MASK_26).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_26).lanewise(VectorOperators.LSHR, 16);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 36);
+ oV = iV.and(MASK_26).lanewise(VectorOperators.LSHL, 10).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_26).lanewise(VectorOperators.LSHR, 22);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 40);
+ oV = iV.and(MASK_26).lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 44);
+ oV = iV.and(MASK_26).lanewise(VectorOperators.LSHL, 30).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_26).lanewise(VectorOperators.LSHR, 2);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 48);
+ oV = iV.and(MASK_26).lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_26).lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 52);
+ oV = iV.and(MASK_26).lanewise(VectorOperators.LSHL, 18).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_26).lanewise(VectorOperators.LSHR, 14);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 56);
+ oV = iV.and(MASK_26).lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_26).lanewise(VectorOperators.LSHR, 20);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 60);
+ oV = iV.and(MASK_26).lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+ // 16 values * 26 bits = 13 words * 32 bits: the word above closed exactly on a boundary, so the same schedule restarts at bit 0 for the inputs at inpos + 64.
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 64);
+ oV = iV.and(MASK_26);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 68);
+ oV = iV.and(MASK_26).lanewise(VectorOperators.LSHL, 26).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_26).lanewise(VectorOperators.LSHR, 6);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 72);
+ oV = iV.and(MASK_26).lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_26).lanewise(VectorOperators.LSHR, 12);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 76);
+ oV = iV.and(MASK_26).lanewise(VectorOperators.LSHL, 14).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_26).lanewise(VectorOperators.LSHR, 18);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 80);
+ oV = iV.and(MASK_26).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_26).lanewise(VectorOperators.LSHR, 24);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 84);
+ oV = iV.and(MASK_26).lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 88);
+ oV = iV.and(MASK_26).lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_26).lanewise(VectorOperators.LSHR, 4);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 92);
+ oV = iV.and(MASK_26).lanewise(VectorOperators.LSHL, 22).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_26).lanewise(VectorOperators.LSHR, 10);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 96);
+ oV = iV.and(MASK_26).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_26).lanewise(VectorOperators.LSHR, 16);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 100);
+ oV = iV.and(MASK_26).lanewise(VectorOperators.LSHL, 10).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_26).lanewise(VectorOperators.LSHR, 22);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 104);
+ oV = iV.and(MASK_26).lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 108);
+ oV = iV.and(MASK_26).lanewise(VectorOperators.LSHL, 30).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_26).lanewise(VectorOperators.LSHR, 2);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 112);
+ oV = iV.and(MASK_26).lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_26).lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 116);
+ oV = iV.and(MASK_26).lanewise(VectorOperators.LSHL, 18).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_26).lanewise(VectorOperators.LSHR, 14);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 120);
+ oV = iV.and(MASK_26).lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_26).lanewise(VectorOperators.LSHR, 20);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 124);
+ oV = iV.and(MASK_26).lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+ // Third group of 16 input vectors (loads at inpos + 128 through inpos + 188), same schedule.
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 128);
+ oV = iV.and(MASK_26);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 132);
+ oV = iV.and(MASK_26).lanewise(VectorOperators.LSHL, 26).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_26).lanewise(VectorOperators.LSHR, 6);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 136);
+ oV = iV.and(MASK_26).lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_26).lanewise(VectorOperators.LSHR, 12);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 140);
+ oV = iV.and(MASK_26).lanewise(VectorOperators.LSHL, 14).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_26).lanewise(VectorOperators.LSHR, 18);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 144);
+ oV = iV.and(MASK_26).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_26).lanewise(VectorOperators.LSHR, 24);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 148);
+ oV = iV.and(MASK_26).lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 152);
+ oV = iV.and(MASK_26).lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_26).lanewise(VectorOperators.LSHR, 4);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 156);
+ oV = iV.and(MASK_26).lanewise(VectorOperators.LSHL, 22).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_26).lanewise(VectorOperators.LSHR, 10);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 160);
+ oV = iV.and(MASK_26).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_26).lanewise(VectorOperators.LSHR, 16);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 164);
+ oV = iV.and(MASK_26).lanewise(VectorOperators.LSHL, 10).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_26).lanewise(VectorOperators.LSHR, 22);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 168);
+ oV = iV.and(MASK_26).lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 172);
+ oV = iV.and(MASK_26).lanewise(VectorOperators.LSHL, 30).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_26).lanewise(VectorOperators.LSHR, 2);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 176);
+ oV = iV.and(MASK_26).lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_26).lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 180);
+ oV = iV.and(MASK_26).lanewise(VectorOperators.LSHL, 18).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_26).lanewise(VectorOperators.LSHR, 14);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 184);
+ oV = iV.and(MASK_26).lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_26).lanewise(VectorOperators.LSHR, 20);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 188);
+ oV = iV.and(MASK_26).lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+ // Fourth and last group of 16 input vectors (loads at inpos + 192 through inpos + 252), same schedule.
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 192);
+ oV = iV.and(MASK_26);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 196);
+ oV = iV.and(MASK_26).lanewise(VectorOperators.LSHL, 26).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_26).lanewise(VectorOperators.LSHR, 6);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 200);
+ oV = iV.and(MASK_26).lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_26).lanewise(VectorOperators.LSHR, 12);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 204);
+ oV = iV.and(MASK_26).lanewise(VectorOperators.LSHL, 14).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_26).lanewise(VectorOperators.LSHR, 18);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 208);
+ oV = iV.and(MASK_26).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_26).lanewise(VectorOperators.LSHR, 24);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 212);
+ oV = iV.and(MASK_26).lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 216);
+ oV = iV.and(MASK_26).lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_26).lanewise(VectorOperators.LSHR, 4);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 220);
+ oV = iV.and(MASK_26).lanewise(VectorOperators.LSHL, 22).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_26).lanewise(VectorOperators.LSHR, 10);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 224);
+ oV = iV.and(MASK_26).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_26).lanewise(VectorOperators.LSHR, 16);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 228);
+ oV = iV.and(MASK_26).lanewise(VectorOperators.LSHL, 10).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_26).lanewise(VectorOperators.LSHR, 22);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 232);
+ oV = iV.and(MASK_26).lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 236);
+ oV = iV.and(MASK_26).lanewise(VectorOperators.LSHL, 30).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_26).lanewise(VectorOperators.LSHR, 2);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 240);
+ oV = iV.and(MASK_26).lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_26).lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 244);
+ oV = iV.and(MASK_26).lanewise(VectorOperators.LSHL, 18).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_26).lanewise(VectorOperators.LSHR, 14);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 248);
+ oV = iV.and(MASK_26).lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_26).lanewise(VectorOperators.LSHR, 20);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 252);
+ oV = iV.and(MASK_26).lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ oV.intoArray(out, outpos); // 52nd and last store; outpos is a local copy, so it is not advanced again
+ }
+
+ private static void fastpackNoMask26(final int[] in, int inpos, final int[] out, int outpos) {
+ var iV = IntVector.fromArray(SPECIES_128, in, inpos);
+ var oV = iV;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 4);
+ oV = iV.lanewise(VectorOperators.LSHL, 26).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 6);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 8);
+ oV = iV.lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 12);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 12);
+ oV = iV.lanewise(VectorOperators.LSHL, 14).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 18);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 16);
+ oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 24);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 20);
+ oV = iV.lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 24);
+ oV = iV.lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 4);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 28);
+ oV = iV.lanewise(VectorOperators.LSHL, 22).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 10);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 32);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 16);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 36);
+ oV = iV.lanewise(VectorOperators.LSHL, 10).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 22);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 40);
+ oV = iV.lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 44);
+ oV = iV.lanewise(VectorOperators.LSHL, 30).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 2);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 48);
+ oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 52);
+ oV = iV.lanewise(VectorOperators.LSHL, 18).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 14);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 56);
+ oV = iV.lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 20);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 60);
+ oV = iV.lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 64);
+ oV = iV;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 68);
+ oV = iV.lanewise(VectorOperators.LSHL, 26).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 6);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 72);
+ oV = iV.lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 12);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 76);
+ oV = iV.lanewise(VectorOperators.LSHL, 14).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 18);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 80);
+ oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 24);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 84);
+ oV = iV.lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 88);
+ oV = iV.lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 4);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 92);
+ oV = iV.lanewise(VectorOperators.LSHL, 22).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 10);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 96);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 16);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 100);
+ oV = iV.lanewise(VectorOperators.LSHL, 10).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 22);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 104);
+ oV = iV.lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 108);
+ oV = iV.lanewise(VectorOperators.LSHL, 30).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 2);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 112);
+ oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 116);
+ oV = iV.lanewise(VectorOperators.LSHL, 18).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 14);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 120);
+ oV = iV.lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 20);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 124);
+ oV = iV.lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 128);
+ oV = iV;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 132);
+ oV = iV.lanewise(VectorOperators.LSHL, 26).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 6);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 136);
+ oV = iV.lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 12);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 140);
+ oV = iV.lanewise(VectorOperators.LSHL, 14).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 18);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 144);
+ oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 24);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 148);
+ oV = iV.lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 152);
+ oV = iV.lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 4);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 156);
+ oV = iV.lanewise(VectorOperators.LSHL, 22).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 10);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 160);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 16);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 164);
+ oV = iV.lanewise(VectorOperators.LSHL, 10).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 22);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 168);
+ oV = iV.lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 172);
+ oV = iV.lanewise(VectorOperators.LSHL, 30).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 2);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 176);
+ oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 180);
+ oV = iV.lanewise(VectorOperators.LSHL, 18).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 14);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 184);
+ oV = iV.lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 20);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 188);
+ oV = iV.lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 192);
+ oV = iV;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 196);
+ oV = iV.lanewise(VectorOperators.LSHL, 26).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 6);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 200);
+ oV = iV.lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 12);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 204);
+ oV = iV.lanewise(VectorOperators.LSHL, 14).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 18);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 208);
+ oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 24);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 212);
+ oV = iV.lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 216);
+ oV = iV.lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 4);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 220);
+ oV = iV.lanewise(VectorOperators.LSHL, 22).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 10);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 224);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 16);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 228);
+ oV = iV.lanewise(VectorOperators.LSHL, 10).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 22);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 232);
+ oV = iV.lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 236);
+ oV = iV.lanewise(VectorOperators.LSHL, 30).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 2);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 240);
+ oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 244);
+ oV = iV.lanewise(VectorOperators.LSHL, 18).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 14);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 248);
+ oV = iV.lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 20);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 252);
+ oV = iV.lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ oV.intoArray(out, outpos);
+ }
+
+ /**
+  * Unpacks 26-bit values from {@code in} into {@code out}, treating every vector lane as an
+  * independent bit stream.
+  *
+  * <p>Thirteen packed vectors expand into sixteen output vectors (13 * 32 == 16 * 26 bits per
+  * lane). The routine processes four such groups, so it performs 52 vector loads and 64
+  * vector stores in total. Loads and stores are issued in the same interleaved order within
+  * each group, which matters if the two arrays overlap.
+  *
+  * @param in array holding the packed words
+  * @param inpos index in {@code in} of the first packed vector
+  * @param out array receiving the unpacked values
+  * @param outpos index in {@code out} of the first unpacked vector
+  */
+ private static void fastunpack26(final int[] in, int inpos, final int[] out, int outpos) {
+     // Each group consumes 13 loads at a stride of 4 ints, i.e. 52 ints of input.
+     // NOTE(review): the literal stride of 4 presumably equals VLEN_128 - confirm.
+     for (int group = 0; group < 4; ++group, inpos += 52) {
+         var word = IntVector.fromArray(SPECIES_128, in, inpos);
+         word.and(MASK_26).intoArray(out, outpos);
+         outpos += VLEN_128;
+
+         // Values that straddle two words: high bits of the previous word supply the low
+         // part, the low bits of the freshly loaded word supply the high part.
+         var low = word.lanewise(VectorOperators.LSHR, 26).and(MASK_26);
+         word = IntVector.fromArray(SPECIES_128, in, inpos + 4);
+         low.or(word.and(0xFFFFF).lanewise(VectorOperators.LSHL, 6)).intoArray(out, outpos);
+         outpos += VLEN_128;
+
+         low = word.lanewise(VectorOperators.LSHR, 20).and(MASK_26);
+         word = IntVector.fromArray(SPECIES_128, in, inpos + 8);
+         low.or(word.and(0x3FFF).lanewise(VectorOperators.LSHL, 12)).intoArray(out, outpos);
+         outpos += VLEN_128;
+
+         low = word.lanewise(VectorOperators.LSHR, 14).and(MASK_26);
+         word = IntVector.fromArray(SPECIES_128, in, inpos + 12);
+         low.or(word.and(0xFF).lanewise(VectorOperators.LSHL, 18)).intoArray(out, outpos);
+         outpos += VLEN_128;
+
+         low = word.lanewise(VectorOperators.LSHR, 8).and(MASK_26);
+         word = IntVector.fromArray(SPECIES_128, in, inpos + 16);
+         low.or(word.and(0x3).lanewise(VectorOperators.LSHL, 24)).intoArray(out, outpos);
+         outpos += VLEN_128;
+
+         // This word also contains one value entirely inside it (bits 2..27).
+         word.lanewise(VectorOperators.LSHR, 2).and(MASK_26).intoArray(out, outpos);
+         outpos += VLEN_128;
+
+         low = word.lanewise(VectorOperators.LSHR, 28).and(MASK_26);
+         word = IntVector.fromArray(SPECIES_128, in, inpos + 20);
+         low.or(word.and(0x3FFFFF).lanewise(VectorOperators.LSHL, 4)).intoArray(out, outpos);
+         outpos += VLEN_128;
+
+         low = word.lanewise(VectorOperators.LSHR, 22).and(MASK_26);
+         word = IntVector.fromArray(SPECIES_128, in, inpos + 24);
+         low.or(word.and(0xFFFF).lanewise(VectorOperators.LSHL, 10)).intoArray(out, outpos);
+         outpos += VLEN_128;
+
+         low = word.lanewise(VectorOperators.LSHR, 16).and(MASK_26);
+         word = IntVector.fromArray(SPECIES_128, in, inpos + 28);
+         low.or(word.and(0x3FF).lanewise(VectorOperators.LSHL, 16)).intoArray(out, outpos);
+         outpos += VLEN_128;
+
+         low = word.lanewise(VectorOperators.LSHR, 10).and(MASK_26);
+         word = IntVector.fromArray(SPECIES_128, in, inpos + 32);
+         low.or(word.and(0xF).lanewise(VectorOperators.LSHL, 22)).intoArray(out, outpos);
+         outpos += VLEN_128;
+
+         // Fully contained value at bits 4..29.
+         word.lanewise(VectorOperators.LSHR, 4).and(MASK_26).intoArray(out, outpos);
+         outpos += VLEN_128;
+
+         low = word.lanewise(VectorOperators.LSHR, 30).and(MASK_26);
+         word = IntVector.fromArray(SPECIES_128, in, inpos + 36);
+         low.or(word.and(0xFFFFFF).lanewise(VectorOperators.LSHL, 2)).intoArray(out, outpos);
+         outpos += VLEN_128;
+
+         low = word.lanewise(VectorOperators.LSHR, 24).and(MASK_26);
+         word = IntVector.fromArray(SPECIES_128, in, inpos + 40);
+         low.or(word.and(0x3FFFF).lanewise(VectorOperators.LSHL, 8)).intoArray(out, outpos);
+         outpos += VLEN_128;
+
+         low = word.lanewise(VectorOperators.LSHR, 18).and(MASK_26);
+         word = IntVector.fromArray(SPECIES_128, in, inpos + 44);
+         low.or(word.and(0xFFF).lanewise(VectorOperators.LSHL, 14)).intoArray(out, outpos);
+         outpos += VLEN_128;
+
+         low = word.lanewise(VectorOperators.LSHR, 12).and(MASK_26);
+         word = IntVector.fromArray(SPECIES_128, in, inpos + 48);
+         low.or(word.and(0x3F).lanewise(VectorOperators.LSHL, 20)).intoArray(out, outpos);
+         outpos += VLEN_128;
+
+         // Last value of the group occupies the top 26 bits of the 13th word.
+         word.lanewise(VectorOperators.LSHR, 6).and(MASK_26).intoArray(out, outpos);
+         outpos += VLEN_128;
+     }
+ }
+
+ /**
+  * Packs the low 27 bits of each input value from {@code in} into {@code out}, treating every
+  * vector lane as an independent bit stream.
+  *
+  * <p>Thirty-two input vectors collapse into twenty-seven packed vectors (32 * 27 == 27 * 32
+  * bits per lane). The routine processes two such groups, so it performs 64 vector loads and
+  * 54 vector stores in total. Every value is masked with {@code MASK_27} before use, so bits
+  * above the 27th are discarded rather than leaking into neighbouring values.
+  *
+  * @param in array holding the values to pack
+  * @param inpos index in {@code in} of the first input vector
+  * @param out array receiving the packed words
+  * @param outpos index in {@code out} of the first packed vector
+  */
+ private static void fastpack27(final int[] in, int inpos, final int[] out, int outpos) {
+     // Each group consumes 32 loads at a stride of 4 ints, i.e. 128 ints of input.
+     // NOTE(review): the literal stride of 4 presumably equals VLEN_128 - confirm.
+     for (int group = 0; group < 2; ++group, inpos += 128) {
+         var val = IntVector.fromArray(SPECIES_128, in, inpos).and(MASK_27);
+         var acc = val;
+
+         // Pattern for every step: shift the new value into the free high bits of the
+         // accumulator, flush the full word, then restart the accumulator with the bits of
+         // the value that did not fit.
+         val = IntVector.fromArray(SPECIES_128, in, inpos + 4).and(MASK_27);
+         acc.or(val.lanewise(VectorOperators.LSHL, 27)).intoArray(out, outpos);
+         outpos += VLEN_128;
+         acc = val.lanewise(VectorOperators.LSHR, 5);
+
+         val = IntVector.fromArray(SPECIES_128, in, inpos + 8).and(MASK_27);
+         acc.or(val.lanewise(VectorOperators.LSHL, 22)).intoArray(out, outpos);
+         outpos += VLEN_128;
+         acc = val.lanewise(VectorOperators.LSHR, 10);
+
+         val = IntVector.fromArray(SPECIES_128, in, inpos + 12).and(MASK_27);
+         acc.or(val.lanewise(VectorOperators.LSHL, 17)).intoArray(out, outpos);
+         outpos += VLEN_128;
+         acc = val.lanewise(VectorOperators.LSHR, 15);
+
+         val = IntVector.fromArray(SPECIES_128, in, inpos + 16).and(MASK_27);
+         acc.or(val.lanewise(VectorOperators.LSHL, 12)).intoArray(out, outpos);
+         outpos += VLEN_128;
+         acc = val.lanewise(VectorOperators.LSHR, 20);
+
+         val = IntVector.fromArray(SPECIES_128, in, inpos + 20).and(MASK_27);
+         acc.or(val.lanewise(VectorOperators.LSHL, 7)).intoArray(out, outpos);
+         outpos += VLEN_128;
+         acc = val.lanewise(VectorOperators.LSHR, 25);
+
+         // This value fits entirely (bits 2..28), so nothing is flushed yet.
+         val = IntVector.fromArray(SPECIES_128, in, inpos + 24).and(MASK_27);
+         acc = acc.or(val.lanewise(VectorOperators.LSHL, 2));
+
+         val = IntVector.fromArray(SPECIES_128, in, inpos + 28).and(MASK_27);
+         acc.or(val.lanewise(VectorOperators.LSHL, 29)).intoArray(out, outpos);
+         outpos += VLEN_128;
+         acc = val.lanewise(VectorOperators.LSHR, 3);
+
+         val = IntVector.fromArray(SPECIES_128, in, inpos + 32).and(MASK_27);
+         acc.or(val.lanewise(VectorOperators.LSHL, 24)).intoArray(out, outpos);
+         outpos += VLEN_128;
+         acc = val.lanewise(VectorOperators.LSHR, 8);
+
+         val = IntVector.fromArray(SPECIES_128, in, inpos + 36).and(MASK_27);
+         acc.or(val.lanewise(VectorOperators.LSHL, 19)).intoArray(out, outpos);
+         outpos += VLEN_128;
+         acc = val.lanewise(VectorOperators.LSHR, 13);
+
+         val = IntVector.fromArray(SPECIES_128, in, inpos + 40).and(MASK_27);
+         acc.or(val.lanewise(VectorOperators.LSHL, 14)).intoArray(out, outpos);
+         outpos += VLEN_128;
+         acc = val.lanewise(VectorOperators.LSHR, 18);
+
+         val = IntVector.fromArray(SPECIES_128, in, inpos + 44).and(MASK_27);
+         acc.or(val.lanewise(VectorOperators.LSHL, 9)).intoArray(out, outpos);
+         outpos += VLEN_128;
+         acc = val.lanewise(VectorOperators.LSHR, 23);
+
+         // Fits entirely (bits 4..30).
+         val = IntVector.fromArray(SPECIES_128, in, inpos + 48).and(MASK_27);
+         acc = acc.or(val.lanewise(VectorOperators.LSHL, 4));
+
+         val = IntVector.fromArray(SPECIES_128, in, inpos + 52).and(MASK_27);
+         acc.or(val.lanewise(VectorOperators.LSHL, 31)).intoArray(out, outpos);
+         outpos += VLEN_128;
+         acc = val.lanewise(VectorOperators.LSHR, 1);
+
+         val = IntVector.fromArray(SPECIES_128, in, inpos + 56).and(MASK_27);
+         acc.or(val.lanewise(VectorOperators.LSHL, 26)).intoArray(out, outpos);
+         outpos += VLEN_128;
+         acc = val.lanewise(VectorOperators.LSHR, 6);
+
+         val = IntVector.fromArray(SPECIES_128, in, inpos + 60).and(MASK_27);
+         acc.or(val.lanewise(VectorOperators.LSHL, 21)).intoArray(out, outpos);
+         outpos += VLEN_128;
+         acc = val.lanewise(VectorOperators.LSHR, 11);
+
+         val = IntVector.fromArray(SPECIES_128, in, inpos + 64).and(MASK_27);
+         acc.or(val.lanewise(VectorOperators.LSHL, 16)).intoArray(out, outpos);
+         outpos += VLEN_128;
+         acc = val.lanewise(VectorOperators.LSHR, 16);
+
+         val = IntVector.fromArray(SPECIES_128, in, inpos + 68).and(MASK_27);
+         acc.or(val.lanewise(VectorOperators.LSHL, 11)).intoArray(out, outpos);
+         outpos += VLEN_128;
+         acc = val.lanewise(VectorOperators.LSHR, 21);
+
+         val = IntVector.fromArray(SPECIES_128, in, inpos + 72).and(MASK_27);
+         acc.or(val.lanewise(VectorOperators.LSHL, 6)).intoArray(out, outpos);
+         outpos += VLEN_128;
+         acc = val.lanewise(VectorOperators.LSHR, 26);
+
+         // Fits entirely (bits 1..27).
+         val = IntVector.fromArray(SPECIES_128, in, inpos + 76).and(MASK_27);
+         acc = acc.or(val.lanewise(VectorOperators.LSHL, 1));
+
+         val = IntVector.fromArray(SPECIES_128, in, inpos + 80).and(MASK_27);
+         acc.or(val.lanewise(VectorOperators.LSHL, 28)).intoArray(out, outpos);
+         outpos += VLEN_128;
+         acc = val.lanewise(VectorOperators.LSHR, 4);
+
+         val = IntVector.fromArray(SPECIES_128, in, inpos + 84).and(MASK_27);
+         acc.or(val.lanewise(VectorOperators.LSHL, 23)).intoArray(out, outpos);
+         outpos += VLEN_128;
+         acc = val.lanewise(VectorOperators.LSHR, 9);
+
+         val = IntVector.fromArray(SPECIES_128, in, inpos + 88).and(MASK_27);
+         acc.or(val.lanewise(VectorOperators.LSHL, 18)).intoArray(out, outpos);
+         outpos += VLEN_128;
+         acc = val.lanewise(VectorOperators.LSHR, 14);
+
+         val = IntVector.fromArray(SPECIES_128, in, inpos + 92).and(MASK_27);
+         acc.or(val.lanewise(VectorOperators.LSHL, 13)).intoArray(out, outpos);
+         outpos += VLEN_128;
+         acc = val.lanewise(VectorOperators.LSHR, 19);
+
+         val = IntVector.fromArray(SPECIES_128, in, inpos + 96).and(MASK_27);
+         acc.or(val.lanewise(VectorOperators.LSHL, 8)).intoArray(out, outpos);
+         outpos += VLEN_128;
+         acc = val.lanewise(VectorOperators.LSHR, 24);
+
+         // Fits entirely (bits 3..29).
+         val = IntVector.fromArray(SPECIES_128, in, inpos + 100).and(MASK_27);
+         acc = acc.or(val.lanewise(VectorOperators.LSHL, 3));
+
+         val = IntVector.fromArray(SPECIES_128, in, inpos + 104).and(MASK_27);
+         acc.or(val.lanewise(VectorOperators.LSHL, 30)).intoArray(out, outpos);
+         outpos += VLEN_128;
+         acc = val.lanewise(VectorOperators.LSHR, 2);
+
+         val = IntVector.fromArray(SPECIES_128, in, inpos + 108).and(MASK_27);
+         acc.or(val.lanewise(VectorOperators.LSHL, 25)).intoArray(out, outpos);
+         outpos += VLEN_128;
+         acc = val.lanewise(VectorOperators.LSHR, 7);
+
+         val = IntVector.fromArray(SPECIES_128, in, inpos + 112).and(MASK_27);
+         acc.or(val.lanewise(VectorOperators.LSHL, 20)).intoArray(out, outpos);
+         outpos += VLEN_128;
+         acc = val.lanewise(VectorOperators.LSHR, 12);
+
+         val = IntVector.fromArray(SPECIES_128, in, inpos + 116).and(MASK_27);
+         acc.or(val.lanewise(VectorOperators.LSHL, 15)).intoArray(out, outpos);
+         outpos += VLEN_128;
+         acc = val.lanewise(VectorOperators.LSHR, 17);
+
+         val = IntVector.fromArray(SPECIES_128, in, inpos + 120).and(MASK_27);
+         acc.or(val.lanewise(VectorOperators.LSHL, 10)).intoArray(out, outpos);
+         outpos += VLEN_128;
+         acc = val.lanewise(VectorOperators.LSHR, 22);
+
+         // The 32nd value fills the remaining 27 bits exactly; no carry is left over.
+         val = IntVector.fromArray(SPECIES_128, in, inpos + 124).and(MASK_27);
+         acc.or(val.lanewise(VectorOperators.LSHL, 5)).intoArray(out, outpos);
+         outpos += VLEN_128;
+     }
+ }
+ /** Bit-packs the 64 vectors loaded from in (offsets inpos .. inpos + 252) at 27 bits per lane value into 54 vectors stored from outpos; inputs are not masked. */
+ private static void fastpackNoMask27(final int[] in, int inpos, final int[] out, int outpos) {
+ var iV = IntVector.fromArray(SPECIES_128, in, inpos);
+ var oV = iV;
+ // Input 0 is taken as-is at bit 0: no mask is applied, so any bits above bit 26 would overlap the next value's field.
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 4);
+ oV = iV.lanewise(VectorOperators.LSHL, 27).or(oV);
+ // NOTE(review): the stride of 4 ints per load presumes SPECIES_128 has 4 int lanes and VLEN_128 == 4 (declared outside this chunk) - confirm.
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+ // LSHL 27 kept only the low 5 bits of input 1 in the stored word; LSHR 5 carries its remaining bits to bit 0 of the next word.
+ oV = iV.lanewise(VectorOperators.LSHR, 5);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 8);
+ oV = iV.lanewise(VectorOperators.LSHL, 22).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 10);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 12);
+ oV = iV.lanewise(VectorOperators.LSHL, 17).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 15);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 16);
+ oV = iV.lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 20);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 20);
+ oV = iV.lanewise(VectorOperators.LSHL, 7).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 25);
+ // For 27-bit inputs only 2 bits are carried over, so the next value fits whole at bits 2..28 and the one after starts at bit 29 of the same word.
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 24);
+ oV = iV.lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 28);
+ oV = iV.lanewise(VectorOperators.LSHL, 29).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 3);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 32);
+ oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 36);
+ oV = iV.lanewise(VectorOperators.LSHL, 19).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 13);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 40);
+ oV = iV.lanewise(VectorOperators.LSHL, 14).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 18);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 44);
+ oV = iV.lanewise(VectorOperators.LSHL, 9).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 23);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 48);
+ oV = iV.lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 52);
+ oV = iV.lanewise(VectorOperators.LSHL, 31).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 1);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 56);
+ oV = iV.lanewise(VectorOperators.LSHL, 26).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 6);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 60);
+ oV = iV.lanewise(VectorOperators.LSHL, 21).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 11);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 64);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 16);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 68);
+ oV = iV.lanewise(VectorOperators.LSHL, 11).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 21);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 72);
+ oV = iV.lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 26);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 76);
+ oV = iV.lanewise(VectorOperators.LSHL, 1).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 80);
+ oV = iV.lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 4);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 84);
+ oV = iV.lanewise(VectorOperators.LSHL, 23).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 9);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 88);
+ oV = iV.lanewise(VectorOperators.LSHL, 18).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 14);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 92);
+ oV = iV.lanewise(VectorOperators.LSHL, 13).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 19);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 96);
+ oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 24);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 100);
+ oV = iV.lanewise(VectorOperators.LSHL, 3).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 104);
+ oV = iV.lanewise(VectorOperators.LSHL, 30).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 2);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 108);
+ oV = iV.lanewise(VectorOperators.LSHL, 25).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 7);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 112);
+ oV = iV.lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 12);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 116);
+ oV = iV.lanewise(VectorOperators.LSHL, 15).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 17);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 120);
+ oV = iV.lanewise(VectorOperators.LSHL, 10).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 22);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 124);
+ oV = iV.lanewise(VectorOperators.LSHL, 5).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+ // 32 inputs * 27 bits = 27 full words, so the layout is word-aligned again; the second half repeats the same shift schedule.
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 128);
+ oV = iV;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 132);
+ oV = iV.lanewise(VectorOperators.LSHL, 27).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 5);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 136);
+ oV = iV.lanewise(VectorOperators.LSHL, 22).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 10);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 140);
+ oV = iV.lanewise(VectorOperators.LSHL, 17).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 15);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 144);
+ oV = iV.lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 20);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 148);
+ oV = iV.lanewise(VectorOperators.LSHL, 7).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 25);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 152);
+ oV = iV.lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 156);
+ oV = iV.lanewise(VectorOperators.LSHL, 29).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 3);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 160);
+ oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 164);
+ oV = iV.lanewise(VectorOperators.LSHL, 19).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 13);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 168);
+ oV = iV.lanewise(VectorOperators.LSHL, 14).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 18);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 172);
+ oV = iV.lanewise(VectorOperators.LSHL, 9).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 23);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 176);
+ oV = iV.lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 180);
+ oV = iV.lanewise(VectorOperators.LSHL, 31).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 1);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 184);
+ oV = iV.lanewise(VectorOperators.LSHL, 26).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 6);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 188);
+ oV = iV.lanewise(VectorOperators.LSHL, 21).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 11);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 192);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 16);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 196);
+ oV = iV.lanewise(VectorOperators.LSHL, 11).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 21);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 200);
+ oV = iV.lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 26);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 204);
+ oV = iV.lanewise(VectorOperators.LSHL, 1).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 208);
+ oV = iV.lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 4);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 212);
+ oV = iV.lanewise(VectorOperators.LSHL, 23).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 9);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 216);
+ oV = iV.lanewise(VectorOperators.LSHL, 18).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 14);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 220);
+ oV = iV.lanewise(VectorOperators.LSHL, 13).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 19);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 224);
+ oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 24);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 228);
+ oV = iV.lanewise(VectorOperators.LSHL, 3).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 232);
+ oV = iV.lanewise(VectorOperators.LSHL, 30).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 2);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 236);
+ oV = iV.lanewise(VectorOperators.LSHL, 25).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 7);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 240);
+ oV = iV.lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 12);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 244);
+ oV = iV.lanewise(VectorOperators.LSHL, 15).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 17);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 248);
+ oV = iV.lanewise(VectorOperators.LSHL, 10).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 22);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 252);
+ oV = iV.lanewise(VectorOperators.LSHL, 5).or(oV);
+ // Last (54th) store; outpos is not advanced afterwards because nothing else is written.
+ oV.intoArray(out, outpos);
+ }
+ /** Extracts 27-bit fields: loads 54 vectors from in (offsets inpos .. inpos + 212) and stores 64 vectors of extracted lane values from outpos. */
+ private static void fastunpack27(final int[] in, int inpos, final int[] out, int outpos) {
+ var iV = IntVector.fromArray(SPECIES_128, in, inpos);
+ iV.and(MASK_27).intoArray(out, outpos);
+ outpos += VLEN_128;
+ // Value 1 straddles words 0 and 1: its low 5 bits are the top of word 0, its high 22 bits (4194303 = 2^22 - 1) the bottom of word 1.
+ var oV = iV.lanewise(VectorOperators.LSHR, 27).and(MASK_27);
+ // NOTE(review): if MASK_27 is (1 << 27) - 1 (declared outside this chunk), and(MASK_27) after a right shift of 5 or more clears nothing - looks redundant; confirm.
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 4);
+ oV = iV.and(4194303).lanewise(VectorOperators.LSHL, 5).or(oV);
+ // NOTE(review): the stride of 4 ints per load presumes SPECIES_128 has 4 int lanes and VLEN_128 == 4 (declared outside this chunk) - confirm.
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 22).and(MASK_27);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 8);
+ oV = iV.and(131071).lanewise(VectorOperators.LSHL, 10).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 17).and(MASK_27);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 12);
+ oV = iV.and(4095).lanewise(VectorOperators.LSHL, 15).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 12).and(MASK_27);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 16);
+ oV = iV.and(127).lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 7).and(MASK_27);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 20);
+ oV = iV.and(3).lanewise(VectorOperators.LSHL, 25).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+ // The next value lies wholly inside the current word (bits 2..28), so it is extracted without loading another word.
+ iV.lanewise(VectorOperators.LSHR, 2).and(MASK_27).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 29).and(MASK_27);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 24);
+ oV = iV.and(16777215).lanewise(VectorOperators.LSHL, 3).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 24).and(MASK_27);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 28);
+ oV = iV.and(524287).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 19).and(MASK_27);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 32);
+ oV = iV.and(16383).lanewise(VectorOperators.LSHL, 13).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 14).and(MASK_27);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 36);
+ oV = iV.and(511).lanewise(VectorOperators.LSHL, 18).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 9).and(MASK_27);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 40);
+ oV = iV.and(15).lanewise(VectorOperators.LSHL, 23).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 4).and(MASK_27).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 31).and(MASK_27);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 44);
+ oV = iV.and(67108863).lanewise(VectorOperators.LSHL, 1).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 26).and(MASK_27);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 48);
+ oV = iV.and(2097151).lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 21).and(MASK_27);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 52);
+ oV = iV.and(65535).lanewise(VectorOperators.LSHL, 11).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 16).and(MASK_27);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 56);
+ oV = iV.and(2047).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 11).and(MASK_27);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 60);
+ oV = iV.and(63).lanewise(VectorOperators.LSHL, 21).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 6).and(MASK_27);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 64);
+ oV = iV.and(1).lanewise(VectorOperators.LSHL, 26).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 1).and(MASK_27).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 28).and(MASK_27);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 68);
+ oV = iV.and(8388607).lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 23).and(MASK_27);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 72);
+ oV = iV.and(262143).lanewise(VectorOperators.LSHL, 9).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 18).and(MASK_27);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 76);
+ oV = iV.and(8191).lanewise(VectorOperators.LSHL, 14).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 13).and(MASK_27);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 80);
+ oV = iV.and(255).lanewise(VectorOperators.LSHL, 19).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 8).and(MASK_27);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 84);
+ oV = iV.and(7).lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 3).and(MASK_27).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 30).and(MASK_27);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 88);
+ oV = iV.and(33554431).lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 25).and(MASK_27);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 92);
+ oV = iV.and(1048575).lanewise(VectorOperators.LSHL, 7).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 20).and(MASK_27);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 96);
+ oV = iV.and(32767).lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 15).and(MASK_27);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 100);
+ oV = iV.and(1023).lanewise(VectorOperators.LSHL, 17).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 10).and(MASK_27);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 104);
+ oV = iV.and(31).lanewise(VectorOperators.LSHL, 22).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 5).and(MASK_27).intoArray(out, outpos);
+ outpos += VLEN_128;
+ // 32 values consumed exactly 27 input words per lane; the second half repeats the same schedule starting at inpos + 108.
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 108);
+ iV.and(MASK_27).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 27).and(MASK_27);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 112);
+ oV = iV.and(4194303).lanewise(VectorOperators.LSHL, 5).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 22).and(MASK_27);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 116);
+ oV = iV.and(131071).lanewise(VectorOperators.LSHL, 10).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 17).and(MASK_27);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 120);
+ oV = iV.and(4095).lanewise(VectorOperators.LSHL, 15).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 12).and(MASK_27);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 124);
+ oV = iV.and(127).lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 7).and(MASK_27);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 128);
+ oV = iV.and(3).lanewise(VectorOperators.LSHL, 25).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 2).and(MASK_27).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 29).and(MASK_27);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 132);
+ oV = iV.and(16777215).lanewise(VectorOperators.LSHL, 3).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 24).and(MASK_27);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 136);
+ oV = iV.and(524287).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 19).and(MASK_27);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 140);
+ oV = iV.and(16383).lanewise(VectorOperators.LSHL, 13).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 14).and(MASK_27);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 144);
+ oV = iV.and(511).lanewise(VectorOperators.LSHL, 18).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 9).and(MASK_27);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 148);
+ oV = iV.and(15).lanewise(VectorOperators.LSHL, 23).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 4).and(MASK_27).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 31).and(MASK_27);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 152);
+ oV = iV.and(67108863).lanewise(VectorOperators.LSHL, 1).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 26).and(MASK_27);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 156);
+ oV = iV.and(2097151).lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 21).and(MASK_27);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 160);
+ oV = iV.and(65535).lanewise(VectorOperators.LSHL, 11).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 16).and(MASK_27);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 164);
+ oV = iV.and(2047).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 11).and(MASK_27);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 168);
+ oV = iV.and(63).lanewise(VectorOperators.LSHL, 21).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 6).and(MASK_27);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 172);
+ oV = iV.and(1).lanewise(VectorOperators.LSHL, 26).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 1).and(MASK_27).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 28).and(MASK_27);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 176);
+ oV = iV.and(8388607).lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 23).and(MASK_27);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 180);
+ oV = iV.and(262143).lanewise(VectorOperators.LSHL, 9).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 18).and(MASK_27);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 184);
+ oV = iV.and(8191).lanewise(VectorOperators.LSHL, 14).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 13).and(MASK_27);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 188);
+ oV = iV.and(255).lanewise(VectorOperators.LSHL, 19).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 8).and(MASK_27);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 192);
+ oV = iV.and(7).lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 3).and(MASK_27).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 30).and(MASK_27);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 196);
+ oV = iV.and(33554431).lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 25).and(MASK_27);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 200);
+ oV = iV.and(1048575).lanewise(VectorOperators.LSHL, 7).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 20).and(MASK_27);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 204);
+ oV = iV.and(32767).lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 15).and(MASK_27);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 208);
+ oV = iV.and(1023).lanewise(VectorOperators.LSHL, 17).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 10).and(MASK_27);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 212);
+ oV = iV.and(31).lanewise(VectorOperators.LSHL, 22).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+ // Final (64th) value: the top 27 bits of the last loaded word; outpos is not advanced after this store.
+ iV.lanewise(VectorOperators.LSHR, 5).and(MASK_27).intoArray(out, outpos);
+ }
+
+ private static void fastpack28(final int[] in, int inpos, final int[] out, int outpos) {
+ var iV = IntVector.fromArray(SPECIES_128, in, inpos);
+ var oV = iV.and(MASK_28);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 4);
+ oV = iV.and(MASK_28).lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_28).lanewise(VectorOperators.LSHR, 4);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 8);
+ oV = iV.and(MASK_28).lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_28).lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 12);
+ oV = iV.and(MASK_28).lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_28).lanewise(VectorOperators.LSHR, 12);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 16);
+ oV = iV.and(MASK_28).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_28).lanewise(VectorOperators.LSHR, 16);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 20);
+ oV = iV.and(MASK_28).lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_28).lanewise(VectorOperators.LSHR, 20);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 24);
+ oV = iV.and(MASK_28).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_28).lanewise(VectorOperators.LSHR, 24);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 28);
+ oV = iV.and(MASK_28).lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 32);
+ oV = iV.and(MASK_28);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 36);
+ oV = iV.and(MASK_28).lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_28).lanewise(VectorOperators.LSHR, 4);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 40);
+ oV = iV.and(MASK_28).lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_28).lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 44);
+ oV = iV.and(MASK_28).lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_28).lanewise(VectorOperators.LSHR, 12);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 48);
+ oV = iV.and(MASK_28).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_28).lanewise(VectorOperators.LSHR, 16);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 52);
+ oV = iV.and(MASK_28).lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_28).lanewise(VectorOperators.LSHR, 20);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 56);
+ oV = iV.and(MASK_28).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_28).lanewise(VectorOperators.LSHR, 24);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 60);
+ oV = iV.and(MASK_28).lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 64);
+ oV = iV.and(MASK_28);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 68);
+ oV = iV.and(MASK_28).lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_28).lanewise(VectorOperators.LSHR, 4);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 72);
+ oV = iV.and(MASK_28).lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_28).lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 76);
+ oV = iV.and(MASK_28).lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_28).lanewise(VectorOperators.LSHR, 12);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 80);
+ oV = iV.and(MASK_28).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_28).lanewise(VectorOperators.LSHR, 16);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 84);
+ oV = iV.and(MASK_28).lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_28).lanewise(VectorOperators.LSHR, 20);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 88);
+ oV = iV.and(MASK_28).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_28).lanewise(VectorOperators.LSHR, 24);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 92);
+ oV = iV.and(MASK_28).lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 96);
+ oV = iV.and(MASK_28);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 100);
+ oV = iV.and(MASK_28).lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_28).lanewise(VectorOperators.LSHR, 4);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 104);
+ oV = iV.and(MASK_28).lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_28).lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 108);
+ oV = iV.and(MASK_28).lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_28).lanewise(VectorOperators.LSHR, 12);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 112);
+ oV = iV.and(MASK_28).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_28).lanewise(VectorOperators.LSHR, 16);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 116);
+ oV = iV.and(MASK_28).lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_28).lanewise(VectorOperators.LSHR, 20);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 120);
+ oV = iV.and(MASK_28).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_28).lanewise(VectorOperators.LSHR, 24);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 124);
+ oV = iV.and(MASK_28).lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 128);
+ oV = iV.and(MASK_28);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 132);
+ oV = iV.and(MASK_28).lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_28).lanewise(VectorOperators.LSHR, 4);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 136);
+ oV = iV.and(MASK_28).lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_28).lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 140);
+ oV = iV.and(MASK_28).lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_28).lanewise(VectorOperators.LSHR, 12);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 144);
+ oV = iV.and(MASK_28).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_28).lanewise(VectorOperators.LSHR, 16);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 148);
+ oV = iV.and(MASK_28).lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_28).lanewise(VectorOperators.LSHR, 20);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 152);
+ oV = iV.and(MASK_28).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_28).lanewise(VectorOperators.LSHR, 24);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 156);
+ oV = iV.and(MASK_28).lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 160);
+ oV = iV.and(MASK_28);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 164);
+ oV = iV.and(MASK_28).lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_28).lanewise(VectorOperators.LSHR, 4);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 168);
+ oV = iV.and(MASK_28).lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_28).lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 172);
+ oV = iV.and(MASK_28).lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_28).lanewise(VectorOperators.LSHR, 12);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 176);
+ oV = iV.and(MASK_28).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_28).lanewise(VectorOperators.LSHR, 16);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 180);
+ oV = iV.and(MASK_28).lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_28).lanewise(VectorOperators.LSHR, 20);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 184);
+ oV = iV.and(MASK_28).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_28).lanewise(VectorOperators.LSHR, 24);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 188);
+ oV = iV.and(MASK_28).lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 192);
+ oV = iV.and(MASK_28);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 196);
+ oV = iV.and(MASK_28).lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_28).lanewise(VectorOperators.LSHR, 4);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 200);
+ oV = iV.and(MASK_28).lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_28).lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 204);
+ oV = iV.and(MASK_28).lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_28).lanewise(VectorOperators.LSHR, 12);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 208);
+ oV = iV.and(MASK_28).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_28).lanewise(VectorOperators.LSHR, 16);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 212);
+ oV = iV.and(MASK_28).lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_28).lanewise(VectorOperators.LSHR, 20);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 216);
+ oV = iV.and(MASK_28).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_28).lanewise(VectorOperators.LSHR, 24);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 220);
+ oV = iV.and(MASK_28).lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 224);
+ oV = iV.and(MASK_28);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 228);
+ oV = iV.and(MASK_28).lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_28).lanewise(VectorOperators.LSHR, 4);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 232);
+ oV = iV.and(MASK_28).lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_28).lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 236);
+ oV = iV.and(MASK_28).lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_28).lanewise(VectorOperators.LSHR, 12);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 240);
+ oV = iV.and(MASK_28).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_28).lanewise(VectorOperators.LSHR, 16);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 244);
+ oV = iV.and(MASK_28).lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_28).lanewise(VectorOperators.LSHR, 20);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 248);
+ oV = iV.and(MASK_28).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_28).lanewise(VectorOperators.LSHR, 24);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 252);
+ oV = iV.and(MASK_28).lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ oV.intoArray(out, outpos);
+ }
+
+ private static void fastpackNoMask28(final int[] in, int inpos, final int[] out, int outpos) {
+ var iV = IntVector.fromArray(SPECIES_128, in, inpos);
+ var oV = iV;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 4);
+ oV = iV.lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 4);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 8);
+ oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 12);
+ oV = iV.lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 12);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 16);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 16);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 20);
+ oV = iV.lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 20);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 24);
+ oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 24);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 28);
+ oV = iV.lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 32);
+ oV = iV;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 36);
+ oV = iV.lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 4);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 40);
+ oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 44);
+ oV = iV.lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 12);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 48);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 16);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 52);
+ oV = iV.lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 20);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 56);
+ oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 24);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 60);
+ oV = iV.lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 64);
+ oV = iV;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 68);
+ oV = iV.lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 4);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 72);
+ oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 76);
+ oV = iV.lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 12);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 80);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 16);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 84);
+ oV = iV.lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 20);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 88);
+ oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 24);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 92);
+ oV = iV.lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 96);
+ oV = iV;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 100);
+ oV = iV.lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 4);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 104);
+ oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 108);
+ oV = iV.lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 12);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 112);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 16);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 116);
+ oV = iV.lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 20);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 120);
+ oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 24);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 124);
+ oV = iV.lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 128);
+ oV = iV;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 132);
+ oV = iV.lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 4);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 136);
+ oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 140);
+ oV = iV.lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 12);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 144);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 16);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 148);
+ oV = iV.lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 20);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 152);
+ oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 24);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 156);
+ oV = iV.lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 160);
+ oV = iV;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 164);
+ oV = iV.lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 4);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 168);
+ oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 172);
+ oV = iV.lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 12);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 176);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 16);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 180);
+ oV = iV.lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 20);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 184);
+ oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 24);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 188);
+ oV = iV.lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 192);
+ oV = iV;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 196);
+ oV = iV.lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 4);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 200);
+ oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 204);
+ oV = iV.lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 12);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 208);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 16);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 212);
+ oV = iV.lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 20);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 216);
+ oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 24);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 220);
+ oV = iV.lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 224);
+ oV = iV;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 228);
+ oV = iV.lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 4);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 232);
+ oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 236);
+ oV = iV.lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 12);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 240);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 16);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 244);
+ oV = iV.lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 20);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 248);
+ oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 24);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 252);
+ oV = iV.lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ oV.intoArray(out, outpos);
+ }
+
+ private static void fastunpack28(final int[] in, int inpos, final int[] out, int outpos) {
+ var iV = IntVector.fromArray(SPECIES_128, in, inpos);
+ iV.and(MASK_28).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ var oV = iV.lanewise(VectorOperators.LSHR, 28).and(MASK_28);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 4);
+ oV = iV.and(16777215).lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 24).and(MASK_28);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 8);
+ oV = iV.and(1048575).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 20).and(MASK_28);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 12);
+ oV = iV.and(65535).lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 16).and(MASK_28);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 16);
+ oV = iV.and(4095).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 12).and(MASK_28);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 20);
+ oV = iV.and(255).lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 8).and(MASK_28);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 24);
+ oV = iV.and(15).lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 4).and(MASK_28).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 28);
+ iV.and(MASK_28).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 28).and(MASK_28);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 32);
+ oV = iV.and(16777215).lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 24).and(MASK_28);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 36);
+ oV = iV.and(1048575).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 20).and(MASK_28);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 40);
+ oV = iV.and(65535).lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 16).and(MASK_28);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 44);
+ oV = iV.and(4095).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 12).and(MASK_28);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 48);
+ oV = iV.and(255).lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 8).and(MASK_28);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 52);
+ oV = iV.and(15).lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 4).and(MASK_28).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 56);
+ iV.and(MASK_28).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 28).and(MASK_28);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 60);
+ oV = iV.and(16777215).lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 24).and(MASK_28);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 64);
+ oV = iV.and(1048575).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 20).and(MASK_28);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 68);
+ oV = iV.and(65535).lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 16).and(MASK_28);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 72);
+ oV = iV.and(4095).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 12).and(MASK_28);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 76);
+ oV = iV.and(255).lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 8).and(MASK_28);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 80);
+ oV = iV.and(15).lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 4).and(MASK_28).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 84);
+ iV.and(MASK_28).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 28).and(MASK_28);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 88);
+ oV = iV.and(16777215).lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 24).and(MASK_28);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 92);
+ oV = iV.and(1048575).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 20).and(MASK_28);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 96);
+ oV = iV.and(65535).lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 16).and(MASK_28);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 100);
+ oV = iV.and(4095).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 12).and(MASK_28);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 104);
+ oV = iV.and(255).lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 8).and(MASK_28);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 108);
+ oV = iV.and(15).lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 4).and(MASK_28).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 112);
+ iV.and(MASK_28).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 28).and(MASK_28);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 116);
+ oV = iV.and(16777215).lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 24).and(MASK_28);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 120);
+ oV = iV.and(1048575).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 20).and(MASK_28);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 124);
+ oV = iV.and(65535).lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 16).and(MASK_28);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 128);
+ oV = iV.and(4095).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 12).and(MASK_28);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 132);
+ oV = iV.and(255).lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 8).and(MASK_28);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 136);
+ oV = iV.and(15).lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 4).and(MASK_28).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 140);
+ iV.and(MASK_28).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 28).and(MASK_28);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 144);
+ oV = iV.and(16777215).lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 24).and(MASK_28);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 148);
+ oV = iV.and(1048575).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 20).and(MASK_28);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 152);
+ oV = iV.and(65535).lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 16).and(MASK_28);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 156);
+ oV = iV.and(4095).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 12).and(MASK_28);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 160);
+ oV = iV.and(255).lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 8).and(MASK_28);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 164);
+ oV = iV.and(15).lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 4).and(MASK_28).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 168);
+ iV.and(MASK_28).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 28).and(MASK_28);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 172);
+ oV = iV.and(16777215).lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 24).and(MASK_28);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 176);
+ oV = iV.and(1048575).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 20).and(MASK_28);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 180);
+ oV = iV.and(65535).lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 16).and(MASK_28);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 184);
+ oV = iV.and(4095).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 12).and(MASK_28);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 188);
+ oV = iV.and(255).lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 8).and(MASK_28);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 192);
+ oV = iV.and(15).lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 4).and(MASK_28).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 196);
+ iV.and(MASK_28).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 28).and(MASK_28);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 200);
+ oV = iV.and(16777215).lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 24).and(MASK_28);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 204);
+ oV = iV.and(1048575).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 20).and(MASK_28);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 208);
+ oV = iV.and(65535).lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 16).and(MASK_28);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 212);
+ oV = iV.and(4095).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 12).and(MASK_28);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 216);
+ oV = iV.and(255).lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 8).and(MASK_28);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 220);
+ oV = iV.and(15).lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 4).and(MASK_28).intoArray(out, outpos);
+ }
+
+ private static void fastpack29(final int[] in, int inpos, final int[] out, int outpos) { // Bit-packs 64 SPECIES_128 vectors loaded from in[inpos + 0, 4, ..., 252] at 29 bits per lane into 58 vectors stored to out starting at outpos.
+ var iV = IntVector.fromArray(SPECIES_128, in, inpos);
+ var oV = iV.and(MASK_29);
+ // oV is the output word under construction: each later input is masked (MASK_29, presumably (1 << 29) - 1 -- TODO confirm at its declaration), shifted to its bit offset and OR-ed in.
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 4);
+ oV = iV.and(MASK_29).lanewise(VectorOperators.LSHL, 29).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+ // Only the low 3 bits of that input fit in bits 29..31 of the stored word; its remaining bits (>>> 3) start the next output word.
+ oV = iV.and(MASK_29).lanewise(VectorOperators.LSHR, 3);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 8);
+ oV = iV.and(MASK_29).lanewise(VectorOperators.LSHL, 26).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_29).lanewise(VectorOperators.LSHR, 6);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 12);
+ oV = iV.and(MASK_29).lanewise(VectorOperators.LSHL, 23).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_29).lanewise(VectorOperators.LSHR, 9);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 16);
+ oV = iV.and(MASK_29).lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_29).lanewise(VectorOperators.LSHR, 12);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 20);
+ oV = iV.and(MASK_29).lanewise(VectorOperators.LSHL, 17).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_29).lanewise(VectorOperators.LSHR, 15);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 24);
+ oV = iV.and(MASK_29).lanewise(VectorOperators.LSHL, 14).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_29).lanewise(VectorOperators.LSHR, 18);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 28);
+ oV = iV.and(MASK_29).lanewise(VectorOperators.LSHL, 11).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_29).lanewise(VectorOperators.LSHR, 21);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 32);
+ oV = iV.and(MASK_29).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_29).lanewise(VectorOperators.LSHR, 24);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 36);
+ oV = iV.and(MASK_29).lanewise(VectorOperators.LSHL, 5).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_29).lanewise(VectorOperators.LSHR, 27);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 40);
+ oV = iV.and(MASK_29).lanewise(VectorOperators.LSHL, 2).or(oV);
+ // Bit offset 2 + 29 bits = 31 < 32: the word is not full yet, so the next input is merged before storing.
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 44);
+ oV = iV.and(MASK_29).lanewise(VectorOperators.LSHL, 31).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_29).lanewise(VectorOperators.LSHR, 1);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 48);
+ oV = iV.and(MASK_29).lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_29).lanewise(VectorOperators.LSHR, 4);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 52);
+ oV = iV.and(MASK_29).lanewise(VectorOperators.LSHL, 25).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_29).lanewise(VectorOperators.LSHR, 7);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 56);
+ oV = iV.and(MASK_29).lanewise(VectorOperators.LSHL, 22).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_29).lanewise(VectorOperators.LSHR, 10);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 60);
+ oV = iV.and(MASK_29).lanewise(VectorOperators.LSHL, 19).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_29).lanewise(VectorOperators.LSHR, 13);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 64);
+ oV = iV.and(MASK_29).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_29).lanewise(VectorOperators.LSHR, 16);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 68);
+ oV = iV.and(MASK_29).lanewise(VectorOperators.LSHL, 13).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_29).lanewise(VectorOperators.LSHR, 19);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 72);
+ oV = iV.and(MASK_29).lanewise(VectorOperators.LSHL, 10).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_29).lanewise(VectorOperators.LSHR, 22);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 76);
+ oV = iV.and(MASK_29).lanewise(VectorOperators.LSHL, 7).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_29).lanewise(VectorOperators.LSHR, 25);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 80);
+ oV = iV.and(MASK_29).lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_29).lanewise(VectorOperators.LSHR, 28);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 84);
+ oV = iV.and(MASK_29).lanewise(VectorOperators.LSHL, 1).or(oV);
+ // Bit offset 1 + 29 bits = 30 < 32: again no store until the next input has been merged.
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 88);
+ oV = iV.and(MASK_29).lanewise(VectorOperators.LSHL, 30).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_29).lanewise(VectorOperators.LSHR, 2);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 92);
+ oV = iV.and(MASK_29).lanewise(VectorOperators.LSHL, 27).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_29).lanewise(VectorOperators.LSHR, 5);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 96);
+ oV = iV.and(MASK_29).lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_29).lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 100);
+ oV = iV.and(MASK_29).lanewise(VectorOperators.LSHL, 21).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_29).lanewise(VectorOperators.LSHR, 11);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 104);
+ oV = iV.and(MASK_29).lanewise(VectorOperators.LSHL, 18).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_29).lanewise(VectorOperators.LSHR, 14);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 108);
+ oV = iV.and(MASK_29).lanewise(VectorOperators.LSHL, 15).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_29).lanewise(VectorOperators.LSHR, 17);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 112);
+ oV = iV.and(MASK_29).lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_29).lanewise(VectorOperators.LSHR, 20);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 116);
+ oV = iV.and(MASK_29).lanewise(VectorOperators.LSHL, 9).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_29).lanewise(VectorOperators.LSHR, 23);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 120);
+ oV = iV.and(MASK_29).lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_29).lanewise(VectorOperators.LSHR, 26);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 124);
+ oV = iV.and(MASK_29).lanewise(VectorOperators.LSHL, 3).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+ // 32 inputs * 29 bits = 29 full words stored so far; the second 32 inputs restart at bit offset 0 with the same shift sequence.
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 128);
+ oV = iV.and(MASK_29);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 132);
+ oV = iV.and(MASK_29).lanewise(VectorOperators.LSHL, 29).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_29).lanewise(VectorOperators.LSHR, 3);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 136);
+ oV = iV.and(MASK_29).lanewise(VectorOperators.LSHL, 26).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_29).lanewise(VectorOperators.LSHR, 6);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 140);
+ oV = iV.and(MASK_29).lanewise(VectorOperators.LSHL, 23).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_29).lanewise(VectorOperators.LSHR, 9);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 144);
+ oV = iV.and(MASK_29).lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_29).lanewise(VectorOperators.LSHR, 12);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 148);
+ oV = iV.and(MASK_29).lanewise(VectorOperators.LSHL, 17).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_29).lanewise(VectorOperators.LSHR, 15);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 152);
+ oV = iV.and(MASK_29).lanewise(VectorOperators.LSHL, 14).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_29).lanewise(VectorOperators.LSHR, 18);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 156);
+ oV = iV.and(MASK_29).lanewise(VectorOperators.LSHL, 11).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_29).lanewise(VectorOperators.LSHR, 21);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 160);
+ oV = iV.and(MASK_29).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_29).lanewise(VectorOperators.LSHR, 24);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 164);
+ oV = iV.and(MASK_29).lanewise(VectorOperators.LSHL, 5).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_29).lanewise(VectorOperators.LSHR, 27);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 168);
+ oV = iV.and(MASK_29).lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 172);
+ oV = iV.and(MASK_29).lanewise(VectorOperators.LSHL, 31).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_29).lanewise(VectorOperators.LSHR, 1);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 176);
+ oV = iV.and(MASK_29).lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_29).lanewise(VectorOperators.LSHR, 4);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 180);
+ oV = iV.and(MASK_29).lanewise(VectorOperators.LSHL, 25).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_29).lanewise(VectorOperators.LSHR, 7);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 184);
+ oV = iV.and(MASK_29).lanewise(VectorOperators.LSHL, 22).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_29).lanewise(VectorOperators.LSHR, 10);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 188);
+ oV = iV.and(MASK_29).lanewise(VectorOperators.LSHL, 19).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_29).lanewise(VectorOperators.LSHR, 13);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 192);
+ oV = iV.and(MASK_29).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_29).lanewise(VectorOperators.LSHR, 16);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 196);
+ oV = iV.and(MASK_29).lanewise(VectorOperators.LSHL, 13).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_29).lanewise(VectorOperators.LSHR, 19);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 200);
+ oV = iV.and(MASK_29).lanewise(VectorOperators.LSHL, 10).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_29).lanewise(VectorOperators.LSHR, 22);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 204);
+ oV = iV.and(MASK_29).lanewise(VectorOperators.LSHL, 7).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_29).lanewise(VectorOperators.LSHR, 25);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 208);
+ oV = iV.and(MASK_29).lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_29).lanewise(VectorOperators.LSHR, 28);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 212);
+ oV = iV.and(MASK_29).lanewise(VectorOperators.LSHL, 1).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 216);
+ oV = iV.and(MASK_29).lanewise(VectorOperators.LSHL, 30).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_29).lanewise(VectorOperators.LSHR, 2);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 220);
+ oV = iV.and(MASK_29).lanewise(VectorOperators.LSHL, 27).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_29).lanewise(VectorOperators.LSHR, 5);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 224);
+ oV = iV.and(MASK_29).lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_29).lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 228);
+ oV = iV.and(MASK_29).lanewise(VectorOperators.LSHL, 21).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_29).lanewise(VectorOperators.LSHR, 11);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 232);
+ oV = iV.and(MASK_29).lanewise(VectorOperators.LSHL, 18).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_29).lanewise(VectorOperators.LSHR, 14);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 236);
+ oV = iV.and(MASK_29).lanewise(VectorOperators.LSHL, 15).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_29).lanewise(VectorOperators.LSHR, 17);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 240);
+ oV = iV.and(MASK_29).lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_29).lanewise(VectorOperators.LSHR, 20);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 244);
+ oV = iV.and(MASK_29).lanewise(VectorOperators.LSHL, 9).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_29).lanewise(VectorOperators.LSHR, 23);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 248);
+ oV = iV.and(MASK_29).lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_29).lanewise(VectorOperators.LSHR, 26);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 252);
+ oV = iV.and(MASK_29).lanewise(VectorOperators.LSHL, 3).or(oV);
+ // Last of the 58 stores; outpos is a by-value parameter, so it is not advanced afterwards.
+ oV.intoArray(out, outpos);
+ }
+
+ private static void fastpackNoMask29(final int[] in, int inpos, final int[] out, int outpos) { // Bit-packs 64 SPECIES_128 vectors loaded from in[inpos + 0, 4, ..., 252] at 29 bits per lane into 58 vectors stored to out starting at outpos, using the inputs unmasked.
+ var iV = IntVector.fromArray(SPECIES_128, in, inpos);
+ var oV = iV;
+ // NOTE(review): no mask is applied here, so any input bit above bit 28 would be OR-ed into a neighbouring packed value; presumably callers guarantee values < 2^29 -- TODO confirm.
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 4);
+ oV = iV.lanewise(VectorOperators.LSHL, 29).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+ // The low 3 bits of that input landed in bits 29..31 of the stored word; the rest (>>> 3) starts the next output word.
+ oV = iV.lanewise(VectorOperators.LSHR, 3);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 8);
+ oV = iV.lanewise(VectorOperators.LSHL, 26).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 6);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 12);
+ oV = iV.lanewise(VectorOperators.LSHL, 23).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 9);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 16);
+ oV = iV.lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 12);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 20);
+ oV = iV.lanewise(VectorOperators.LSHL, 17).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 15);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 24);
+ oV = iV.lanewise(VectorOperators.LSHL, 14).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 18);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 28);
+ oV = iV.lanewise(VectorOperators.LSHL, 11).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 21);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 32);
+ oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 24);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 36);
+ oV = iV.lanewise(VectorOperators.LSHL, 5).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 27);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 40);
+ oV = iV.lanewise(VectorOperators.LSHL, 2).or(oV);
+ // Bit offset 2 + 29 bits = 31 < 32: the word is not full yet, so the next input is merged before storing.
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 44);
+ oV = iV.lanewise(VectorOperators.LSHL, 31).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 1);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 48);
+ oV = iV.lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 4);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 52);
+ oV = iV.lanewise(VectorOperators.LSHL, 25).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 7);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 56);
+ oV = iV.lanewise(VectorOperators.LSHL, 22).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 10);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 60);
+ oV = iV.lanewise(VectorOperators.LSHL, 19).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 13);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 64);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 16);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 68);
+ oV = iV.lanewise(VectorOperators.LSHL, 13).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 19);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 72);
+ oV = iV.lanewise(VectorOperators.LSHL, 10).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 22);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 76);
+ oV = iV.lanewise(VectorOperators.LSHL, 7).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 25);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 80);
+ oV = iV.lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 28);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 84);
+ oV = iV.lanewise(VectorOperators.LSHL, 1).or(oV);
+ // Bit offset 1 + 29 bits = 30 < 32: again no store until the next input has been merged.
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 88);
+ oV = iV.lanewise(VectorOperators.LSHL, 30).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 2);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 92);
+ oV = iV.lanewise(VectorOperators.LSHL, 27).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 5);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 96);
+ oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 100);
+ oV = iV.lanewise(VectorOperators.LSHL, 21).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 11);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 104);
+ oV = iV.lanewise(VectorOperators.LSHL, 18).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 14);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 108);
+ oV = iV.lanewise(VectorOperators.LSHL, 15).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 17);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 112);
+ oV = iV.lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 20);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 116);
+ oV = iV.lanewise(VectorOperators.LSHL, 9).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 23);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 120);
+ oV = iV.lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 26);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 124);
+ oV = iV.lanewise(VectorOperators.LSHL, 3).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+ // 32 inputs * 29 bits = 29 full words stored so far; the second 32 inputs restart at bit offset 0 with the same shift sequence.
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 128);
+ oV = iV;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 132);
+ oV = iV.lanewise(VectorOperators.LSHL, 29).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 3);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 136);
+ oV = iV.lanewise(VectorOperators.LSHL, 26).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 6);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 140);
+ oV = iV.lanewise(VectorOperators.LSHL, 23).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 9);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 144);
+ oV = iV.lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 12);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 148);
+ oV = iV.lanewise(VectorOperators.LSHL, 17).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 15);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 152);
+ oV = iV.lanewise(VectorOperators.LSHL, 14).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 18);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 156);
+ oV = iV.lanewise(VectorOperators.LSHL, 11).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 21);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 160);
+ oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 24);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 164);
+ oV = iV.lanewise(VectorOperators.LSHL, 5).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 27);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 168);
+ oV = iV.lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 172);
+ oV = iV.lanewise(VectorOperators.LSHL, 31).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 1);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 176);
+ oV = iV.lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 4);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 180);
+ oV = iV.lanewise(VectorOperators.LSHL, 25).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 7);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 184);
+ oV = iV.lanewise(VectorOperators.LSHL, 22).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 10);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 188);
+ oV = iV.lanewise(VectorOperators.LSHL, 19).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 13);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 192);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 16);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 196);
+ oV = iV.lanewise(VectorOperators.LSHL, 13).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 19);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 200);
+ oV = iV.lanewise(VectorOperators.LSHL, 10).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 22);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 204);
+ oV = iV.lanewise(VectorOperators.LSHL, 7).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 25);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 208);
+ oV = iV.lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 28);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 212);
+ oV = iV.lanewise(VectorOperators.LSHL, 1).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 216);
+ oV = iV.lanewise(VectorOperators.LSHL, 30).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 2);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 220);
+ oV = iV.lanewise(VectorOperators.LSHL, 27).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 5);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 224);
+ oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 228);
+ oV = iV.lanewise(VectorOperators.LSHL, 21).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 11);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 232);
+ oV = iV.lanewise(VectorOperators.LSHL, 18).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 14);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 236);
+ oV = iV.lanewise(VectorOperators.LSHL, 15).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 17);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 240);
+ oV = iV.lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 20);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 244);
+ oV = iV.lanewise(VectorOperators.LSHL, 9).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 23);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 248);
+ oV = iV.lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 26);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 252);
+ oV = iV.lanewise(VectorOperators.LSHL, 3).or(oV);
+ // Last of the 58 stores; outpos is a by-value parameter, so it is not advanced afterwards.
+ oV.intoArray(out, outpos);
+ }
+
+ private static void fastunpack29(final int[] in, int inpos, final int[] out, int outpos) {
+ var iV = IntVector.fromArray(SPECIES_128, in, inpos);
+ iV.and(MASK_29).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ var oV = iV.lanewise(VectorOperators.LSHR, 29).and(MASK_29);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 4);
+ oV = iV.and(67108863).lanewise(VectorOperators.LSHL, 3).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 26).and(MASK_29);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 8);
+ oV = iV.and(8388607).lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 23).and(MASK_29);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 12);
+ oV = iV.and(1048575).lanewise(VectorOperators.LSHL, 9).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 20).and(MASK_29);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 16);
+ oV = iV.and(131071).lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 17).and(MASK_29);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 20);
+ oV = iV.and(16383).lanewise(VectorOperators.LSHL, 15).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 14).and(MASK_29);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 24);
+ oV = iV.and(2047).lanewise(VectorOperators.LSHL, 18).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 11).and(MASK_29);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 28);
+ oV = iV.and(255).lanewise(VectorOperators.LSHL, 21).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 8).and(MASK_29);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 32);
+ oV = iV.and(31).lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 5).and(MASK_29);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 36);
+ oV = iV.and(3).lanewise(VectorOperators.LSHL, 27).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 2).and(MASK_29).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 31).and(MASK_29);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 40);
+ oV = iV.and(268435455).lanewise(VectorOperators.LSHL, 1).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 28).and(MASK_29);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 44);
+ oV = iV.and(33554431).lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 25).and(MASK_29);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 48);
+ oV = iV.and(4194303).lanewise(VectorOperators.LSHL, 7).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 22).and(MASK_29);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 52);
+ oV = iV.and(524287).lanewise(VectorOperators.LSHL, 10).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 19).and(MASK_29);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 56);
+ oV = iV.and(65535).lanewise(VectorOperators.LSHL, 13).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 16).and(MASK_29);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 60);
+ oV = iV.and(8191).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 13).and(MASK_29);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 64);
+ oV = iV.and(1023).lanewise(VectorOperators.LSHL, 19).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 10).and(MASK_29);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 68);
+ oV = iV.and(127).lanewise(VectorOperators.LSHL, 22).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 7).and(MASK_29);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 72);
+ oV = iV.and(15).lanewise(VectorOperators.LSHL, 25).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 4).and(MASK_29);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 76);
+ oV = iV.and(1).lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 1).and(MASK_29).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 30).and(MASK_29);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 80);
+ oV = iV.and(134217727).lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 27).and(MASK_29);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 84);
+ oV = iV.and(16777215).lanewise(VectorOperators.LSHL, 5).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 24).and(MASK_29);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 88);
+ oV = iV.and(2097151).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 21).and(MASK_29);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 92);
+ oV = iV.and(262143).lanewise(VectorOperators.LSHL, 11).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 18).and(MASK_29);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 96);
+ oV = iV.and(32767).lanewise(VectorOperators.LSHL, 14).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 15).and(MASK_29);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 100);
+ oV = iV.and(4095).lanewise(VectorOperators.LSHL, 17).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 12).and(MASK_29);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 104);
+ oV = iV.and(511).lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 9).and(MASK_29);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 108);
+ oV = iV.and(63).lanewise(VectorOperators.LSHL, 23).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 6).and(MASK_29);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 112);
+ oV = iV.and(7).lanewise(VectorOperators.LSHL, 26).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 3).and(MASK_29).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 116);
+ iV.and(MASK_29).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 29).and(MASK_29);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 120);
+ oV = iV.and(67108863).lanewise(VectorOperators.LSHL, 3).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 26).and(MASK_29);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 124);
+ oV = iV.and(8388607).lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 23).and(MASK_29);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 128);
+ oV = iV.and(1048575).lanewise(VectorOperators.LSHL, 9).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 20).and(MASK_29);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 132);
+ oV = iV.and(131071).lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 17).and(MASK_29);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 136);
+ oV = iV.and(16383).lanewise(VectorOperators.LSHL, 15).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 14).and(MASK_29);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 140);
+ oV = iV.and(2047).lanewise(VectorOperators.LSHL, 18).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 11).and(MASK_29);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 144);
+ oV = iV.and(255).lanewise(VectorOperators.LSHL, 21).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 8).and(MASK_29);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 148);
+ oV = iV.and(31).lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 5).and(MASK_29);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 152);
+ oV = iV.and(3).lanewise(VectorOperators.LSHL, 27).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 2).and(MASK_29).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 31).and(MASK_29);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 156);
+ oV = iV.and(268435455).lanewise(VectorOperators.LSHL, 1).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 28).and(MASK_29);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 160);
+ oV = iV.and(33554431).lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 25).and(MASK_29);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 164);
+ oV = iV.and(4194303).lanewise(VectorOperators.LSHL, 7).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 22).and(MASK_29);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 168);
+ oV = iV.and(524287).lanewise(VectorOperators.LSHL, 10).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 19).and(MASK_29);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 172);
+ oV = iV.and(65535).lanewise(VectorOperators.LSHL, 13).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 16).and(MASK_29);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 176);
+ oV = iV.and(8191).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 13).and(MASK_29);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 180);
+ oV = iV.and(1023).lanewise(VectorOperators.LSHL, 19).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 10).and(MASK_29);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 184);
+ oV = iV.and(127).lanewise(VectorOperators.LSHL, 22).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 7).and(MASK_29);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 188);
+ oV = iV.and(15).lanewise(VectorOperators.LSHL, 25).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 4).and(MASK_29);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 192);
+ oV = iV.and(1).lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 1).and(MASK_29).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 30).and(MASK_29);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 196);
+ oV = iV.and(134217727).lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 27).and(MASK_29);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 200);
+ oV = iV.and(16777215).lanewise(VectorOperators.LSHL, 5).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 24).and(MASK_29);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 204);
+ oV = iV.and(2097151).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 21).and(MASK_29);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 208);
+ oV = iV.and(262143).lanewise(VectorOperators.LSHL, 11).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 18).and(MASK_29);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 212);
+ oV = iV.and(32767).lanewise(VectorOperators.LSHL, 14).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 15).and(MASK_29);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 216);
+ oV = iV.and(4095).lanewise(VectorOperators.LSHL, 17).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 12).and(MASK_29);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 220);
+ oV = iV.and(511).lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 9).and(MASK_29);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 224);
+ oV = iV.and(63).lanewise(VectorOperators.LSHL, 23).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 6).and(MASK_29);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 228);
+ oV = iV.and(7).lanewise(VectorOperators.LSHL, 26).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV.lanewise(VectorOperators.LSHR, 3).and(MASK_29).intoArray(out, outpos);
+ }
+
+ /**
+  * Packs the low 30 bits of every input lane into densely filled output words.
+  *
+  * <p>The input is consumed as 64 vectors loaded at {@code inpos}, {@code inpos + 4},
+  * ..., {@code inpos + 252}, and 60 vectors are stored starting at {@code outpos},
+  * advancing by {@code VLEN_128} after each store. NOTE(review): this presumes
+  * {@code SPECIES_128} has four int lanes, {@code VLEN_128 == 4} and
+  * {@code MASK_30 == (1 << 30) - 1}; all three are declared outside this method,
+  * so confirm against their definitions.
+  *
+  * @param in     source array holding the values to pack
+  * @param inpos  index in {@code in} of the first value to read
+  * @param out    destination array receiving the packed words
+  * @param outpos index in {@code out} of the first word to write
+  */
+ private static void fastpack30(final int[] in, int inpos, final int[] out, int outpos) {
+     // Four identical rounds: each one reads 16 input vectors and emits 15 output vectors.
+     for (int round = 0; round < 4; round++) {
+         final int base = inpos + round * 64;
+
+         // The first vector of a round opens a fresh output word at bit 0.
+         var carry = IntVector.fromArray(SPECIES_128, in, base).and(MASK_30);
+
+         for (int step = 1; step <= 15; step++) {
+             // Number of bits of the current value that still fit into the open output word.
+             final int fits = 2 * step;
+
+             final var masked = IntVector.fromArray(SPECIES_128, in, base + 4 * step).and(MASK_30);
+
+             // Low bits of the current value fill the top of the word; the carry fills the bottom.
+             final var word = masked.lanewise(VectorOperators.LSHL, 32 - fits).or(carry);
+             word.intoArray(out, outpos);
+             outpos += VLEN_128;
+
+             // Whatever did not fit opens the next output word. The carry computed on
+             // the last step is never read: the next round reloads a fresh accumulator.
+             carry = masked.lanewise(VectorOperators.LSHR, fits);
+         }
+     }
+ }
+
+ private static void fastpackNoMask30(final int[] in, int inpos, final int[] out, int outpos) {
+ var iV = IntVector.fromArray(SPECIES_128, in, inpos);
+ var oV = iV;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 4);
+ oV = iV.lanewise(VectorOperators.LSHL, 30).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 2);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 8);
+ oV = iV.lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 4);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 12);
+ oV = iV.lanewise(VectorOperators.LSHL, 26).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 6);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 16);
+ oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 20);
+ oV = iV.lanewise(VectorOperators.LSHL, 22).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 10);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 24);
+ oV = iV.lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 12);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 28);
+ oV = iV.lanewise(VectorOperators.LSHL, 18).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 14);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 32);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 16);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 36);
+ oV = iV.lanewise(VectorOperators.LSHL, 14).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 18);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 40);
+ oV = iV.lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 20);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 44);
+ oV = iV.lanewise(VectorOperators.LSHL, 10).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 22);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 48);
+ oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 24);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 52);
+ oV = iV.lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 26);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 56);
+ oV = iV.lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 28);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 60);
+ oV = iV.lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 64);
+ oV = iV;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 68);
+ oV = iV.lanewise(VectorOperators.LSHL, 30).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 2);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 72);
+ oV = iV.lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 4);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 76);
+ oV = iV.lanewise(VectorOperators.LSHL, 26).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 6);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 80);
+ oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 84);
+ oV = iV.lanewise(VectorOperators.LSHL, 22).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 10);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 88);
+ oV = iV.lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 12);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 92);
+ oV = iV.lanewise(VectorOperators.LSHL, 18).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 14);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 96);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 16);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 100);
+ oV = iV.lanewise(VectorOperators.LSHL, 14).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 18);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 104);
+ oV = iV.lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 20);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 108);
+ oV = iV.lanewise(VectorOperators.LSHL, 10).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 22);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 112);
+ oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 24);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 116);
+ oV = iV.lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 26);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 120);
+ oV = iV.lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 28);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 124);
+ oV = iV.lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 128);
+ oV = iV;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 132);
+ oV = iV.lanewise(VectorOperators.LSHL, 30).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 2);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 136);
+ oV = iV.lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 4);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 140);
+ oV = iV.lanewise(VectorOperators.LSHL, 26).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 6);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 144);
+ oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 148);
+ oV = iV.lanewise(VectorOperators.LSHL, 22).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 10);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 152);
+ oV = iV.lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 12);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 156);
+ oV = iV.lanewise(VectorOperators.LSHL, 18).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 14);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 160);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 16);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 164);
+ oV = iV.lanewise(VectorOperators.LSHL, 14).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 18);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 168);
+ oV = iV.lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 20);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 172);
+ oV = iV.lanewise(VectorOperators.LSHL, 10).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 22);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 176);
+ oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 24);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 180);
+ oV = iV.lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 26);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 184);
+ oV = iV.lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 28);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 188);
+ oV = iV.lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 192);
+ oV = iV;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 196);
+ oV = iV.lanewise(VectorOperators.LSHL, 30).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 2);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 200);
+ oV = iV.lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 4);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 204);
+ oV = iV.lanewise(VectorOperators.LSHL, 26).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 6);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 208);
+ oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 212);
+ oV = iV.lanewise(VectorOperators.LSHL, 22).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 10);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 216);
+ oV = iV.lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 12);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 220);
+ oV = iV.lanewise(VectorOperators.LSHL, 18).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 14);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 224);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 16);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 228);
+ oV = iV.lanewise(VectorOperators.LSHL, 14).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 18);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 232);
+ oV = iV.lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 20);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 236);
+ oV = iV.lanewise(VectorOperators.LSHL, 10).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 22);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 240);
+ oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 24);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 244);
+ oV = iV.lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 26);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 248);
+ oV = iV.lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 28);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 252);
+ oV = iV.lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ oV.intoArray(out, outpos);
+ }
+
+ private static void fastunpack30(final int[] in, int inpos, final int[] out, int outpos) { // Unpacks 30-bit fields: loads 60 vectors from in (inpos .. inpos + 236) and stores 64 vectors to out starting at outpos.
+ var iV = IntVector.fromArray(SPECIES_128, in, inpos);
+ iV.and(MASK_30).intoArray(out, outpos); // first value: bits of word 0 selected by MASK_30 (presumably (1 << 30) - 1; declared outside this view — confirm)
+ outpos += VLEN_128;
+ // The bits of word 0 above bit 29 belong to the next value; they are carried in oV.
+ var oV = iV.lanewise(VectorOperators.LSHR, 30).and(MASK_30); // NOTE(review): this .and(MASK_30) looks redundant after LSHR if MASK_30 is the 30-bit mask — confirm
+ // 268435455 == (1 << 28) - 1: the low 28 bits of the next word are placed above the 2 carried bits.
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 4);
+ oV = iV.and(268435455).lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+ // The pattern repeats: each following word leaves 2 more carry bits and contributes 2 fewer low bits.
+ oV = iV.lanewise(VectorOperators.LSHR, 28).and(MASK_30);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 8);
+ oV = iV.and(67108863).lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 26).and(MASK_30);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 12);
+ oV = iV.and(16777215).lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 24).and(MASK_30);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 16);
+ oV = iV.and(4194303).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 22).and(MASK_30);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 20);
+ oV = iV.and(1048575).lanewise(VectorOperators.LSHL, 10).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 20).and(MASK_30);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 24);
+ oV = iV.and(262143).lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 18).and(MASK_30);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 28);
+ oV = iV.and(65535).lanewise(VectorOperators.LSHL, 14).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 16).and(MASK_30);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 32);
+ oV = iV.and(16383).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 14).and(MASK_30);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 36);
+ oV = iV.and(4095).lanewise(VectorOperators.LSHL, 18).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 12).and(MASK_30);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 40);
+ oV = iV.and(1023).lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 10).and(MASK_30);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 44);
+ oV = iV.and(255).lanewise(VectorOperators.LSHL, 22).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 8).and(MASK_30);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 48);
+ oV = iV.and(63).lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 6).and(MASK_30);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 52);
+ oV = iV.and(15).lanewise(VectorOperators.LSHL, 26).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 4).and(MASK_30);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 56);
+ oV = iV.and(3).lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+ // The remaining upper bits of this word form a complete value, so it is stored directly (word >>> 2) without another load.
+ iV.lanewise(VectorOperators.LSHR, 2).and(MASK_30).intoArray(out, outpos);
+ outpos += VLEN_128;
+ // Second group of 15 input vectors (inpos + 60 .. inpos + 116): same shift/mask sequence as the first group.
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 60);
+ iV.and(MASK_30).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 30).and(MASK_30);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 64);
+ oV = iV.and(268435455).lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 28).and(MASK_30);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 68);
+ oV = iV.and(67108863).lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 26).and(MASK_30);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 72);
+ oV = iV.and(16777215).lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 24).and(MASK_30);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 76);
+ oV = iV.and(4194303).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 22).and(MASK_30);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 80);
+ oV = iV.and(1048575).lanewise(VectorOperators.LSHL, 10).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 20).and(MASK_30);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 84);
+ oV = iV.and(262143).lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 18).and(MASK_30);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 88);
+ oV = iV.and(65535).lanewise(VectorOperators.LSHL, 14).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 16).and(MASK_30);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 92);
+ oV = iV.and(16383).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 14).and(MASK_30);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 96);
+ oV = iV.and(4095).lanewise(VectorOperators.LSHL, 18).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 12).and(MASK_30);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 100);
+ oV = iV.and(1023).lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 10).and(MASK_30);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 104);
+ oV = iV.and(255).lanewise(VectorOperators.LSHL, 22).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 8).and(MASK_30);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 108);
+ oV = iV.and(63).lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 6).and(MASK_30);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 112);
+ oV = iV.and(15).lanewise(VectorOperators.LSHL, 26).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 4).and(MASK_30);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 116);
+ oV = iV.and(3).lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+ // Last value of the second group: upper bits of the word loaded from inpos + 116.
+ iV.lanewise(VectorOperators.LSHR, 2).and(MASK_30).intoArray(out, outpos);
+ outpos += VLEN_128;
+ // Third group of 15 input vectors (inpos + 120 .. inpos + 176).
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 120);
+ iV.and(MASK_30).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 30).and(MASK_30);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 124);
+ oV = iV.and(268435455).lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 28).and(MASK_30);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 128);
+ oV = iV.and(67108863).lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 26).and(MASK_30);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 132);
+ oV = iV.and(16777215).lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 24).and(MASK_30);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 136);
+ oV = iV.and(4194303).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 22).and(MASK_30);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 140);
+ oV = iV.and(1048575).lanewise(VectorOperators.LSHL, 10).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 20).and(MASK_30);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 144);
+ oV = iV.and(262143).lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 18).and(MASK_30);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 148);
+ oV = iV.and(65535).lanewise(VectorOperators.LSHL, 14).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 16).and(MASK_30);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 152);
+ oV = iV.and(16383).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 14).and(MASK_30);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 156);
+ oV = iV.and(4095).lanewise(VectorOperators.LSHL, 18).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 12).and(MASK_30);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 160);
+ oV = iV.and(1023).lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 10).and(MASK_30);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 164);
+ oV = iV.and(255).lanewise(VectorOperators.LSHL, 22).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 8).and(MASK_30);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 168);
+ oV = iV.and(63).lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 6).and(MASK_30);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 172);
+ oV = iV.and(15).lanewise(VectorOperators.LSHL, 26).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 4).and(MASK_30);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 176);
+ oV = iV.and(3).lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+ // Last value of the third group: upper bits of the word loaded from inpos + 176.
+ iV.lanewise(VectorOperators.LSHR, 2).and(MASK_30).intoArray(out, outpos);
+ outpos += VLEN_128;
+ // Fourth and last group of 15 input vectors (inpos + 180 .. inpos + 236).
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 180);
+ iV.and(MASK_30).intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 30).and(MASK_30);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 184);
+ oV = iV.and(268435455).lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 28).and(MASK_30);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 188);
+ oV = iV.and(67108863).lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 26).and(MASK_30);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 192);
+ oV = iV.and(16777215).lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 24).and(MASK_30);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 196);
+ oV = iV.and(4194303).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 22).and(MASK_30);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 200);
+ oV = iV.and(1048575).lanewise(VectorOperators.LSHL, 10).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 20).and(MASK_30);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 204);
+ oV = iV.and(262143).lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 18).and(MASK_30);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 208);
+ oV = iV.and(65535).lanewise(VectorOperators.LSHL, 14).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 16).and(MASK_30);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 212);
+ oV = iV.and(16383).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 14).and(MASK_30);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 216);
+ oV = iV.and(4095).lanewise(VectorOperators.LSHL, 18).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 12).and(MASK_30);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 220);
+ oV = iV.and(1023).lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 10).and(MASK_30);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 224);
+ oV = iV.and(255).lanewise(VectorOperators.LSHL, 22).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 8).and(MASK_30);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 228);
+ oV = iV.and(63).lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 6).and(MASK_30);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 232);
+ oV = iV.and(15).lanewise(VectorOperators.LSHL, 26).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 4).and(MASK_30);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 236);
+ oV = iV.and(3).lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+ // Final store of the method; outpos is not advanced afterwards because nothing else is written.
+ iV.lanewise(VectorOperators.LSHR, 2).and(MASK_30).intoArray(out, outpos);
+ }
+
+ private static void fastpack31(final int[] in, int inpos, final int[] out, int outpos) {
+ var iV = IntVector.fromArray(SPECIES_128, in, inpos);
+ var oV = iV.and(MASK_31);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 4);
+ oV = iV.and(MASK_31).lanewise(VectorOperators.LSHL, 31).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_31).lanewise(VectorOperators.LSHR, 1);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 8);
+ oV = iV.and(MASK_31).lanewise(VectorOperators.LSHL, 30).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_31).lanewise(VectorOperators.LSHR, 2);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 12);
+ oV = iV.and(MASK_31).lanewise(VectorOperators.LSHL, 29).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_31).lanewise(VectorOperators.LSHR, 3);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 16);
+ oV = iV.and(MASK_31).lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_31).lanewise(VectorOperators.LSHR, 4);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 20);
+ oV = iV.and(MASK_31).lanewise(VectorOperators.LSHL, 27).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_31).lanewise(VectorOperators.LSHR, 5);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 24);
+ oV = iV.and(MASK_31).lanewise(VectorOperators.LSHL, 26).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_31).lanewise(VectorOperators.LSHR, 6);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 28);
+ oV = iV.and(MASK_31).lanewise(VectorOperators.LSHL, 25).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_31).lanewise(VectorOperators.LSHR, 7);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 32);
+ oV = iV.and(MASK_31).lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_31).lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 36);
+ oV = iV.and(MASK_31).lanewise(VectorOperators.LSHL, 23).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_31).lanewise(VectorOperators.LSHR, 9);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 40);
+ oV = iV.and(MASK_31).lanewise(VectorOperators.LSHL, 22).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_31).lanewise(VectorOperators.LSHR, 10);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 44);
+ oV = iV.and(MASK_31).lanewise(VectorOperators.LSHL, 21).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_31).lanewise(VectorOperators.LSHR, 11);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 48);
+ oV = iV.and(MASK_31).lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_31).lanewise(VectorOperators.LSHR, 12);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 52);
+ oV = iV.and(MASK_31).lanewise(VectorOperators.LSHL, 19).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_31).lanewise(VectorOperators.LSHR, 13);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 56);
+ oV = iV.and(MASK_31).lanewise(VectorOperators.LSHL, 18).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_31).lanewise(VectorOperators.LSHR, 14);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 60);
+ oV = iV.and(MASK_31).lanewise(VectorOperators.LSHL, 17).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_31).lanewise(VectorOperators.LSHR, 15);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 64);
+ oV = iV.and(MASK_31).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_31).lanewise(VectorOperators.LSHR, 16);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 68);
+ oV = iV.and(MASK_31).lanewise(VectorOperators.LSHL, 15).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_31).lanewise(VectorOperators.LSHR, 17);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 72);
+ oV = iV.and(MASK_31).lanewise(VectorOperators.LSHL, 14).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_31).lanewise(VectorOperators.LSHR, 18);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 76);
+ oV = iV.and(MASK_31).lanewise(VectorOperators.LSHL, 13).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_31).lanewise(VectorOperators.LSHR, 19);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 80);
+ oV = iV.and(MASK_31).lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_31).lanewise(VectorOperators.LSHR, 20);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 84);
+ oV = iV.and(MASK_31).lanewise(VectorOperators.LSHL, 11).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_31).lanewise(VectorOperators.LSHR, 21);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 88);
+ oV = iV.and(MASK_31).lanewise(VectorOperators.LSHL, 10).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_31).lanewise(VectorOperators.LSHR, 22);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 92);
+ oV = iV.and(MASK_31).lanewise(VectorOperators.LSHL, 9).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_31).lanewise(VectorOperators.LSHR, 23);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 96);
+ oV = iV.and(MASK_31).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_31).lanewise(VectorOperators.LSHR, 24);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 100);
+ oV = iV.and(MASK_31).lanewise(VectorOperators.LSHL, 7).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_31).lanewise(VectorOperators.LSHR, 25);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 104);
+ oV = iV.and(MASK_31).lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_31).lanewise(VectorOperators.LSHR, 26);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 108);
+ oV = iV.and(MASK_31).lanewise(VectorOperators.LSHL, 5).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_31).lanewise(VectorOperators.LSHR, 27);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 112);
+ oV = iV.and(MASK_31).lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_31).lanewise(VectorOperators.LSHR, 28);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 116);
+ oV = iV.and(MASK_31).lanewise(VectorOperators.LSHL, 3).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_31).lanewise(VectorOperators.LSHR, 29);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 120);
+ oV = iV.and(MASK_31).lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_31).lanewise(VectorOperators.LSHR, 30);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 124);
+ oV = iV.and(MASK_31).lanewise(VectorOperators.LSHL, 1).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 128);
+ oV = iV.and(MASK_31);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 132);
+ oV = iV.and(MASK_31).lanewise(VectorOperators.LSHL, 31).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_31).lanewise(VectorOperators.LSHR, 1);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 136);
+ oV = iV.and(MASK_31).lanewise(VectorOperators.LSHL, 30).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_31).lanewise(VectorOperators.LSHR, 2);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 140);
+ oV = iV.and(MASK_31).lanewise(VectorOperators.LSHL, 29).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_31).lanewise(VectorOperators.LSHR, 3);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 144);
+ oV = iV.and(MASK_31).lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_31).lanewise(VectorOperators.LSHR, 4);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 148);
+ oV = iV.and(MASK_31).lanewise(VectorOperators.LSHL, 27).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_31).lanewise(VectorOperators.LSHR, 5);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 152);
+ oV = iV.and(MASK_31).lanewise(VectorOperators.LSHL, 26).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_31).lanewise(VectorOperators.LSHR, 6);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 156);
+ oV = iV.and(MASK_31).lanewise(VectorOperators.LSHL, 25).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_31).lanewise(VectorOperators.LSHR, 7);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 160);
+ oV = iV.and(MASK_31).lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_31).lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 164);
+ oV = iV.and(MASK_31).lanewise(VectorOperators.LSHL, 23).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_31).lanewise(VectorOperators.LSHR, 9);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 168);
+ oV = iV.and(MASK_31).lanewise(VectorOperators.LSHL, 22).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_31).lanewise(VectorOperators.LSHR, 10);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 172);
+ oV = iV.and(MASK_31).lanewise(VectorOperators.LSHL, 21).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_31).lanewise(VectorOperators.LSHR, 11);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 176);
+ oV = iV.and(MASK_31).lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_31).lanewise(VectorOperators.LSHR, 12);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 180);
+ oV = iV.and(MASK_31).lanewise(VectorOperators.LSHL, 19).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_31).lanewise(VectorOperators.LSHR, 13);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 184);
+ oV = iV.and(MASK_31).lanewise(VectorOperators.LSHL, 18).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_31).lanewise(VectorOperators.LSHR, 14);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 188);
+ oV = iV.and(MASK_31).lanewise(VectorOperators.LSHL, 17).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_31).lanewise(VectorOperators.LSHR, 15);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 192);
+ oV = iV.and(MASK_31).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_31).lanewise(VectorOperators.LSHR, 16);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 196);
+ oV = iV.and(MASK_31).lanewise(VectorOperators.LSHL, 15).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_31).lanewise(VectorOperators.LSHR, 17);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 200);
+ oV = iV.and(MASK_31).lanewise(VectorOperators.LSHL, 14).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_31).lanewise(VectorOperators.LSHR, 18);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 204);
+ oV = iV.and(MASK_31).lanewise(VectorOperators.LSHL, 13).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_31).lanewise(VectorOperators.LSHR, 19);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 208);
+ oV = iV.and(MASK_31).lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_31).lanewise(VectorOperators.LSHR, 20);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 212);
+ oV = iV.and(MASK_31).lanewise(VectorOperators.LSHL, 11).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_31).lanewise(VectorOperators.LSHR, 21);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 216);
+ oV = iV.and(MASK_31).lanewise(VectorOperators.LSHL, 10).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_31).lanewise(VectorOperators.LSHR, 22);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 220);
+ oV = iV.and(MASK_31).lanewise(VectorOperators.LSHL, 9).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_31).lanewise(VectorOperators.LSHR, 23);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 224);
+ oV = iV.and(MASK_31).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_31).lanewise(VectorOperators.LSHR, 24);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 228);
+ oV = iV.and(MASK_31).lanewise(VectorOperators.LSHL, 7).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_31).lanewise(VectorOperators.LSHR, 25);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 232);
+ oV = iV.and(MASK_31).lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_31).lanewise(VectorOperators.LSHR, 26);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 236);
+ oV = iV.and(MASK_31).lanewise(VectorOperators.LSHL, 5).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_31).lanewise(VectorOperators.LSHR, 27);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 240);
+ oV = iV.and(MASK_31).lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_31).lanewise(VectorOperators.LSHR, 28);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 244);
+ oV = iV.and(MASK_31).lanewise(VectorOperators.LSHL, 3).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_31).lanewise(VectorOperators.LSHR, 29);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 248);
+ oV = iV.and(MASK_31).lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_128;
+
+ oV = iV.and(MASK_31).lanewise(VectorOperators.LSHR, 30);
+
+ iV = IntVector.fromArray(SPECIES_128, in, inpos + 252);
+ oV = iV.and(MASK_31).lanewise(VectorOperators.LSHL, 1).or(oV);
+
+ oV.intoArray(out, outpos);
+ }
+
+    private static void fastpackNoMask31(final int[] in, int inpos, final int[] out, int outpos) {
+        var iV = IntVector.fromArray(SPECIES_128, in, inpos);
+        var oV = iV;
+        // Packs in[inpos..inpos+255] at 31 bits each. No masking here: an input with bit 31 set corrupts its neighbour.
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 4);
+        oV = iV.lanewise(VectorOperators.LSHL, 31).or(oV);
+        // Per lane: first value in bits 0-30, low bit of the second value in bit 31.
+        oV.intoArray(out, outpos);
+        outpos += VLEN_128;
+        // Keep the bits of the current value that did not fit; they start the next output word.
+        oV = iV.lanewise(VectorOperators.LSHR, 1);
+        // NOTE(review): input offsets step by 4, so VLEN_128 is presumably 4 -- confirm against its declaration.
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 8);
+        oV = iV.lanewise(VectorOperators.LSHL, 30).or(oV);
+
+        oV.intoArray(out, outpos);
+        outpos += VLEN_128;
+
+        oV = iV.lanewise(VectorOperators.LSHR, 2);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 12);
+        oV = iV.lanewise(VectorOperators.LSHL, 29).or(oV);
+
+        oV.intoArray(out, outpos);
+        outpos += VLEN_128;
+
+        oV = iV.lanewise(VectorOperators.LSHR, 3);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 16);
+        oV = iV.lanewise(VectorOperators.LSHL, 28).or(oV);
+
+        oV.intoArray(out, outpos);
+        outpos += VLEN_128;
+
+        oV = iV.lanewise(VectorOperators.LSHR, 4);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 20);
+        oV = iV.lanewise(VectorOperators.LSHL, 27).or(oV);
+
+        oV.intoArray(out, outpos);
+        outpos += VLEN_128;
+
+        oV = iV.lanewise(VectorOperators.LSHR, 5);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 24);
+        oV = iV.lanewise(VectorOperators.LSHL, 26).or(oV);
+
+        oV.intoArray(out, outpos);
+        outpos += VLEN_128;
+
+        oV = iV.lanewise(VectorOperators.LSHR, 6);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 28);
+        oV = iV.lanewise(VectorOperators.LSHL, 25).or(oV);
+
+        oV.intoArray(out, outpos);
+        outpos += VLEN_128;
+
+        oV = iV.lanewise(VectorOperators.LSHR, 7);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 32);
+        oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
+
+        oV.intoArray(out, outpos);
+        outpos += VLEN_128;
+
+        oV = iV.lanewise(VectorOperators.LSHR, 8);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 36);
+        oV = iV.lanewise(VectorOperators.LSHL, 23).or(oV);
+
+        oV.intoArray(out, outpos);
+        outpos += VLEN_128;
+
+        oV = iV.lanewise(VectorOperators.LSHR, 9);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 40);
+        oV = iV.lanewise(VectorOperators.LSHL, 22).or(oV);
+
+        oV.intoArray(out, outpos);
+        outpos += VLEN_128;
+
+        oV = iV.lanewise(VectorOperators.LSHR, 10);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 44);
+        oV = iV.lanewise(VectorOperators.LSHL, 21).or(oV);
+
+        oV.intoArray(out, outpos);
+        outpos += VLEN_128;
+
+        oV = iV.lanewise(VectorOperators.LSHR, 11);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 48);
+        oV = iV.lanewise(VectorOperators.LSHL, 20).or(oV);
+
+        oV.intoArray(out, outpos);
+        outpos += VLEN_128;
+
+        oV = iV.lanewise(VectorOperators.LSHR, 12);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 52);
+        oV = iV.lanewise(VectorOperators.LSHL, 19).or(oV);
+
+        oV.intoArray(out, outpos);
+        outpos += VLEN_128;
+
+        oV = iV.lanewise(VectorOperators.LSHR, 13);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 56);
+        oV = iV.lanewise(VectorOperators.LSHL, 18).or(oV);
+
+        oV.intoArray(out, outpos);
+        outpos += VLEN_128;
+
+        oV = iV.lanewise(VectorOperators.LSHR, 14);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 60);
+        oV = iV.lanewise(VectorOperators.LSHL, 17).or(oV);
+
+        oV.intoArray(out, outpos);
+        outpos += VLEN_128;
+
+        oV = iV.lanewise(VectorOperators.LSHR, 15);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 64);
+        oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
+
+        oV.intoArray(out, outpos);
+        outpos += VLEN_128;
+
+        oV = iV.lanewise(VectorOperators.LSHR, 16);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 68);
+        oV = iV.lanewise(VectorOperators.LSHL, 15).or(oV);
+
+        oV.intoArray(out, outpos);
+        outpos += VLEN_128;
+
+        oV = iV.lanewise(VectorOperators.LSHR, 17);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 72);
+        oV = iV.lanewise(VectorOperators.LSHL, 14).or(oV);
+
+        oV.intoArray(out, outpos);
+        outpos += VLEN_128;
+
+        oV = iV.lanewise(VectorOperators.LSHR, 18);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 76);
+        oV = iV.lanewise(VectorOperators.LSHL, 13).or(oV);
+
+        oV.intoArray(out, outpos);
+        outpos += VLEN_128;
+
+        oV = iV.lanewise(VectorOperators.LSHR, 19);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 80);
+        oV = iV.lanewise(VectorOperators.LSHL, 12).or(oV);
+
+        oV.intoArray(out, outpos);
+        outpos += VLEN_128;
+
+        oV = iV.lanewise(VectorOperators.LSHR, 20);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 84);
+        oV = iV.lanewise(VectorOperators.LSHL, 11).or(oV);
+
+        oV.intoArray(out, outpos);
+        outpos += VLEN_128;
+
+        oV = iV.lanewise(VectorOperators.LSHR, 21);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 88);
+        oV = iV.lanewise(VectorOperators.LSHL, 10).or(oV);
+
+        oV.intoArray(out, outpos);
+        outpos += VLEN_128;
+
+        oV = iV.lanewise(VectorOperators.LSHR, 22);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 92);
+        oV = iV.lanewise(VectorOperators.LSHL, 9).or(oV);
+
+        oV.intoArray(out, outpos);
+        outpos += VLEN_128;
+
+        oV = iV.lanewise(VectorOperators.LSHR, 23);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 96);
+        oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
+
+        oV.intoArray(out, outpos);
+        outpos += VLEN_128;
+
+        oV = iV.lanewise(VectorOperators.LSHR, 24);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 100);
+        oV = iV.lanewise(VectorOperators.LSHL, 7).or(oV);
+
+        oV.intoArray(out, outpos);
+        outpos += VLEN_128;
+
+        oV = iV.lanewise(VectorOperators.LSHR, 25);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 104);
+        oV = iV.lanewise(VectorOperators.LSHL, 6).or(oV);
+
+        oV.intoArray(out, outpos);
+        outpos += VLEN_128;
+
+        oV = iV.lanewise(VectorOperators.LSHR, 26);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 108);
+        oV = iV.lanewise(VectorOperators.LSHL, 5).or(oV);
+
+        oV.intoArray(out, outpos);
+        outpos += VLEN_128;
+
+        oV = iV.lanewise(VectorOperators.LSHR, 27);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 112);
+        oV = iV.lanewise(VectorOperators.LSHL, 4).or(oV);
+
+        oV.intoArray(out, outpos);
+        outpos += VLEN_128;
+
+        oV = iV.lanewise(VectorOperators.LSHR, 28);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 116);
+        oV = iV.lanewise(VectorOperators.LSHL, 3).or(oV);
+
+        oV.intoArray(out, outpos);
+        outpos += VLEN_128;
+
+        oV = iV.lanewise(VectorOperators.LSHR, 29);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 120);
+        oV = iV.lanewise(VectorOperators.LSHL, 2).or(oV);
+
+        oV.intoArray(out, outpos);
+        outpos += VLEN_128;
+
+        oV = iV.lanewise(VectorOperators.LSHR, 30);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 124);
+        oV = iV.lanewise(VectorOperators.LSHL, 1).or(oV);
+
+        oV.intoArray(out, outpos);
+        outpos += VLEN_128;
+        // 128 inputs consumed and 31 vectors stored; the second half repeats the pattern from a fresh word.
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 128);
+        oV = iV;
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 132);
+        oV = iV.lanewise(VectorOperators.LSHL, 31).or(oV);
+
+        oV.intoArray(out, outpos);
+        outpos += VLEN_128;
+
+        oV = iV.lanewise(VectorOperators.LSHR, 1);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 136);
+        oV = iV.lanewise(VectorOperators.LSHL, 30).or(oV);
+
+        oV.intoArray(out, outpos);
+        outpos += VLEN_128;
+
+        oV = iV.lanewise(VectorOperators.LSHR, 2);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 140);
+        oV = iV.lanewise(VectorOperators.LSHL, 29).or(oV);
+
+        oV.intoArray(out, outpos);
+        outpos += VLEN_128;
+
+        oV = iV.lanewise(VectorOperators.LSHR, 3);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 144);
+        oV = iV.lanewise(VectorOperators.LSHL, 28).or(oV);
+
+        oV.intoArray(out, outpos);
+        outpos += VLEN_128;
+
+        oV = iV.lanewise(VectorOperators.LSHR, 4);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 148);
+        oV = iV.lanewise(VectorOperators.LSHL, 27).or(oV);
+
+        oV.intoArray(out, outpos);
+        outpos += VLEN_128;
+
+        oV = iV.lanewise(VectorOperators.LSHR, 5);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 152);
+        oV = iV.lanewise(VectorOperators.LSHL, 26).or(oV);
+
+        oV.intoArray(out, outpos);
+        outpos += VLEN_128;
+
+        oV = iV.lanewise(VectorOperators.LSHR, 6);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 156);
+        oV = iV.lanewise(VectorOperators.LSHL, 25).or(oV);
+
+        oV.intoArray(out, outpos);
+        outpos += VLEN_128;
+
+        oV = iV.lanewise(VectorOperators.LSHR, 7);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 160);
+        oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
+
+        oV.intoArray(out, outpos);
+        outpos += VLEN_128;
+
+        oV = iV.lanewise(VectorOperators.LSHR, 8);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 164);
+        oV = iV.lanewise(VectorOperators.LSHL, 23).or(oV);
+
+        oV.intoArray(out, outpos);
+        outpos += VLEN_128;
+
+        oV = iV.lanewise(VectorOperators.LSHR, 9);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 168);
+        oV = iV.lanewise(VectorOperators.LSHL, 22).or(oV);
+
+        oV.intoArray(out, outpos);
+        outpos += VLEN_128;
+
+        oV = iV.lanewise(VectorOperators.LSHR, 10);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 172);
+        oV = iV.lanewise(VectorOperators.LSHL, 21).or(oV);
+
+        oV.intoArray(out, outpos);
+        outpos += VLEN_128;
+
+        oV = iV.lanewise(VectorOperators.LSHR, 11);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 176);
+        oV = iV.lanewise(VectorOperators.LSHL, 20).or(oV);
+
+        oV.intoArray(out, outpos);
+        outpos += VLEN_128;
+
+        oV = iV.lanewise(VectorOperators.LSHR, 12);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 180);
+        oV = iV.lanewise(VectorOperators.LSHL, 19).or(oV);
+
+        oV.intoArray(out, outpos);
+        outpos += VLEN_128;
+
+        oV = iV.lanewise(VectorOperators.LSHR, 13);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 184);
+        oV = iV.lanewise(VectorOperators.LSHL, 18).or(oV);
+
+        oV.intoArray(out, outpos);
+        outpos += VLEN_128;
+
+        oV = iV.lanewise(VectorOperators.LSHR, 14);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 188);
+        oV = iV.lanewise(VectorOperators.LSHL, 17).or(oV);
+
+        oV.intoArray(out, outpos);
+        outpos += VLEN_128;
+
+        oV = iV.lanewise(VectorOperators.LSHR, 15);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 192);
+        oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
+
+        oV.intoArray(out, outpos);
+        outpos += VLEN_128;
+
+        oV = iV.lanewise(VectorOperators.LSHR, 16);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 196);
+        oV = iV.lanewise(VectorOperators.LSHL, 15).or(oV);
+
+        oV.intoArray(out, outpos);
+        outpos += VLEN_128;
+
+        oV = iV.lanewise(VectorOperators.LSHR, 17);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 200);
+        oV = iV.lanewise(VectorOperators.LSHL, 14).or(oV);
+
+        oV.intoArray(out, outpos);
+        outpos += VLEN_128;
+
+        oV = iV.lanewise(VectorOperators.LSHR, 18);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 204);
+        oV = iV.lanewise(VectorOperators.LSHL, 13).or(oV);
+
+        oV.intoArray(out, outpos);
+        outpos += VLEN_128;
+
+        oV = iV.lanewise(VectorOperators.LSHR, 19);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 208);
+        oV = iV.lanewise(VectorOperators.LSHL, 12).or(oV);
+
+        oV.intoArray(out, outpos);
+        outpos += VLEN_128;
+
+        oV = iV.lanewise(VectorOperators.LSHR, 20);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 212);
+        oV = iV.lanewise(VectorOperators.LSHL, 11).or(oV);
+
+        oV.intoArray(out, outpos);
+        outpos += VLEN_128;
+
+        oV = iV.lanewise(VectorOperators.LSHR, 21);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 216);
+        oV = iV.lanewise(VectorOperators.LSHL, 10).or(oV);
+
+        oV.intoArray(out, outpos);
+        outpos += VLEN_128;
+
+        oV = iV.lanewise(VectorOperators.LSHR, 22);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 220);
+        oV = iV.lanewise(VectorOperators.LSHL, 9).or(oV);
+
+        oV.intoArray(out, outpos);
+        outpos += VLEN_128;
+
+        oV = iV.lanewise(VectorOperators.LSHR, 23);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 224);
+        oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
+
+        oV.intoArray(out, outpos);
+        outpos += VLEN_128;
+
+        oV = iV.lanewise(VectorOperators.LSHR, 24);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 228);
+        oV = iV.lanewise(VectorOperators.LSHL, 7).or(oV);
+
+        oV.intoArray(out, outpos);
+        outpos += VLEN_128;
+
+        oV = iV.lanewise(VectorOperators.LSHR, 25);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 232);
+        oV = iV.lanewise(VectorOperators.LSHL, 6).or(oV);
+
+        oV.intoArray(out, outpos);
+        outpos += VLEN_128;
+
+        oV = iV.lanewise(VectorOperators.LSHR, 26);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 236);
+        oV = iV.lanewise(VectorOperators.LSHL, 5).or(oV);
+
+        oV.intoArray(out, outpos);
+        outpos += VLEN_128;
+
+        oV = iV.lanewise(VectorOperators.LSHR, 27);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 240);
+        oV = iV.lanewise(VectorOperators.LSHL, 4).or(oV);
+
+        oV.intoArray(out, outpos);
+        outpos += VLEN_128;
+
+        oV = iV.lanewise(VectorOperators.LSHR, 28);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 244);
+        oV = iV.lanewise(VectorOperators.LSHL, 3).or(oV);
+
+        oV.intoArray(out, outpos);
+        outpos += VLEN_128;
+
+        oV = iV.lanewise(VectorOperators.LSHR, 29);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 248);
+        oV = iV.lanewise(VectorOperators.LSHL, 2).or(oV);
+
+        oV.intoArray(out, outpos);
+        outpos += VLEN_128;
+
+        oV = iV.lanewise(VectorOperators.LSHR, 30);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 252);
+        oV = iV.lanewise(VectorOperators.LSHL, 1).or(oV);
+        // Last store: outpos is a by-value int, so advancing it again would have no effect.
+        oV.intoArray(out, outpos);
+    }
+
+    private static void fastunpack31(final int[] in, int inpos, final int[] out, int outpos) {
+        var iV = IntVector.fromArray(SPECIES_128, in, inpos);
+        iV.and(MASK_31).intoArray(out, outpos);
+        outpos += VLEN_128;
+        // Reads in[inpos..inpos+247] and writes 64 vectors to out; assumes MASK_31 == (1 << 31) - 1 -- TODO confirm.
+        var oV = iV.lanewise(VectorOperators.LSHR, 31).and(MASK_31);
+        // After >>> 31 only one bit is left, so the .and(MASK_31) above looks redundant (kept as-is).
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 4);
+        oV = iV.and(1073741823).lanewise(VectorOperators.LSHL, 1).or(oV);
+        // 1073741823 == (1 << 30) - 1: the low 30 bits of this word become bits 1-30 of the value.
+        oV.intoArray(out, outpos);
+        outpos += VLEN_128;
+
+        oV = iV.lanewise(VectorOperators.LSHR, 30).and(MASK_31);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 8);
+        oV = iV.and(536870911).lanewise(VectorOperators.LSHL, 2).or(oV);
+
+        oV.intoArray(out, outpos);
+        outpos += VLEN_128;
+
+        oV = iV.lanewise(VectorOperators.LSHR, 29).and(MASK_31);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 12);
+        oV = iV.and(268435455).lanewise(VectorOperators.LSHL, 3).or(oV);
+
+        oV.intoArray(out, outpos);
+        outpos += VLEN_128;
+
+        oV = iV.lanewise(VectorOperators.LSHR, 28).and(MASK_31);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 16);
+        oV = iV.and(134217727).lanewise(VectorOperators.LSHL, 4).or(oV);
+
+        oV.intoArray(out, outpos);
+        outpos += VLEN_128;
+
+        oV = iV.lanewise(VectorOperators.LSHR, 27).and(MASK_31);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 20);
+        oV = iV.and(67108863).lanewise(VectorOperators.LSHL, 5).or(oV);
+
+        oV.intoArray(out, outpos);
+        outpos += VLEN_128;
+
+        oV = iV.lanewise(VectorOperators.LSHR, 26).and(MASK_31);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 24);
+        oV = iV.and(33554431).lanewise(VectorOperators.LSHL, 6).or(oV);
+
+        oV.intoArray(out, outpos);
+        outpos += VLEN_128;
+
+        oV = iV.lanewise(VectorOperators.LSHR, 25).and(MASK_31);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 28);
+        oV = iV.and(16777215).lanewise(VectorOperators.LSHL, 7).or(oV);
+
+        oV.intoArray(out, outpos);
+        outpos += VLEN_128;
+
+        oV = iV.lanewise(VectorOperators.LSHR, 24).and(MASK_31);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 32);
+        oV = iV.and(8388607).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+        oV.intoArray(out, outpos);
+        outpos += VLEN_128;
+
+        oV = iV.lanewise(VectorOperators.LSHR, 23).and(MASK_31);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 36);
+        oV = iV.and(4194303).lanewise(VectorOperators.LSHL, 9).or(oV);
+
+        oV.intoArray(out, outpos);
+        outpos += VLEN_128;
+
+        oV = iV.lanewise(VectorOperators.LSHR, 22).and(MASK_31);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 40);
+        oV = iV.and(2097151).lanewise(VectorOperators.LSHL, 10).or(oV);
+
+        oV.intoArray(out, outpos);
+        outpos += VLEN_128;
+
+        oV = iV.lanewise(VectorOperators.LSHR, 21).and(MASK_31);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 44);
+        oV = iV.and(1048575).lanewise(VectorOperators.LSHL, 11).or(oV);
+
+        oV.intoArray(out, outpos);
+        outpos += VLEN_128;
+
+        oV = iV.lanewise(VectorOperators.LSHR, 20).and(MASK_31);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 48);
+        oV = iV.and(524287).lanewise(VectorOperators.LSHL, 12).or(oV);
+
+        oV.intoArray(out, outpos);
+        outpos += VLEN_128;
+
+        oV = iV.lanewise(VectorOperators.LSHR, 19).and(MASK_31);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 52);
+        oV = iV.and(262143).lanewise(VectorOperators.LSHL, 13).or(oV);
+
+        oV.intoArray(out, outpos);
+        outpos += VLEN_128;
+
+        oV = iV.lanewise(VectorOperators.LSHR, 18).and(MASK_31);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 56);
+        oV = iV.and(131071).lanewise(VectorOperators.LSHL, 14).or(oV);
+
+        oV.intoArray(out, outpos);
+        outpos += VLEN_128;
+
+        oV = iV.lanewise(VectorOperators.LSHR, 17).and(MASK_31);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 60);
+        oV = iV.and(65535).lanewise(VectorOperators.LSHL, 15).or(oV);
+
+        oV.intoArray(out, outpos);
+        outpos += VLEN_128;
+
+        oV = iV.lanewise(VectorOperators.LSHR, 16).and(MASK_31);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 64);
+        oV = iV.and(32767).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+        oV.intoArray(out, outpos);
+        outpos += VLEN_128;
+
+        oV = iV.lanewise(VectorOperators.LSHR, 15).and(MASK_31);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 68);
+        oV = iV.and(16383).lanewise(VectorOperators.LSHL, 17).or(oV);
+
+        oV.intoArray(out, outpos);
+        outpos += VLEN_128;
+
+        oV = iV.lanewise(VectorOperators.LSHR, 14).and(MASK_31);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 72);
+        oV = iV.and(8191).lanewise(VectorOperators.LSHL, 18).or(oV);
+
+        oV.intoArray(out, outpos);
+        outpos += VLEN_128;
+
+        oV = iV.lanewise(VectorOperators.LSHR, 13).and(MASK_31);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 76);
+        oV = iV.and(4095).lanewise(VectorOperators.LSHL, 19).or(oV);
+
+        oV.intoArray(out, outpos);
+        outpos += VLEN_128;
+
+        oV = iV.lanewise(VectorOperators.LSHR, 12).and(MASK_31);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 80);
+        oV = iV.and(2047).lanewise(VectorOperators.LSHL, 20).or(oV);
+
+        oV.intoArray(out, outpos);
+        outpos += VLEN_128;
+
+        oV = iV.lanewise(VectorOperators.LSHR, 11).and(MASK_31);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 84);
+        oV = iV.and(1023).lanewise(VectorOperators.LSHL, 21).or(oV);
+
+        oV.intoArray(out, outpos);
+        outpos += VLEN_128;
+
+        oV = iV.lanewise(VectorOperators.LSHR, 10).and(MASK_31);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 88);
+        oV = iV.and(511).lanewise(VectorOperators.LSHL, 22).or(oV);
+
+        oV.intoArray(out, outpos);
+        outpos += VLEN_128;
+
+        oV = iV.lanewise(VectorOperators.LSHR, 9).and(MASK_31);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 92);
+        oV = iV.and(255).lanewise(VectorOperators.LSHL, 23).or(oV);
+
+        oV.intoArray(out, outpos);
+        outpos += VLEN_128;
+
+        oV = iV.lanewise(VectorOperators.LSHR, 8).and(MASK_31);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 96);
+        oV = iV.and(127).lanewise(VectorOperators.LSHL, 24).or(oV);
+
+        oV.intoArray(out, outpos);
+        outpos += VLEN_128;
+
+        oV = iV.lanewise(VectorOperators.LSHR, 7).and(MASK_31);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 100);
+        oV = iV.and(63).lanewise(VectorOperators.LSHL, 25).or(oV);
+
+        oV.intoArray(out, outpos);
+        outpos += VLEN_128;
+
+        oV = iV.lanewise(VectorOperators.LSHR, 6).and(MASK_31);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 104);
+        oV = iV.and(31).lanewise(VectorOperators.LSHL, 26).or(oV);
+
+        oV.intoArray(out, outpos);
+        outpos += VLEN_128;
+
+        oV = iV.lanewise(VectorOperators.LSHR, 5).and(MASK_31);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 108);
+        oV = iV.and(15).lanewise(VectorOperators.LSHL, 27).or(oV);
+
+        oV.intoArray(out, outpos);
+        outpos += VLEN_128;
+
+        oV = iV.lanewise(VectorOperators.LSHR, 4).and(MASK_31);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 112);
+        oV = iV.and(7).lanewise(VectorOperators.LSHL, 28).or(oV);
+
+        oV.intoArray(out, outpos);
+        outpos += VLEN_128;
+
+        oV = iV.lanewise(VectorOperators.LSHR, 3).and(MASK_31);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 116);
+        oV = iV.and(3).lanewise(VectorOperators.LSHL, 29).or(oV);
+
+        oV.intoArray(out, outpos);
+        outpos += VLEN_128;
+
+        oV = iV.lanewise(VectorOperators.LSHR, 2).and(MASK_31);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 120);
+        oV = iV.and(1).lanewise(VectorOperators.LSHL, 30).or(oV);
+
+        oV.intoArray(out, outpos);
+        outpos += VLEN_128;
+        // The upper 31 bits of the current word are one complete value, so no new load is needed.
+        iV.lanewise(VectorOperators.LSHR, 1).and(MASK_31).intoArray(out, outpos);
+        outpos += VLEN_128;
+        // Second half: identical pattern over the input words starting at inpos + 124.
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 124);
+        iV.and(MASK_31).intoArray(out, outpos);
+        outpos += VLEN_128;
+
+        oV = iV.lanewise(VectorOperators.LSHR, 31).and(MASK_31);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 128);
+        oV = iV.and(1073741823).lanewise(VectorOperators.LSHL, 1).or(oV);
+
+        oV.intoArray(out, outpos);
+        outpos += VLEN_128;
+
+        oV = iV.lanewise(VectorOperators.LSHR, 30).and(MASK_31);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 132);
+        oV = iV.and(536870911).lanewise(VectorOperators.LSHL, 2).or(oV);
+
+        oV.intoArray(out, outpos);
+        outpos += VLEN_128;
+
+        oV = iV.lanewise(VectorOperators.LSHR, 29).and(MASK_31);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 136);
+        oV = iV.and(268435455).lanewise(VectorOperators.LSHL, 3).or(oV);
+
+        oV.intoArray(out, outpos);
+        outpos += VLEN_128;
+
+        oV = iV.lanewise(VectorOperators.LSHR, 28).and(MASK_31);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 140);
+        oV = iV.and(134217727).lanewise(VectorOperators.LSHL, 4).or(oV);
+
+        oV.intoArray(out, outpos);
+        outpos += VLEN_128;
+
+        oV = iV.lanewise(VectorOperators.LSHR, 27).and(MASK_31);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 144);
+        oV = iV.and(67108863).lanewise(VectorOperators.LSHL, 5).or(oV);
+
+        oV.intoArray(out, outpos);
+        outpos += VLEN_128;
+
+        oV = iV.lanewise(VectorOperators.LSHR, 26).and(MASK_31);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 148);
+        oV = iV.and(33554431).lanewise(VectorOperators.LSHL, 6).or(oV);
+
+        oV.intoArray(out, outpos);
+        outpos += VLEN_128;
+
+        oV = iV.lanewise(VectorOperators.LSHR, 25).and(MASK_31);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 152);
+        oV = iV.and(16777215).lanewise(VectorOperators.LSHL, 7).or(oV);
+
+        oV.intoArray(out, outpos);
+        outpos += VLEN_128;
+
+        oV = iV.lanewise(VectorOperators.LSHR, 24).and(MASK_31);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 156);
+        oV = iV.and(8388607).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+        oV.intoArray(out, outpos);
+        outpos += VLEN_128;
+
+        oV = iV.lanewise(VectorOperators.LSHR, 23).and(MASK_31);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 160);
+        oV = iV.and(4194303).lanewise(VectorOperators.LSHL, 9).or(oV);
+
+        oV.intoArray(out, outpos);
+        outpos += VLEN_128;
+
+        oV = iV.lanewise(VectorOperators.LSHR, 22).and(MASK_31);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 164);
+        oV = iV.and(2097151).lanewise(VectorOperators.LSHL, 10).or(oV);
+
+        oV.intoArray(out, outpos);
+        outpos += VLEN_128;
+
+        oV = iV.lanewise(VectorOperators.LSHR, 21).and(MASK_31);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 168);
+        oV = iV.and(1048575).lanewise(VectorOperators.LSHL, 11).or(oV);
+
+        oV.intoArray(out, outpos);
+        outpos += VLEN_128;
+
+        oV = iV.lanewise(VectorOperators.LSHR, 20).and(MASK_31);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 172);
+        oV = iV.and(524287).lanewise(VectorOperators.LSHL, 12).or(oV);
+
+        oV.intoArray(out, outpos);
+        outpos += VLEN_128;
+
+        oV = iV.lanewise(VectorOperators.LSHR, 19).and(MASK_31);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 176);
+        oV = iV.and(262143).lanewise(VectorOperators.LSHL, 13).or(oV);
+
+        oV.intoArray(out, outpos);
+        outpos += VLEN_128;
+
+        oV = iV.lanewise(VectorOperators.LSHR, 18).and(MASK_31);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 180);
+        oV = iV.and(131071).lanewise(VectorOperators.LSHL, 14).or(oV);
+
+        oV.intoArray(out, outpos);
+        outpos += VLEN_128;
+
+        oV = iV.lanewise(VectorOperators.LSHR, 17).and(MASK_31);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 184);
+        oV = iV.and(65535).lanewise(VectorOperators.LSHL, 15).or(oV);
+
+        oV.intoArray(out, outpos);
+        outpos += VLEN_128;
+
+        oV = iV.lanewise(VectorOperators.LSHR, 16).and(MASK_31);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 188);
+        oV = iV.and(32767).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+        oV.intoArray(out, outpos);
+        outpos += VLEN_128;
+
+        oV = iV.lanewise(VectorOperators.LSHR, 15).and(MASK_31);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 192);
+        oV = iV.and(16383).lanewise(VectorOperators.LSHL, 17).or(oV);
+
+        oV.intoArray(out, outpos);
+        outpos += VLEN_128;
+
+        oV = iV.lanewise(VectorOperators.LSHR, 14).and(MASK_31);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 196);
+        oV = iV.and(8191).lanewise(VectorOperators.LSHL, 18).or(oV);
+
+        oV.intoArray(out, outpos);
+        outpos += VLEN_128;
+
+        oV = iV.lanewise(VectorOperators.LSHR, 13).and(MASK_31);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 200);
+        oV = iV.and(4095).lanewise(VectorOperators.LSHL, 19).or(oV);
+
+        oV.intoArray(out, outpos);
+        outpos += VLEN_128;
+
+        oV = iV.lanewise(VectorOperators.LSHR, 12).and(MASK_31);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 204);
+        oV = iV.and(2047).lanewise(VectorOperators.LSHL, 20).or(oV);
+
+        oV.intoArray(out, outpos);
+        outpos += VLEN_128;
+
+        oV = iV.lanewise(VectorOperators.LSHR, 11).and(MASK_31);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 208);
+        oV = iV.and(1023).lanewise(VectorOperators.LSHL, 21).or(oV);
+
+        oV.intoArray(out, outpos);
+        outpos += VLEN_128;
+
+        oV = iV.lanewise(VectorOperators.LSHR, 10).and(MASK_31);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 212);
+        oV = iV.and(511).lanewise(VectorOperators.LSHL, 22).or(oV);
+
+        oV.intoArray(out, outpos);
+        outpos += VLEN_128;
+
+        oV = iV.lanewise(VectorOperators.LSHR, 9).and(MASK_31);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 216);
+        oV = iV.and(255).lanewise(VectorOperators.LSHL, 23).or(oV);
+
+        oV.intoArray(out, outpos);
+        outpos += VLEN_128;
+
+        oV = iV.lanewise(VectorOperators.LSHR, 8).and(MASK_31);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 220);
+        oV = iV.and(127).lanewise(VectorOperators.LSHL, 24).or(oV);
+
+        oV.intoArray(out, outpos);
+        outpos += VLEN_128;
+
+        oV = iV.lanewise(VectorOperators.LSHR, 7).and(MASK_31);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 224);
+        oV = iV.and(63).lanewise(VectorOperators.LSHL, 25).or(oV);
+
+        oV.intoArray(out, outpos);
+        outpos += VLEN_128;
+
+        oV = iV.lanewise(VectorOperators.LSHR, 6).and(MASK_31);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 228);
+        oV = iV.and(31).lanewise(VectorOperators.LSHL, 26).or(oV);
+
+        oV.intoArray(out, outpos);
+        outpos += VLEN_128;
+
+        oV = iV.lanewise(VectorOperators.LSHR, 5).and(MASK_31);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 232);
+        oV = iV.and(15).lanewise(VectorOperators.LSHL, 27).or(oV);
+
+        oV.intoArray(out, outpos);
+        outpos += VLEN_128;
+
+        oV = iV.lanewise(VectorOperators.LSHR, 4).and(MASK_31);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 236);
+        oV = iV.and(7).lanewise(VectorOperators.LSHL, 28).or(oV);
+
+        oV.intoArray(out, outpos);
+        outpos += VLEN_128;
+
+        oV = iV.lanewise(VectorOperators.LSHR, 3).and(MASK_31);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 240);
+        oV = iV.and(3).lanewise(VectorOperators.LSHL, 29).or(oV);
+
+        oV.intoArray(out, outpos);
+        outpos += VLEN_128;
+
+        oV = iV.lanewise(VectorOperators.LSHR, 2).and(MASK_31);
+
+        iV = IntVector.fromArray(SPECIES_128, in, inpos + 244);
+        oV = iV.and(1).lanewise(VectorOperators.LSHL, 30).or(oV);
+
+        oV.intoArray(out, outpos);
+        outpos += VLEN_128;
+        // Final value; outpos is not advanced because nothing is stored after it.
+        iV.lanewise(VectorOperators.LSHR, 1).and(MASK_31).intoArray(out, outpos);
+    }
+
+}
diff --git a/src/main/java/me/lemire/integercompression/vector/VectorBitPacker256.java b/src/main/java/me/lemire/integercompression/vector/VectorBitPacker256.java
new file mode 100644
index 0000000..02596f5
--- /dev/null
+++ b/src/main/java/me/lemire/integercompression/vector/VectorBitPacker256.java
@@ -0,0 +1,16226 @@
+/**
+ * This code is released under the
+ * Apache License Version 2.0 http://www.apache.org/licenses/.
+ *
+ * (c) Daniel Lemire, http://lemire.me/en/
+ */
+package me.lemire.integercompression.vector;
+
+import java.util.Arrays;
+import jdk.incubator.vector.*;
+
+/**
+ * Vectorized bit-packing routines using 256-bit (8 x int32) vectors.
+ *
+ * A 256-integer block is packed across 8 SIMD lanes, each lane packing 32
+ * values into b 32-bit words. Selected at runtime by VectorBitPackerKernels
+ * when the preferred hardware vector width is 256 bits (e.g. AVX2, Graviton3),
+ * where the 512-bit paths of VectorBitPacker fall back to slow emulation.
+ */
+public class VectorBitPacker256 implements VectorBitPackerKernels {
+    private static final VectorSpecies<Integer> SPECIES_256 =
+            IntVector.SPECIES_256;
+    private static final int VLEN_256 = 8; // int lanes per 256-bit vector; also the output stride
+    private static final int BLOCK_SIZE = 256; // integers handled per pack/unpack call
+    // MASK_k broadcasts (1 << k) - 1, i.e. the low k bits set, to every lane.
+    private static final IntVector MASK_1 =
+            IntVector.broadcast(SPECIES_256, (1 << 1) - 1);
+    private static final IntVector MASK_2 =
+            IntVector.broadcast(SPECIES_256, (1 << 2) - 1);
+    private static final IntVector MASK_3 =
+            IntVector.broadcast(SPECIES_256, (1 << 3) - 1);
+    private static final IntVector MASK_4 =
+            IntVector.broadcast(SPECIES_256, (1 << 4) - 1);
+    private static final IntVector MASK_5 =
+            IntVector.broadcast(SPECIES_256, (1 << 5) - 1);
+    private static final IntVector MASK_6 =
+            IntVector.broadcast(SPECIES_256, (1 << 6) - 1);
+    private static final IntVector MASK_7 =
+            IntVector.broadcast(SPECIES_256, (1 << 7) - 1);
+    private static final IntVector MASK_8 =
+            IntVector.broadcast(SPECIES_256, (1 << 8) - 1);
+    private static final IntVector MASK_9 =
+            IntVector.broadcast(SPECIES_256, (1 << 9) - 1);
+    private static final IntVector MASK_10 =
+            IntVector.broadcast(SPECIES_256, (1 << 10) - 1);
+    private static final IntVector MASK_11 =
+            IntVector.broadcast(SPECIES_256, (1 << 11) - 1);
+    private static final IntVector MASK_12 =
+            IntVector.broadcast(SPECIES_256, (1 << 12) - 1);
+    private static final IntVector MASK_13 =
+            IntVector.broadcast(SPECIES_256, (1 << 13) - 1);
+    private static final IntVector MASK_14 =
+            IntVector.broadcast(SPECIES_256, (1 << 14) - 1);
+    private static final IntVector MASK_15 =
+            IntVector.broadcast(SPECIES_256, (1 << 15) - 1);
+    private static final IntVector MASK_16 =
+            IntVector.broadcast(SPECIES_256, (1 << 16) - 1);
+    private static final IntVector MASK_17 =
+            IntVector.broadcast(SPECIES_256, (1 << 17) - 1);
+    private static final IntVector MASK_18 =
+            IntVector.broadcast(SPECIES_256, (1 << 18) - 1);
+    private static final IntVector MASK_19 =
+            IntVector.broadcast(SPECIES_256, (1 << 19) - 1);
+    private static final IntVector MASK_20 =
+            IntVector.broadcast(SPECIES_256, (1 << 20) - 1);
+    private static final IntVector MASK_21 =
+            IntVector.broadcast(SPECIES_256, (1 << 21) - 1);
+    private static final IntVector MASK_22 =
+            IntVector.broadcast(SPECIES_256, (1 << 22) - 1);
+    private static final IntVector MASK_23 =
+            IntVector.broadcast(SPECIES_256, (1 << 23) - 1);
+    private static final IntVector MASK_24 =
+            IntVector.broadcast(SPECIES_256, (1 << 24) - 1);
+    private static final IntVector MASK_25 =
+            IntVector.broadcast(SPECIES_256, (1 << 25) - 1);
+    private static final IntVector MASK_26 =
+            IntVector.broadcast(SPECIES_256, (1 << 26) - 1);
+    private static final IntVector MASK_27 =
+            IntVector.broadcast(SPECIES_256, (1 << 27) - 1);
+    private static final IntVector MASK_28 =
+            IntVector.broadcast(SPECIES_256, (1 << 28) - 1);
+    private static final IntVector MASK_29 =
+            IntVector.broadcast(SPECIES_256, (1 << 29) - 1);
+    private static final IntVector MASK_30 =
+            IntVector.broadcast(SPECIES_256, (1 << 30) - 1);
+    private static final IntVector MASK_31 =
+            IntVector.broadcast(SPECIES_256, (1 << 31) - 1); // int wrap-around: equals 0x7FFFFFFF
+
+    /** Packs one 256-int block keeping only the low {@code b} bits of each value; b outside 0..32 does nothing. */
+    @Override
+    public void fastpack(final int[] in, int inpos, final int[] out, int outpos, int b) {
+        switch (b) {
+            case 0 -> { // zero-width values occupy no output words
+            }
+            case 1 -> {
+                fastpack1(in, inpos, out, outpos);
+            }
+            case 2 -> {
+                fastpack2(in, inpos, out, outpos);
+            }
+            case 3 -> {
+                fastpack3(in, inpos, out, outpos);
+            }
+            case 4 -> {
+                fastpack4(in, inpos, out, outpos);
+            }
+            case 5 -> {
+                fastpack5(in, inpos, out, outpos);
+            }
+            case 6 -> {
+                fastpack6(in, inpos, out, outpos);
+            }
+            case 7 -> {
+                fastpack7(in, inpos, out, outpos);
+            }
+            case 8 -> {
+                fastpack8(in, inpos, out, outpos);
+            }
+            case 9 -> {
+                fastpack9(in, inpos, out, outpos);
+            }
+            case 10 -> {
+                fastpack10(in, inpos, out, outpos);
+            }
+            case 11 -> {
+                fastpack11(in, inpos, out, outpos);
+            }
+            case 12 -> {
+                fastpack12(in, inpos, out, outpos);
+            }
+            case 13 -> {
+                fastpack13(in, inpos, out, outpos);
+            }
+            case 14 -> {
+                fastpack14(in, inpos, out, outpos);
+            }
+            case 15 -> {
+                fastpack15(in, inpos, out, outpos);
+            }
+            case 16 -> {
+                fastpack16(in, inpos, out, outpos);
+            }
+            case 17 -> {
+                fastpack17(in, inpos, out, outpos);
+            }
+            case 18 -> {
+                fastpack18(in, inpos, out, outpos);
+            }
+            case 19 -> {
+                fastpack19(in, inpos, out, outpos);
+            }
+            case 20 -> {
+                fastpack20(in, inpos, out, outpos);
+            }
+            case 21 -> {
+                fastpack21(in, inpos, out, outpos);
+            }
+            case 22 -> {
+                fastpack22(in, inpos, out, outpos);
+            }
+            case 23 -> {
+                fastpack23(in, inpos, out, outpos);
+            }
+            case 24 -> {
+                fastpack24(in, inpos, out, outpos);
+            }
+            case 25 -> {
+                fastpack25(in, inpos, out, outpos);
+            }
+            case 26 -> {
+                fastpack26(in, inpos, out, outpos);
+            }
+            case 27 -> {
+                fastpack27(in, inpos, out, outpos);
+            }
+            case 28 -> {
+                fastpack28(in, inpos, out, outpos);
+            }
+            case 29 -> {
+                fastpack29(in, inpos, out, outpos);
+            }
+            case 30 -> {
+                fastpack30(in, inpos, out, outpos);
+            }
+            case 31 -> {
+                fastpack31(in, inpos, out, outpos);
+            }
+            case 32 -> { // full-width values are copied verbatim
+                System.arraycopy(in, inpos, out, outpos, BLOCK_SIZE);
+            }
+        }
+    }
+
+    /** Packs one 256-int block at {@code b} bits per value without masking; b outside 0..32 does nothing. */
+    @Override
+    public void fastpackNoMask(final int[] in, int inpos, final int[] out, int outpos, int b) {
+        switch (b) {
+            case 0 -> { // zero-width values occupy no output words
+            }
+            case 1 -> {
+                fastpackNoMask1(in, inpos, out, outpos);
+            }
+            case 2 -> {
+                fastpackNoMask2(in, inpos, out, outpos);
+            }
+            case 3 -> {
+                fastpackNoMask3(in, inpos, out, outpos);
+            }
+            case 4 -> {
+                fastpackNoMask4(in, inpos, out, outpos);
+            }
+            case 5 -> {
+                fastpackNoMask5(in, inpos, out, outpos);
+            }
+            case 6 -> {
+                fastpackNoMask6(in, inpos, out, outpos);
+            }
+            case 7 -> {
+                fastpackNoMask7(in, inpos, out, outpos);
+            }
+            case 8 -> {
+                fastpackNoMask8(in, inpos, out, outpos);
+            }
+            case 9 -> {
+                fastpackNoMask9(in, inpos, out, outpos);
+            }
+            case 10 -> {
+                fastpackNoMask10(in, inpos, out, outpos);
+            }
+            case 11 -> {
+                fastpackNoMask11(in, inpos, out, outpos);
+            }
+            case 12 -> {
+                fastpackNoMask12(in, inpos, out, outpos);
+            }
+            case 13 -> {
+                fastpackNoMask13(in, inpos, out, outpos);
+            }
+            case 14 -> {
+                fastpackNoMask14(in, inpos, out, outpos);
+            }
+            case 15 -> {
+                fastpackNoMask15(in, inpos, out, outpos);
+            }
+            case 16 -> {
+                fastpackNoMask16(in, inpos, out, outpos);
+            }
+            case 17 -> {
+                fastpackNoMask17(in, inpos, out, outpos);
+            }
+            case 18 -> {
+                fastpackNoMask18(in, inpos, out, outpos);
+            }
+            case 19 -> {
+                fastpackNoMask19(in, inpos, out, outpos);
+            }
+            case 20 -> {
+                fastpackNoMask20(in, inpos, out, outpos);
+            }
+            case 21 -> {
+                fastpackNoMask21(in, inpos, out, outpos);
+            }
+            case 22 -> {
+                fastpackNoMask22(in, inpos, out, outpos);
+            }
+            case 23 -> {
+                fastpackNoMask23(in, inpos, out, outpos);
+            }
+            case 24 -> {
+                fastpackNoMask24(in, inpos, out, outpos);
+            }
+            case 25 -> {
+                fastpackNoMask25(in, inpos, out, outpos);
+            }
+            case 26 -> {
+                fastpackNoMask26(in, inpos, out, outpos);
+            }
+            case 27 -> {
+                fastpackNoMask27(in, inpos, out, outpos);
+            }
+            case 28 -> {
+                fastpackNoMask28(in, inpos, out, outpos);
+            }
+            case 29 -> {
+                fastpackNoMask29(in, inpos, out, outpos);
+            }
+            case 30 -> {
+                fastpackNoMask30(in, inpos, out, outpos);
+            }
+            case 31 -> {
+                fastpackNoMask31(in, inpos, out, outpos);
+            }
+            case 32 -> { // full-width values are copied verbatim
+                System.arraycopy(in, inpos, out, outpos, BLOCK_SIZE);
+            }
+        }
+    }
+
+    /** Unpacks one 256-int block stored at {@code b} bits per value; b outside 0..32 does nothing. */
+    @Override
+    public void fastunpack(final int[] in, int inpos, final int[] out, int outpos, int b) {
+        switch (b) {
+            case 0 -> { // nothing was stored: the whole output block is zero
+                Arrays.fill(out, outpos, outpos + BLOCK_SIZE, 0);
+            }
+            case 1 -> {
+                fastunpack1(in, inpos, out, outpos);
+            }
+            case 2 -> {
+                fastunpack2(in, inpos, out, outpos);
+            }
+            case 3 -> {
+                fastunpack3(in, inpos, out, outpos);
+            }
+            case 4 -> {
+                fastunpack4(in, inpos, out, outpos);
+            }
+            case 5 -> {
+                fastunpack5(in, inpos, out, outpos);
+            }
+            case 6 -> {
+                fastunpack6(in, inpos, out, outpos);
+            }
+            case 7 -> {
+                fastunpack7(in, inpos, out, outpos);
+            }
+            case 8 -> {
+                fastunpack8(in, inpos, out, outpos);
+            }
+            case 9 -> {
+                fastunpack9(in, inpos, out, outpos);
+            }
+            case 10 -> {
+                fastunpack10(in, inpos, out, outpos);
+            }
+            case 11 -> {
+                fastunpack11(in, inpos, out, outpos);
+            }
+            case 12 -> {
+                fastunpack12(in, inpos, out, outpos);
+            }
+            case 13 -> {
+                fastunpack13(in, inpos, out, outpos);
+            }
+            case 14 -> {
+                fastunpack14(in, inpos, out, outpos);
+            }
+            case 15 -> {
+                fastunpack15(in, inpos, out, outpos);
+            }
+            case 16 -> {
+                fastunpack16(in, inpos, out, outpos);
+            }
+            case 17 -> {
+                fastunpack17(in, inpos, out, outpos);
+            }
+            case 18 -> {
+                fastunpack18(in, inpos, out, outpos);
+            }
+            case 19 -> {
+                fastunpack19(in, inpos, out, outpos);
+            }
+            case 20 -> {
+                fastunpack20(in, inpos, out, outpos);
+            }
+            case 21 -> {
+                fastunpack21(in, inpos, out, outpos);
+            }
+            case 22 -> {
+                fastunpack22(in, inpos, out, outpos);
+            }
+            case 23 -> {
+                fastunpack23(in, inpos, out, outpos);
+            }
+            case 24 -> {
+                fastunpack24(in, inpos, out, outpos);
+            }
+            case 25 -> {
+                fastunpack25(in, inpos, out, outpos);
+            }
+            case 26 -> {
+                fastunpack26(in, inpos, out, outpos);
+            }
+            case 27 -> {
+                fastunpack27(in, inpos, out, outpos);
+            }
+            case 28 -> {
+                fastunpack28(in, inpos, out, outpos);
+            }
+            case 29 -> {
+                fastunpack29(in, inpos, out, outpos);
+            }
+            case 30 -> {
+                fastunpack30(in, inpos, out, outpos);
+            }
+            case 31 -> {
+                fastunpack31(in, inpos, out, outpos);
+            }
+            case 32 -> { // full-width values are copied verbatim
+                System.arraycopy(in, inpos, out, outpos, BLOCK_SIZE);
+            }
+        }
+    }
+
+    /** Packs bit 0 of 256 ints: in[inpos + 8 * k + j] becomes bit k of out[outpos + j], j in 0..7. */
+    static void fastpack1(final int[] in, int inpos, final int[] out, int outpos) {
+        IntVector acc = IntVector.fromArray(SPECIES_256, in, inpos).and(MASK_1);
+
+        IntVector src = IntVector.fromArray(SPECIES_256, in, inpos + 8);
+        acc = acc.or(src.and(MASK_1).lanewise(VectorOperators.LSHL, 1));
+
+        src = IntVector.fromArray(SPECIES_256, in, inpos + 16);
+        acc = acc.or(src.and(MASK_1).lanewise(VectorOperators.LSHL, 2));
+
+        src = IntVector.fromArray(SPECIES_256, in, inpos + 24);
+        acc = acc.or(src.and(MASK_1).lanewise(VectorOperators.LSHL, 3));
+
+        src = IntVector.fromArray(SPECIES_256, in, inpos + 32);
+        acc = acc.or(src.and(MASK_1).lanewise(VectorOperators.LSHL, 4));
+
+        src = IntVector.fromArray(SPECIES_256, in, inpos + 40);
+        acc = acc.or(src.and(MASK_1).lanewise(VectorOperators.LSHL, 5));
+
+        src = IntVector.fromArray(SPECIES_256, in, inpos + 48);
+        acc = acc.or(src.and(MASK_1).lanewise(VectorOperators.LSHL, 6));
+
+        src = IntVector.fromArray(SPECIES_256, in, inpos + 56);
+        acc = acc.or(src.and(MASK_1).lanewise(VectorOperators.LSHL, 7));
+
+        src = IntVector.fromArray(SPECIES_256, in, inpos + 64);
+        acc = acc.or(src.and(MASK_1).lanewise(VectorOperators.LSHL, 8));
+
+        src = IntVector.fromArray(SPECIES_256, in, inpos + 72);
+        acc = acc.or(src.and(MASK_1).lanewise(VectorOperators.LSHL, 9));
+
+        src = IntVector.fromArray(SPECIES_256, in, inpos + 80);
+        acc = acc.or(src.and(MASK_1).lanewise(VectorOperators.LSHL, 10));
+
+        src = IntVector.fromArray(SPECIES_256, in, inpos + 88);
+        acc = acc.or(src.and(MASK_1).lanewise(VectorOperators.LSHL, 11));
+
+        src = IntVector.fromArray(SPECIES_256, in, inpos + 96);
+        acc = acc.or(src.and(MASK_1).lanewise(VectorOperators.LSHL, 12));
+
+        src = IntVector.fromArray(SPECIES_256, in, inpos + 104);
+        acc = acc.or(src.and(MASK_1).lanewise(VectorOperators.LSHL, 13));
+
+        src = IntVector.fromArray(SPECIES_256, in, inpos + 112);
+        acc = acc.or(src.and(MASK_1).lanewise(VectorOperators.LSHL, 14));
+
+        src = IntVector.fromArray(SPECIES_256, in, inpos + 120);
+        acc = acc.or(src.and(MASK_1).lanewise(VectorOperators.LSHL, 15));
+
+        src = IntVector.fromArray(SPECIES_256, in, inpos + 128);
+        acc = acc.or(src.and(MASK_1).lanewise(VectorOperators.LSHL, 16));
+
+        src = IntVector.fromArray(SPECIES_256, in, inpos + 136);
+        acc = acc.or(src.and(MASK_1).lanewise(VectorOperators.LSHL, 17));
+
+        src = IntVector.fromArray(SPECIES_256, in, inpos + 144);
+        acc = acc.or(src.and(MASK_1).lanewise(VectorOperators.LSHL, 18));
+
+        src = IntVector.fromArray(SPECIES_256, in, inpos + 152);
+        acc = acc.or(src.and(MASK_1).lanewise(VectorOperators.LSHL, 19));
+
+        src = IntVector.fromArray(SPECIES_256, in, inpos + 160);
+        acc = acc.or(src.and(MASK_1).lanewise(VectorOperators.LSHL, 20));
+
+        src = IntVector.fromArray(SPECIES_256, in, inpos + 168);
+        acc = acc.or(src.and(MASK_1).lanewise(VectorOperators.LSHL, 21));
+
+        src = IntVector.fromArray(SPECIES_256, in, inpos + 176);
+        acc = acc.or(src.and(MASK_1).lanewise(VectorOperators.LSHL, 22));
+
+        src = IntVector.fromArray(SPECIES_256, in, inpos + 184);
+        acc = acc.or(src.and(MASK_1).lanewise(VectorOperators.LSHL, 23));
+
+        src = IntVector.fromArray(SPECIES_256, in, inpos + 192);
+        acc = acc.or(src.and(MASK_1).lanewise(VectorOperators.LSHL, 24));
+
+        src = IntVector.fromArray(SPECIES_256, in, inpos + 200);
+        acc = acc.or(src.and(MASK_1).lanewise(VectorOperators.LSHL, 25));
+
+        src = IntVector.fromArray(SPECIES_256, in, inpos + 208);
+        acc = acc.or(src.and(MASK_1).lanewise(VectorOperators.LSHL, 26));
+
+        src = IntVector.fromArray(SPECIES_256, in, inpos + 216);
+        acc = acc.or(src.and(MASK_1).lanewise(VectorOperators.LSHL, 27));
+
+        src = IntVector.fromArray(SPECIES_256, in, inpos + 224);
+        acc = acc.or(src.and(MASK_1).lanewise(VectorOperators.LSHL, 28));
+
+        src = IntVector.fromArray(SPECIES_256, in, inpos + 232);
+        acc = acc.or(src.and(MASK_1).lanewise(VectorOperators.LSHL, 29));
+
+        src = IntVector.fromArray(SPECIES_256, in, inpos + 240);
+        acc = acc.or(src.and(MASK_1).lanewise(VectorOperators.LSHL, 30));
+
+        src = IntVector.fromArray(SPECIES_256, in, inpos + 248);
+        acc = acc.or(src.and(MASK_1).lanewise(VectorOperators.LSHL, 31));
+
+        acc.intoArray(out, outpos);
+    }
+
+    /** Same layout as fastpack1 but unmasked: bits above bit 0 of an input spill into higher fields. */
+    static void fastpackNoMask1(final int[] in, int inpos, final int[] out, int outpos) {
+        IntVector acc = IntVector.fromArray(SPECIES_256, in, inpos);
+
+        IntVector src = IntVector.fromArray(SPECIES_256, in, inpos + 8);
+        acc = acc.or(src.lanewise(VectorOperators.LSHL, 1));
+
+        src = IntVector.fromArray(SPECIES_256, in, inpos + 16);
+        acc = acc.or(src.lanewise(VectorOperators.LSHL, 2));
+
+        src = IntVector.fromArray(SPECIES_256, in, inpos + 24);
+        acc = acc.or(src.lanewise(VectorOperators.LSHL, 3));
+
+        src = IntVector.fromArray(SPECIES_256, in, inpos + 32);
+        acc = acc.or(src.lanewise(VectorOperators.LSHL, 4));
+
+        src = IntVector.fromArray(SPECIES_256, in, inpos + 40);
+        acc = acc.or(src.lanewise(VectorOperators.LSHL, 5));
+
+        src = IntVector.fromArray(SPECIES_256, in, inpos + 48);
+        acc = acc.or(src.lanewise(VectorOperators.LSHL, 6));
+
+        src = IntVector.fromArray(SPECIES_256, in, inpos + 56);
+        acc = acc.or(src.lanewise(VectorOperators.LSHL, 7));
+
+        src = IntVector.fromArray(SPECIES_256, in, inpos + 64);
+        acc = acc.or(src.lanewise(VectorOperators.LSHL, 8));
+
+        src = IntVector.fromArray(SPECIES_256, in, inpos + 72);
+        acc = acc.or(src.lanewise(VectorOperators.LSHL, 9));
+
+        src = IntVector.fromArray(SPECIES_256, in, inpos + 80);
+        acc = acc.or(src.lanewise(VectorOperators.LSHL, 10));
+
+        src = IntVector.fromArray(SPECIES_256, in, inpos + 88);
+        acc = acc.or(src.lanewise(VectorOperators.LSHL, 11));
+
+        src = IntVector.fromArray(SPECIES_256, in, inpos + 96);
+        acc = acc.or(src.lanewise(VectorOperators.LSHL, 12));
+
+        src = IntVector.fromArray(SPECIES_256, in, inpos + 104);
+        acc = acc.or(src.lanewise(VectorOperators.LSHL, 13));
+
+        src = IntVector.fromArray(SPECIES_256, in, inpos + 112);
+        acc = acc.or(src.lanewise(VectorOperators.LSHL, 14));
+
+        src = IntVector.fromArray(SPECIES_256, in, inpos + 120);
+        acc = acc.or(src.lanewise(VectorOperators.LSHL, 15));
+
+        src = IntVector.fromArray(SPECIES_256, in, inpos + 128);
+        acc = acc.or(src.lanewise(VectorOperators.LSHL, 16));
+
+        src = IntVector.fromArray(SPECIES_256, in, inpos + 136);
+        acc = acc.or(src.lanewise(VectorOperators.LSHL, 17));
+
+        src = IntVector.fromArray(SPECIES_256, in, inpos + 144);
+        acc = acc.or(src.lanewise(VectorOperators.LSHL, 18));
+
+        src = IntVector.fromArray(SPECIES_256, in, inpos + 152);
+        acc = acc.or(src.lanewise(VectorOperators.LSHL, 19));
+
+        src = IntVector.fromArray(SPECIES_256, in, inpos + 160);
+        acc = acc.or(src.lanewise(VectorOperators.LSHL, 20));
+
+        src = IntVector.fromArray(SPECIES_256, in, inpos + 168);
+        acc = acc.or(src.lanewise(VectorOperators.LSHL, 21));
+
+        src = IntVector.fromArray(SPECIES_256, in, inpos + 176);
+        acc = acc.or(src.lanewise(VectorOperators.LSHL, 22));
+
+        src = IntVector.fromArray(SPECIES_256, in, inpos + 184);
+        acc = acc.or(src.lanewise(VectorOperators.LSHL, 23));
+
+        src = IntVector.fromArray(SPECIES_256, in, inpos + 192);
+        acc = acc.or(src.lanewise(VectorOperators.LSHL, 24));
+
+        src = IntVector.fromArray(SPECIES_256, in, inpos + 200);
+        acc = acc.or(src.lanewise(VectorOperators.LSHL, 25));
+
+        src = IntVector.fromArray(SPECIES_256, in, inpos + 208);
+        acc = acc.or(src.lanewise(VectorOperators.LSHL, 26));
+
+        src = IntVector.fromArray(SPECIES_256, in, inpos + 216);
+        acc = acc.or(src.lanewise(VectorOperators.LSHL, 27));
+
+        src = IntVector.fromArray(SPECIES_256, in, inpos + 224);
+        acc = acc.or(src.lanewise(VectorOperators.LSHL, 28));
+
+        src = IntVector.fromArray(SPECIES_256, in, inpos + 232);
+        acc = acc.or(src.lanewise(VectorOperators.LSHL, 29));
+
+        src = IntVector.fromArray(SPECIES_256, in, inpos + 240);
+        acc = acc.or(src.lanewise(VectorOperators.LSHL, 30));
+
+        src = IntVector.fromArray(SPECIES_256, in, inpos + 248);
+        acc = acc.or(src.lanewise(VectorOperators.LSHL, 31));
+
+        acc.intoArray(out, outpos);
+    }
+
+    static void fastunpack1(final int[] in, int inpos, final int[] out, int outpos) {
+        IntVector packed = IntVector.fromArray(SPECIES_256, in, inpos);
+        packed.and(MASK_1).intoArray(out, outpos);
+        // Output row k (8 ints) is bit k of the 8 packed words: step one row forward, then extract it.
+        outpos += VLEN_256;
+        packed.lanewise(VectorOperators.LSHR, 1).and(MASK_1).intoArray(out, outpos);
+
+        outpos += VLEN_256;
+        packed.lanewise(VectorOperators.LSHR, 2).and(MASK_1).intoArray(out, outpos);
+
+        outpos += VLEN_256;
+        packed.lanewise(VectorOperators.LSHR, 3).and(MASK_1).intoArray(out, outpos);
+
+        outpos += VLEN_256;
+        packed.lanewise(VectorOperators.LSHR, 4).and(MASK_1).intoArray(out, outpos);
+
+        outpos += VLEN_256;
+        packed.lanewise(VectorOperators.LSHR, 5).and(MASK_1).intoArray(out, outpos);
+
+        outpos += VLEN_256;
+        packed.lanewise(VectorOperators.LSHR, 6).and(MASK_1).intoArray(out, outpos);
+
+        outpos += VLEN_256;
+        packed.lanewise(VectorOperators.LSHR, 7).and(MASK_1).intoArray(out, outpos);
+
+        outpos += VLEN_256;
+        packed.lanewise(VectorOperators.LSHR, 8).and(MASK_1).intoArray(out, outpos);
+
+        outpos += VLEN_256;
+        packed.lanewise(VectorOperators.LSHR, 9).and(MASK_1).intoArray(out, outpos);
+
+        outpos += VLEN_256;
+        packed.lanewise(VectorOperators.LSHR, 10).and(MASK_1).intoArray(out, outpos);
+
+        outpos += VLEN_256;
+        packed.lanewise(VectorOperators.LSHR, 11).and(MASK_1).intoArray(out, outpos);
+
+        outpos += VLEN_256;
+        packed.lanewise(VectorOperators.LSHR, 12).and(MASK_1).intoArray(out, outpos);
+
+        outpos += VLEN_256;
+        packed.lanewise(VectorOperators.LSHR, 13).and(MASK_1).intoArray(out, outpos);
+
+        outpos += VLEN_256;
+        packed.lanewise(VectorOperators.LSHR, 14).and(MASK_1).intoArray(out, outpos);
+
+        outpos += VLEN_256;
+        packed.lanewise(VectorOperators.LSHR, 15).and(MASK_1).intoArray(out, outpos);
+
+        outpos += VLEN_256;
+        packed.lanewise(VectorOperators.LSHR, 16).and(MASK_1).intoArray(out, outpos);
+
+        outpos += VLEN_256;
+        packed.lanewise(VectorOperators.LSHR, 17).and(MASK_1).intoArray(out, outpos);
+
+        outpos += VLEN_256;
+        packed.lanewise(VectorOperators.LSHR, 18).and(MASK_1).intoArray(out, outpos);
+
+        outpos += VLEN_256;
+        packed.lanewise(VectorOperators.LSHR, 19).and(MASK_1).intoArray(out, outpos);
+
+        outpos += VLEN_256;
+        packed.lanewise(VectorOperators.LSHR, 20).and(MASK_1).intoArray(out, outpos);
+
+        outpos += VLEN_256;
+        packed.lanewise(VectorOperators.LSHR, 21).and(MASK_1).intoArray(out, outpos);
+
+        outpos += VLEN_256;
+        packed.lanewise(VectorOperators.LSHR, 22).and(MASK_1).intoArray(out, outpos);
+
+        outpos += VLEN_256;
+        packed.lanewise(VectorOperators.LSHR, 23).and(MASK_1).intoArray(out, outpos);
+
+        outpos += VLEN_256;
+        packed.lanewise(VectorOperators.LSHR, 24).and(MASK_1).intoArray(out, outpos);
+
+        outpos += VLEN_256;
+        packed.lanewise(VectorOperators.LSHR, 25).and(MASK_1).intoArray(out, outpos);
+
+        outpos += VLEN_256;
+        packed.lanewise(VectorOperators.LSHR, 26).and(MASK_1).intoArray(out, outpos);
+
+        outpos += VLEN_256;
+        packed.lanewise(VectorOperators.LSHR, 27).and(MASK_1).intoArray(out, outpos);
+
+        outpos += VLEN_256;
+        packed.lanewise(VectorOperators.LSHR, 28).and(MASK_1).intoArray(out, outpos);
+
+        outpos += VLEN_256;
+        packed.lanewise(VectorOperators.LSHR, 29).and(MASK_1).intoArray(out, outpos);
+
+        outpos += VLEN_256;
+        packed.lanewise(VectorOperators.LSHR, 30).and(MASK_1).intoArray(out, outpos);
+
+        outpos += VLEN_256;
+        packed.lanewise(VectorOperators.LSHR, 31).and(MASK_1).intoArray(out, outpos);
+    }
+
+    /** Packs the low 2 bits of 256 ints into 16 words: input rows 0..15 fill the first 8, rows 16..31 the next 8. */
+    static void fastpack2(final int[] in, int inpos, final int[] out, int outpos) {
+        IntVector acc = IntVector.fromArray(SPECIES_256, in, inpos).and(MASK_2);
+
+        IntVector src = IntVector.fromArray(SPECIES_256, in, inpos + 8);
+        acc = acc.or(src.and(MASK_2).lanewise(VectorOperators.LSHL, 2));
+
+        src = IntVector.fromArray(SPECIES_256, in, inpos + 16);
+        acc = acc.or(src.and(MASK_2).lanewise(VectorOperators.LSHL, 4));
+
+        src = IntVector.fromArray(SPECIES_256, in, inpos + 24);
+        acc = acc.or(src.and(MASK_2).lanewise(VectorOperators.LSHL, 6));
+
+        src = IntVector.fromArray(SPECIES_256, in, inpos + 32);
+        acc = acc.or(src.and(MASK_2).lanewise(VectorOperators.LSHL, 8));
+
+        src = IntVector.fromArray(SPECIES_256, in, inpos + 40);
+        acc = acc.or(src.and(MASK_2).lanewise(VectorOperators.LSHL, 10));
+
+        src = IntVector.fromArray(SPECIES_256, in, inpos + 48);
+        acc = acc.or(src.and(MASK_2).lanewise(VectorOperators.LSHL, 12));
+
+        src = IntVector.fromArray(SPECIES_256, in, inpos + 56);
+        acc = acc.or(src.and(MASK_2).lanewise(VectorOperators.LSHL, 14));
+
+        src = IntVector.fromArray(SPECIES_256, in, inpos + 64);
+        acc = acc.or(src.and(MASK_2).lanewise(VectorOperators.LSHL, 16));
+
+        src = IntVector.fromArray(SPECIES_256, in, inpos + 72);
+        acc = acc.or(src.and(MASK_2).lanewise(VectorOperators.LSHL, 18));
+
+        src = IntVector.fromArray(SPECIES_256, in, inpos + 80);
+        acc = acc.or(src.and(MASK_2).lanewise(VectorOperators.LSHL, 20));
+
+        src = IntVector.fromArray(SPECIES_256, in, inpos + 88);
+        acc = acc.or(src.and(MASK_2).lanewise(VectorOperators.LSHL, 22));
+
+        src = IntVector.fromArray(SPECIES_256, in, inpos + 96);
+        acc = acc.or(src.and(MASK_2).lanewise(VectorOperators.LSHL, 24));
+
+        src = IntVector.fromArray(SPECIES_256, in, inpos + 104);
+        acc = acc.or(src.and(MASK_2).lanewise(VectorOperators.LSHL, 26));
+
+        src = IntVector.fromArray(SPECIES_256, in, inpos + 112);
+        acc = acc.or(src.and(MASK_2).lanewise(VectorOperators.LSHL, 28));
+
+        src = IntVector.fromArray(SPECIES_256, in, inpos + 120);
+        acc = acc.or(src.and(MASK_2).lanewise(VectorOperators.LSHL, 30));
+
+        acc.intoArray(out, outpos);
+        outpos += VLEN_256;
+
+        // Second output vector: start a fresh accumulator from input row 16.
+        acc = IntVector.fromArray(SPECIES_256, in, inpos + 128).and(MASK_2);
+
+        src = IntVector.fromArray(SPECIES_256, in, inpos + 136);
+        acc = acc.or(src.and(MASK_2).lanewise(VectorOperators.LSHL, 2));
+
+        src = IntVector.fromArray(SPECIES_256, in, inpos + 144);
+        acc = acc.or(src.and(MASK_2).lanewise(VectorOperators.LSHL, 4));
+
+        src = IntVector.fromArray(SPECIES_256, in, inpos + 152);
+        acc = acc.or(src.and(MASK_2).lanewise(VectorOperators.LSHL, 6));
+
+        src = IntVector.fromArray(SPECIES_256, in, inpos + 160);
+        acc = acc.or(src.and(MASK_2).lanewise(VectorOperators.LSHL, 8));
+
+        src = IntVector.fromArray(SPECIES_256, in, inpos + 168);
+        acc = acc.or(src.and(MASK_2).lanewise(VectorOperators.LSHL, 10));
+
+        src = IntVector.fromArray(SPECIES_256, in, inpos + 176);
+        acc = acc.or(src.and(MASK_2).lanewise(VectorOperators.LSHL, 12));
+
+        src = IntVector.fromArray(SPECIES_256, in, inpos + 184);
+        acc = acc.or(src.and(MASK_2).lanewise(VectorOperators.LSHL, 14));
+
+        src = IntVector.fromArray(SPECIES_256, in, inpos + 192);
+        acc = acc.or(src.and(MASK_2).lanewise(VectorOperators.LSHL, 16));
+
+        src = IntVector.fromArray(SPECIES_256, in, inpos + 200);
+        acc = acc.or(src.and(MASK_2).lanewise(VectorOperators.LSHL, 18));
+
+        src = IntVector.fromArray(SPECIES_256, in, inpos + 208);
+        acc = acc.or(src.and(MASK_2).lanewise(VectorOperators.LSHL, 20));
+
+        src = IntVector.fromArray(SPECIES_256, in, inpos + 216);
+        acc = acc.or(src.and(MASK_2).lanewise(VectorOperators.LSHL, 22));
+
+        src = IntVector.fromArray(SPECIES_256, in, inpos + 224);
+        acc = acc.or(src.and(MASK_2).lanewise(VectorOperators.LSHL, 24));
+
+        src = IntVector.fromArray(SPECIES_256, in, inpos + 232);
+        acc = acc.or(src.and(MASK_2).lanewise(VectorOperators.LSHL, 26));
+
+        src = IntVector.fromArray(SPECIES_256, in, inpos + 240);
+        acc = acc.or(src.and(MASK_2).lanewise(VectorOperators.LSHL, 28));
+
+        src = IntVector.fromArray(SPECIES_256, in, inpos + 248);
+        acc = acc.or(src.and(MASK_2).lanewise(VectorOperators.LSHL, 30));
+
+        acc.intoArray(out, outpos);
+    }
+
+    /** Same layout as fastpack2 but unmasked: bits above the low 2 of an input spill into higher fields. */
+    static void fastpackNoMask2(final int[] in, int inpos, final int[] out, int outpos) {
+        IntVector acc = IntVector.fromArray(SPECIES_256, in, inpos);
+
+        IntVector src = IntVector.fromArray(SPECIES_256, in, inpos + 8);
+        acc = acc.or(src.lanewise(VectorOperators.LSHL, 2));
+
+        src = IntVector.fromArray(SPECIES_256, in, inpos + 16);
+        acc = acc.or(src.lanewise(VectorOperators.LSHL, 4));
+
+        src = IntVector.fromArray(SPECIES_256, in, inpos + 24);
+        acc = acc.or(src.lanewise(VectorOperators.LSHL, 6));
+
+        src = IntVector.fromArray(SPECIES_256, in, inpos + 32);
+        acc = acc.or(src.lanewise(VectorOperators.LSHL, 8));
+
+        src = IntVector.fromArray(SPECIES_256, in, inpos + 40);
+        acc = acc.or(src.lanewise(VectorOperators.LSHL, 10));
+
+        src = IntVector.fromArray(SPECIES_256, in, inpos + 48);
+        acc = acc.or(src.lanewise(VectorOperators.LSHL, 12));
+
+        src = IntVector.fromArray(SPECIES_256, in, inpos + 56);
+        acc = acc.or(src.lanewise(VectorOperators.LSHL, 14));
+
+        src = IntVector.fromArray(SPECIES_256, in, inpos + 64);
+        acc = acc.or(src.lanewise(VectorOperators.LSHL, 16));
+
+        src = IntVector.fromArray(SPECIES_256, in, inpos + 72);
+        acc = acc.or(src.lanewise(VectorOperators.LSHL, 18));
+
+        src = IntVector.fromArray(SPECIES_256, in, inpos + 80);
+        acc = acc.or(src.lanewise(VectorOperators.LSHL, 20));
+
+        src = IntVector.fromArray(SPECIES_256, in, inpos + 88);
+        acc = acc.or(src.lanewise(VectorOperators.LSHL, 22));
+
+        src = IntVector.fromArray(SPECIES_256, in, inpos + 96);
+        acc = acc.or(src.lanewise(VectorOperators.LSHL, 24));
+
+        src = IntVector.fromArray(SPECIES_256, in, inpos + 104);
+        acc = acc.or(src.lanewise(VectorOperators.LSHL, 26));
+
+        src = IntVector.fromArray(SPECIES_256, in, inpos + 112);
+        acc = acc.or(src.lanewise(VectorOperators.LSHL, 28));
+
+        src = IntVector.fromArray(SPECIES_256, in, inpos + 120);
+        acc = acc.or(src.lanewise(VectorOperators.LSHL, 30));
+
+        acc.intoArray(out, outpos);
+        outpos += VLEN_256;
+
+        // Second output vector: start a fresh accumulator from input row 16.
+        acc = IntVector.fromArray(SPECIES_256, in, inpos + 128);
+
+        src = IntVector.fromArray(SPECIES_256, in, inpos + 136);
+        acc = acc.or(src.lanewise(VectorOperators.LSHL, 2));
+
+        src = IntVector.fromArray(SPECIES_256, in, inpos + 144);
+        acc = acc.or(src.lanewise(VectorOperators.LSHL, 4));
+
+        src = IntVector.fromArray(SPECIES_256, in, inpos + 152);
+        acc = acc.or(src.lanewise(VectorOperators.LSHL, 6));
+
+        src = IntVector.fromArray(SPECIES_256, in, inpos + 160);
+        acc = acc.or(src.lanewise(VectorOperators.LSHL, 8));
+
+        src = IntVector.fromArray(SPECIES_256, in, inpos + 168);
+        acc = acc.or(src.lanewise(VectorOperators.LSHL, 10));
+
+        src = IntVector.fromArray(SPECIES_256, in, inpos + 176);
+        acc = acc.or(src.lanewise(VectorOperators.LSHL, 12));
+
+        src = IntVector.fromArray(SPECIES_256, in, inpos + 184);
+        acc = acc.or(src.lanewise(VectorOperators.LSHL, 14));
+
+        src = IntVector.fromArray(SPECIES_256, in, inpos + 192);
+        acc = acc.or(src.lanewise(VectorOperators.LSHL, 16));
+
+        src = IntVector.fromArray(SPECIES_256, in, inpos + 200);
+        acc = acc.or(src.lanewise(VectorOperators.LSHL, 18));
+
+        src = IntVector.fromArray(SPECIES_256, in, inpos + 208);
+        acc = acc.or(src.lanewise(VectorOperators.LSHL, 20));
+
+        src = IntVector.fromArray(SPECIES_256, in, inpos + 216);
+        acc = acc.or(src.lanewise(VectorOperators.LSHL, 22));
+
+        src = IntVector.fromArray(SPECIES_256, in, inpos + 224);
+        acc = acc.or(src.lanewise(VectorOperators.LSHL, 24));
+
+        src = IntVector.fromArray(SPECIES_256, in, inpos + 232);
+        acc = acc.or(src.lanewise(VectorOperators.LSHL, 26));
+
+        src = IntVector.fromArray(SPECIES_256, in, inpos + 240);
+        acc = acc.or(src.lanewise(VectorOperators.LSHL, 28));
+
+        src = IntVector.fromArray(SPECIES_256, in, inpos + 248);
+        acc = acc.or(src.lanewise(VectorOperators.LSHL, 30));
+
+        acc.intoArray(out, outpos);
+    }
+
+    static void fastunpack2(final int[] in, int inpos, final int[] out, int outpos) {
+        IntVector packed = IntVector.fromArray(SPECIES_256, in, inpos);
+        packed.and(MASK_2).intoArray(out, outpos);
+        // Each packed vector yields 16 output rows of 8 ints: step one row forward, then extract 2 bits.
+        outpos += VLEN_256;
+        packed.lanewise(VectorOperators.LSHR, 2).and(MASK_2).intoArray(out, outpos);
+
+        outpos += VLEN_256;
+        packed.lanewise(VectorOperators.LSHR, 4).and(MASK_2).intoArray(out, outpos);
+
+        outpos += VLEN_256;
+        packed.lanewise(VectorOperators.LSHR, 6).and(MASK_2).intoArray(out, outpos);
+
+        outpos += VLEN_256;
+        packed.lanewise(VectorOperators.LSHR, 8).and(MASK_2).intoArray(out, outpos);
+
+        outpos += VLEN_256;
+        packed.lanewise(VectorOperators.LSHR, 10).and(MASK_2).intoArray(out, outpos);
+
+        outpos += VLEN_256;
+        packed.lanewise(VectorOperators.LSHR, 12).and(MASK_2).intoArray(out, outpos);
+
+        outpos += VLEN_256;
+        packed.lanewise(VectorOperators.LSHR, 14).and(MASK_2).intoArray(out, outpos);
+
+        outpos += VLEN_256;
+        packed.lanewise(VectorOperators.LSHR, 16).and(MASK_2).intoArray(out, outpos);
+
+        outpos += VLEN_256;
+        packed.lanewise(VectorOperators.LSHR, 18).and(MASK_2).intoArray(out, outpos);
+
+        outpos += VLEN_256;
+        packed.lanewise(VectorOperators.LSHR, 20).and(MASK_2).intoArray(out, outpos);
+
+        outpos += VLEN_256;
+        packed.lanewise(VectorOperators.LSHR, 22).and(MASK_2).intoArray(out, outpos);
+
+        outpos += VLEN_256;
+        packed.lanewise(VectorOperators.LSHR, 24).and(MASK_2).intoArray(out, outpos);
+
+        outpos += VLEN_256;
+        packed.lanewise(VectorOperators.LSHR, 26).and(MASK_2).intoArray(out, outpos);
+
+        outpos += VLEN_256;
+        packed.lanewise(VectorOperators.LSHR, 28).and(MASK_2).intoArray(out, outpos);
+
+        outpos += VLEN_256;
+        packed.lanewise(VectorOperators.LSHR, 30).and(MASK_2).intoArray(out, outpos);
+
+        packed = IntVector.fromArray(SPECIES_256, in, inpos + 8);
+        outpos += VLEN_256;
+        packed.and(MASK_2).intoArray(out, outpos);
+
+        outpos += VLEN_256;
+        packed.lanewise(VectorOperators.LSHR, 2).and(MASK_2).intoArray(out, outpos);
+
+        outpos += VLEN_256;
+        packed.lanewise(VectorOperators.LSHR, 4).and(MASK_2).intoArray(out, outpos);
+
+        outpos += VLEN_256;
+        packed.lanewise(VectorOperators.LSHR, 6).and(MASK_2).intoArray(out, outpos);
+
+        outpos += VLEN_256;
+        packed.lanewise(VectorOperators.LSHR, 8).and(MASK_2).intoArray(out, outpos);
+
+        outpos += VLEN_256;
+        packed.lanewise(VectorOperators.LSHR, 10).and(MASK_2).intoArray(out, outpos);
+
+        outpos += VLEN_256;
+        packed.lanewise(VectorOperators.LSHR, 12).and(MASK_2).intoArray(out, outpos);
+
+        outpos += VLEN_256;
+        packed.lanewise(VectorOperators.LSHR, 14).and(MASK_2).intoArray(out, outpos);
+
+        outpos += VLEN_256;
+        packed.lanewise(VectorOperators.LSHR, 16).and(MASK_2).intoArray(out, outpos);
+
+        outpos += VLEN_256;
+        packed.lanewise(VectorOperators.LSHR, 18).and(MASK_2).intoArray(out, outpos);
+
+        outpos += VLEN_256;
+        packed.lanewise(VectorOperators.LSHR, 20).and(MASK_2).intoArray(out, outpos);
+
+        outpos += VLEN_256;
+        packed.lanewise(VectorOperators.LSHR, 22).and(MASK_2).intoArray(out, outpos);
+
+        outpos += VLEN_256;
+        packed.lanewise(VectorOperators.LSHR, 24).and(MASK_2).intoArray(out, outpos);
+
+        outpos += VLEN_256;
+        packed.lanewise(VectorOperators.LSHR, 26).and(MASK_2).intoArray(out, outpos);
+
+        outpos += VLEN_256;
+        packed.lanewise(VectorOperators.LSHR, 28).and(MASK_2).intoArray(out, outpos);
+
+        outpos += VLEN_256;
+        packed.lanewise(VectorOperators.LSHR, 30).and(MASK_2).intoArray(out, outpos);
+    }
+
+ static void fastpack3(final int[] in, int inpos, final int[] out, int outpos) { // 3-bit pack: 32 vector loads from in -> 3 vector stores to out
+ var iV = IntVector.fromArray(SPECIES_256, in, inpos);
+ var oV = iV.and(MASK_3);
+ // Loads start at inpos, inpos + 8, ..., inpos + 248. Each one is masked with MASK_3 (assumed to keep the low 3 bits of a lane; defined outside this view), shifted to the next free bit offset and OR-ed into the accumulator oV. All operations are lane-wise, so lane j of every load ends up in lane j of the output.
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 8);
+ oV = iV.and(MASK_3).lanewise(VectorOperators.LSHL, 3).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 16);
+ oV = iV.and(MASK_3).lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 24);
+ oV = iV.and(MASK_3).lanewise(VectorOperators.LSHL, 9).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 32);
+ oV = iV.and(MASK_3).lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 40);
+ oV = iV.and(MASK_3).lanewise(VectorOperators.LSHL, 15).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 48);
+ oV = iV.and(MASK_3).lanewise(VectorOperators.LSHL, 18).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 56);
+ oV = iV.and(MASK_3).lanewise(VectorOperators.LSHL, 21).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 64);
+ oV = iV.and(MASK_3).lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 72);
+ oV = iV.and(MASK_3).lanewise(VectorOperators.LSHL, 27).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 80);
+ oV = iV.and(MASK_3).lanewise(VectorOperators.LSHL, 30).or(oV);
+ // The load from inpos + 80 was shifted by 30, so only its low 2 bits fit in this word; flush the first packed word.
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Carry: iV still holds the load from inpos + 80; its bit 2, which did not fit above, becomes bit 0 of the second word.
+ oV = iV.and(MASK_3).lanewise(VectorOperators.LSHR, 2);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 88);
+ oV = iV.and(MASK_3).lanewise(VectorOperators.LSHL, 1).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 96);
+ oV = iV.and(MASK_3).lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 104);
+ oV = iV.and(MASK_3).lanewise(VectorOperators.LSHL, 7).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 112);
+ oV = iV.and(MASK_3).lanewise(VectorOperators.LSHL, 10).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 120);
+ oV = iV.and(MASK_3).lanewise(VectorOperators.LSHL, 13).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 128);
+ oV = iV.and(MASK_3).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 136);
+ oV = iV.and(MASK_3).lanewise(VectorOperators.LSHL, 19).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 144);
+ oV = iV.and(MASK_3).lanewise(VectorOperators.LSHL, 22).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 152);
+ oV = iV.and(MASK_3).lanewise(VectorOperators.LSHL, 25).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 160);
+ oV = iV.and(MASK_3).lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 168);
+ oV = iV.and(MASK_3).lanewise(VectorOperators.LSHL, 31).or(oV);
+ // The load from inpos + 168 was shifted by 31, so only its lowest bit fit; flush the second packed word.
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Carry: bits 1..2 of the load from inpos + 168 become bits 0..1 of the third word.
+ oV = iV.and(MASK_3).lanewise(VectorOperators.LSHR, 1);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 176);
+ oV = iV.and(MASK_3).lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 184);
+ oV = iV.and(MASK_3).lanewise(VectorOperators.LSHL, 5).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 192);
+ oV = iV.and(MASK_3).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 200);
+ oV = iV.and(MASK_3).lanewise(VectorOperators.LSHL, 11).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 208);
+ oV = iV.and(MASK_3).lanewise(VectorOperators.LSHL, 14).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 216);
+ oV = iV.and(MASK_3).lanewise(VectorOperators.LSHL, 17).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 224);
+ oV = iV.and(MASK_3).lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 232);
+ oV = iV.and(MASK_3).lanewise(VectorOperators.LSHL, 23).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 240);
+ oV = iV.and(MASK_3).lanewise(VectorOperators.LSHL, 26).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 248);
+ oV = iV.and(MASK_3).lanewise(VectorOperators.LSHL, 29).or(oV);
+ // 29 + 3 == 32: the third word ends exactly on a value boundary, so nothing is carried. outpos is not advanced because it is not read again.
+ oV.intoArray(out, outpos);
+ }
+
+ static void fastpackNoMask3(final int[] in, int inpos, final int[] out, int outpos) { // 3-bit pack without masking: 32 vector loads from in -> 3 vector stores to out
+ var iV = IntVector.fromArray(SPECIES_256, in, inpos);
+ var oV = iV;
+ // Loads start at inpos, inpos + 8, ..., inpos + 248 and are OR-ed into oV at bit offsets advancing by 3. No mask is applied, so any set bit above bit 2 of an input would be OR-ed into neighbouring fields; presumably callers guarantee every value fits in 3 bits (not checked here).
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 8);
+ oV = iV.lanewise(VectorOperators.LSHL, 3).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 16);
+ oV = iV.lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 24);
+ oV = iV.lanewise(VectorOperators.LSHL, 9).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 32);
+ oV = iV.lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 40);
+ oV = iV.lanewise(VectorOperators.LSHL, 15).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 48);
+ oV = iV.lanewise(VectorOperators.LSHL, 18).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 56);
+ oV = iV.lanewise(VectorOperators.LSHL, 21).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 64);
+ oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 72);
+ oV = iV.lanewise(VectorOperators.LSHL, 27).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 80);
+ oV = iV.lanewise(VectorOperators.LSHL, 30).or(oV);
+ // The load from inpos + 80 was shifted by 30, so only its low 2 bits fit in this word; flush the first packed word.
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Carry: iV still holds the load from inpos + 80; shifting it right by 2 puts the part that did not fit at bit 0 of the second word.
+ oV = iV.lanewise(VectorOperators.LSHR, 2);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 88);
+ oV = iV.lanewise(VectorOperators.LSHL, 1).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 96);
+ oV = iV.lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 104);
+ oV = iV.lanewise(VectorOperators.LSHL, 7).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 112);
+ oV = iV.lanewise(VectorOperators.LSHL, 10).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 120);
+ oV = iV.lanewise(VectorOperators.LSHL, 13).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 128);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 136);
+ oV = iV.lanewise(VectorOperators.LSHL, 19).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 144);
+ oV = iV.lanewise(VectorOperators.LSHL, 22).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 152);
+ oV = iV.lanewise(VectorOperators.LSHL, 25).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 160);
+ oV = iV.lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 168);
+ oV = iV.lanewise(VectorOperators.LSHL, 31).or(oV);
+ // The load from inpos + 168 was shifted by 31, so only its lowest bit fit; flush the second packed word.
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Carry: shifting the load from inpos + 168 right by 1 puts its remaining bits at bit 0 of the third word.
+ oV = iV.lanewise(VectorOperators.LSHR, 1);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 176);
+ oV = iV.lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 184);
+ oV = iV.lanewise(VectorOperators.LSHL, 5).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 192);
+ oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 200);
+ oV = iV.lanewise(VectorOperators.LSHL, 11).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 208);
+ oV = iV.lanewise(VectorOperators.LSHL, 14).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 216);
+ oV = iV.lanewise(VectorOperators.LSHL, 17).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 224);
+ oV = iV.lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 232);
+ oV = iV.lanewise(VectorOperators.LSHL, 23).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 240);
+ oV = iV.lanewise(VectorOperators.LSHL, 26).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 248);
+ oV = iV.lanewise(VectorOperators.LSHL, 29).or(oV);
+ // 29 + 3 == 32: the third word ends exactly on a value boundary, so nothing is carried. outpos is not advanced because it is not read again.
+ oV.intoArray(out, outpos);
+ }
+
+ static void fastunpack3(final int[] in, int inpos, final int[] out, int outpos) { // 3-bit unpack: 3 vector loads from in -> 32 vector stores to out
+ var iV = IntVector.fromArray(SPECIES_256, in, inpos);
+ iV.and(MASK_3).intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Each step shifts the current packed word right by the field's bit offset (advancing by 3), masks with MASK_3 (assumed to keep the low 3 bits of a lane; defined outside this view) and stores one vector of values, advancing outpos by VLEN_256.
+ iV.lanewise(VectorOperators.LSHR, 3).and(MASK_3).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 6).and(MASK_3).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 9).and(MASK_3).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 12).and(MASK_3).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 15).and(MASK_3).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 18).and(MASK_3).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 21).and(MASK_3).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 24).and(MASK_3).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 27).and(MASK_3).intoArray(out, outpos);
+ outpos += VLEN_256;
+ // The next field straddles two packed words: its low 2 bits are bits 30..31 of the first word...
+ var oV = iV.lanewise(VectorOperators.LSHR, 30).and(MASK_3);
+ // ...and its top bit is bit 0 of the second word, moved up to bit 2 before being OR-ed in.
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 8);
+ oV = iV.and(1).lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 1).and(MASK_3).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 4).and(MASK_3).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 7).and(MASK_3).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 10).and(MASK_3).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 13).and(MASK_3).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 16).and(MASK_3).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 19).and(MASK_3).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 22).and(MASK_3).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 25).and(MASK_3).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 28).and(MASK_3).intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Second straddling field: its lowest bit is bit 31 of the second word...
+ oV = iV.lanewise(VectorOperators.LSHR, 31).and(MASK_3);
+ // ...and its upper 2 bits are bits 0..1 of the third word, moved up by 1 before being OR-ed in.
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 16);
+ oV = iV.and(3).lanewise(VectorOperators.LSHL, 1).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 2).and(MASK_3).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 5).and(MASK_3).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 8).and(MASK_3).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 11).and(MASK_3).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 14).and(MASK_3).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 17).and(MASK_3).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 20).and(MASK_3).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 23).and(MASK_3).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 26).and(MASK_3).intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Last field occupies bits 29..31 of the third word; outpos is not advanced afterwards because it is not read again.
+ iV.lanewise(VectorOperators.LSHR, 29).and(MASK_3).intoArray(out, outpos);
+ }
+
+ static void fastpack4(final int[] in, int inpos, final int[] out, int outpos) { // 4-bit pack: 32 vector loads from in -> 4 vector stores to out
+ var iV = IntVector.fromArray(SPECIES_256, in, inpos);
+ var oV = iV.and(MASK_4);
+ // Loads start at inpos, inpos + 8, ..., inpos + 248. Each one is masked with MASK_4 (assumed to keep the low 4 bits of a lane; defined outside this view), shifted to the next bit offset (advancing by 4) and OR-ed into the accumulator oV. All operations are lane-wise, so lane j of every load ends up in lane j of the output.
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 8);
+ oV = iV.and(MASK_4).lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 16);
+ oV = iV.and(MASK_4).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 24);
+ oV = iV.and(MASK_4).lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 32);
+ oV = iV.and(MASK_4).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 40);
+ oV = iV.and(MASK_4).lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 48);
+ oV = iV.and(MASK_4).lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 56);
+ oV = iV.and(MASK_4).lanewise(VectorOperators.LSHL, 28).or(oV);
+ // 28 + 4 == 32: eight 4-bit fields fill the word exactly, so no bits are carried into the next word.
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Second packed word: start a fresh accumulator from the load at inpos + 64.
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 64);
+ oV = iV.and(MASK_4);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 72);
+ oV = iV.and(MASK_4).lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 80);
+ oV = iV.and(MASK_4).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 88);
+ oV = iV.and(MASK_4).lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 96);
+ oV = iV.and(MASK_4).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 104);
+ oV = iV.and(MASK_4).lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 112);
+ oV = iV.and(MASK_4).lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 120);
+ oV = iV.and(MASK_4).lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Third packed word: loads from inpos + 128 through inpos + 184.
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 128);
+ oV = iV.and(MASK_4);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 136);
+ oV = iV.and(MASK_4).lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 144);
+ oV = iV.and(MASK_4).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 152);
+ oV = iV.and(MASK_4).lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 160);
+ oV = iV.and(MASK_4).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 168);
+ oV = iV.and(MASK_4).lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 176);
+ oV = iV.and(MASK_4).lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 184);
+ oV = iV.and(MASK_4).lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Fourth packed word: loads from inpos + 192 through inpos + 248.
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 192);
+ oV = iV.and(MASK_4);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 200);
+ oV = iV.and(MASK_4).lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 208);
+ oV = iV.and(MASK_4).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 216);
+ oV = iV.and(MASK_4).lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 224);
+ oV = iV.and(MASK_4).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 232);
+ oV = iV.and(MASK_4).lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 240);
+ oV = iV.and(MASK_4).lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 248);
+ oV = iV.and(MASK_4).lanewise(VectorOperators.LSHL, 28).or(oV);
+ // Final store; outpos is not advanced because it is not read again.
+ oV.intoArray(out, outpos);
+ }
+
+ static void fastpackNoMask4(final int[] in, int inpos, final int[] out, int outpos) { // 4-bit pack without masking: 32 vector loads from in -> 4 vector stores to out
+ var iV = IntVector.fromArray(SPECIES_256, in, inpos);
+ var oV = iV;
+ // Loads start at inpos, inpos + 8, ..., inpos + 248 and are OR-ed into oV at bit offsets advancing by 4. No mask is applied, so any set bit above bit 3 of an input would be OR-ed into neighbouring fields; presumably callers guarantee every value fits in 4 bits (not checked here).
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 8);
+ oV = iV.lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 16);
+ oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 24);
+ oV = iV.lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 32);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 40);
+ oV = iV.lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 48);
+ oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 56);
+ oV = iV.lanewise(VectorOperators.LSHL, 28).or(oV);
+ // 28 + 4 == 32: eight 4-bit fields fill the word exactly, so no bits are carried into the next word.
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Second packed word: start a fresh accumulator from the load at inpos + 64.
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 64);
+ oV = iV;
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 72);
+ oV = iV.lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 80);
+ oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 88);
+ oV = iV.lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 96);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 104);
+ oV = iV.lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 112);
+ oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 120);
+ oV = iV.lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Third packed word: loads from inpos + 128 through inpos + 184.
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 128);
+ oV = iV;
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 136);
+ oV = iV.lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 144);
+ oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 152);
+ oV = iV.lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 160);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 168);
+ oV = iV.lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 176);
+ oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 184);
+ oV = iV.lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Fourth packed word: loads from inpos + 192 through inpos + 248.
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 192);
+ oV = iV;
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 200);
+ oV = iV.lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 208);
+ oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 216);
+ oV = iV.lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 224);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 232);
+ oV = iV.lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 240);
+ oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 248);
+ oV = iV.lanewise(VectorOperators.LSHL, 28).or(oV);
+ // Final store; outpos is not advanced because it is not read again.
+ oV.intoArray(out, outpos);
+ }
+
+ static void fastunpack4(final int[] in, int inpos, final int[] out, int outpos) { // 4-bit unpack: 4 vector loads from in -> 32 vector stores to out
+ var iV = IntVector.fromArray(SPECIES_256, in, inpos);
+ iV.and(MASK_4).intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Each packed word yields eight fields at bit offsets 0, 4, ..., 28: shift right by the offset, mask with MASK_4 (assumed to keep the low 4 bits of a lane; defined outside this view), store, and advance outpos by VLEN_256. No field straddles two words.
+ iV.lanewise(VectorOperators.LSHR, 4).and(MASK_4).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 8).and(MASK_4).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 12).and(MASK_4).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 16).and(MASK_4).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 20).and(MASK_4).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 24).and(MASK_4).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 28).and(MASK_4).intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Second packed word, loaded from inpos + 8.
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 8);
+ iV.and(MASK_4).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 4).and(MASK_4).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 8).and(MASK_4).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 12).and(MASK_4).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 16).and(MASK_4).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 20).and(MASK_4).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 24).and(MASK_4).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 28).and(MASK_4).intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Third packed word, loaded from inpos + 16.
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 16);
+ iV.and(MASK_4).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 4).and(MASK_4).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 8).and(MASK_4).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 12).and(MASK_4).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 16).and(MASK_4).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 20).and(MASK_4).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 24).and(MASK_4).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 28).and(MASK_4).intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Fourth packed word, loaded from inpos + 24.
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 24);
+ iV.and(MASK_4).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 4).and(MASK_4).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 8).and(MASK_4).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 12).and(MASK_4).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 16).and(MASK_4).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 20).and(MASK_4).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 24).and(MASK_4).intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Last field; outpos is not advanced afterwards because it is not read again.
+ iV.lanewise(VectorOperators.LSHR, 28).and(MASK_4).intoArray(out, outpos);
+ }
+
+ static void fastpack5(final int[] in, int inpos, final int[] out, int outpos) { // 5-bit pack: 32 vector loads from in -> 5 vector stores to out
+ var iV = IntVector.fromArray(SPECIES_256, in, inpos);
+ var oV = iV.and(MASK_5);
+ // Loads start at inpos, inpos + 8, ..., inpos + 248. Each one is masked with MASK_5 (assumed to keep the low 5 bits of a lane; defined outside this view), shifted to the next free bit offset and OR-ed into the accumulator oV. All operations are lane-wise, so lane j of every load ends up in lane j of the output.
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 8);
+ oV = iV.and(MASK_5).lanewise(VectorOperators.LSHL, 5).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 16);
+ oV = iV.and(MASK_5).lanewise(VectorOperators.LSHL, 10).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 24);
+ oV = iV.and(MASK_5).lanewise(VectorOperators.LSHL, 15).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 32);
+ oV = iV.and(MASK_5).lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 40);
+ oV = iV.and(MASK_5).lanewise(VectorOperators.LSHL, 25).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 48);
+ oV = iV.and(MASK_5).lanewise(VectorOperators.LSHL, 30).or(oV);
+ // The load from inpos + 48 was shifted by 30, so only its low 2 bits fit in this word; flush the first packed word.
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Carry: bits 2..4 of the load from inpos + 48 become bits 0..2 of the second word.
+ oV = iV.and(MASK_5).lanewise(VectorOperators.LSHR, 2);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 56);
+ oV = iV.and(MASK_5).lanewise(VectorOperators.LSHL, 3).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 64);
+ oV = iV.and(MASK_5).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 72);
+ oV = iV.and(MASK_5).lanewise(VectorOperators.LSHL, 13).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 80);
+ oV = iV.and(MASK_5).lanewise(VectorOperators.LSHL, 18).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 88);
+ oV = iV.and(MASK_5).lanewise(VectorOperators.LSHL, 23).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 96);
+ oV = iV.and(MASK_5).lanewise(VectorOperators.LSHL, 28).or(oV);
+ // The load from inpos + 96 was shifted by 28, so only its low 4 bits fit; flush the second packed word.
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Carry: bit 4 of the load from inpos + 96 becomes bit 0 of the third word.
+ oV = iV.and(MASK_5).lanewise(VectorOperators.LSHR, 4);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 104);
+ oV = iV.and(MASK_5).lanewise(VectorOperators.LSHL, 1).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 112);
+ oV = iV.and(MASK_5).lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 120);
+ oV = iV.and(MASK_5).lanewise(VectorOperators.LSHL, 11).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 128);
+ oV = iV.and(MASK_5).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 136);
+ oV = iV.and(MASK_5).lanewise(VectorOperators.LSHL, 21).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 144);
+ oV = iV.and(MASK_5).lanewise(VectorOperators.LSHL, 26).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 152);
+ oV = iV.and(MASK_5).lanewise(VectorOperators.LSHL, 31).or(oV);
+ // The load from inpos + 152 was shifted by 31, so only its lowest bit fit; flush the third packed word.
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Carry: bits 1..4 of the load from inpos + 152 become bits 0..3 of the fourth word.
+ oV = iV.and(MASK_5).lanewise(VectorOperators.LSHR, 1);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 160);
+ oV = iV.and(MASK_5).lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 168);
+ oV = iV.and(MASK_5).lanewise(VectorOperators.LSHL, 9).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 176);
+ oV = iV.and(MASK_5).lanewise(VectorOperators.LSHL, 14).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 184);
+ oV = iV.and(MASK_5).lanewise(VectorOperators.LSHL, 19).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 192);
+ oV = iV.and(MASK_5).lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 200);
+ oV = iV.and(MASK_5).lanewise(VectorOperators.LSHL, 29).or(oV);
+ // The load from inpos + 200 was shifted by 29, so only its low 3 bits fit; flush the fourth packed word.
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Carry: bits 3..4 of the load from inpos + 200 become bits 0..1 of the fifth word.
+ oV = iV.and(MASK_5).lanewise(VectorOperators.LSHR, 3);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 208);
+ oV = iV.and(MASK_5).lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 216);
+ oV = iV.and(MASK_5).lanewise(VectorOperators.LSHL, 7).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 224);
+ oV = iV.and(MASK_5).lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 232);
+ oV = iV.and(MASK_5).lanewise(VectorOperators.LSHL, 17).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 240);
+ oV = iV.and(MASK_5).lanewise(VectorOperators.LSHL, 22).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 248);
+ oV = iV.and(MASK_5).lanewise(VectorOperators.LSHL, 27).or(oV);
+ // 27 + 5 == 32: the fifth word ends exactly on a value boundary, so nothing is carried. outpos is not advanced because it is not read again.
+ oV.intoArray(out, outpos);
+ }
+
+ static void fastpackNoMask5(final int[] in, int inpos, final int[] out, int outpos) { // 5-bit pack without masking: 32 vector loads from in -> 5 vector stores to out
+ var iV = IntVector.fromArray(SPECIES_256, in, inpos);
+ var oV = iV;
+ // Loads start at inpos, inpos + 8, ..., inpos + 248 and are OR-ed into oV at bit offsets advancing by 5. No mask is applied, so any set bit above bit 4 of an input would be OR-ed into neighbouring fields; presumably callers guarantee every value fits in 5 bits (not checked here).
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 8);
+ oV = iV.lanewise(VectorOperators.LSHL, 5).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 16);
+ oV = iV.lanewise(VectorOperators.LSHL, 10).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 24);
+ oV = iV.lanewise(VectorOperators.LSHL, 15).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 32);
+ oV = iV.lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 40);
+ oV = iV.lanewise(VectorOperators.LSHL, 25).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 48);
+ oV = iV.lanewise(VectorOperators.LSHL, 30).or(oV);
+ // The load from inpos + 48 was shifted by 30, so only its low 2 bits fit in this word; flush the first packed word.
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Carry: shifting the load from inpos + 48 right by 2 puts the part that did not fit at bit 0 of the second word.
+ oV = iV.lanewise(VectorOperators.LSHR, 2);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 56);
+ oV = iV.lanewise(VectorOperators.LSHL, 3).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 64);
+ oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 72);
+ oV = iV.lanewise(VectorOperators.LSHL, 13).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 80);
+ oV = iV.lanewise(VectorOperators.LSHL, 18).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 88);
+ oV = iV.lanewise(VectorOperators.LSHL, 23).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 96);
+ oV = iV.lanewise(VectorOperators.LSHL, 28).or(oV);
+ // The load from inpos + 96 was shifted by 28, so only its low 4 bits fit; flush the second packed word.
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Carry: shifting the load from inpos + 96 right by 4 puts the part that did not fit at bit 0 of the third word.
+ oV = iV.lanewise(VectorOperators.LSHR, 4);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 104);
+ oV = iV.lanewise(VectorOperators.LSHL, 1).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 112);
+ oV = iV.lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 120);
+ oV = iV.lanewise(VectorOperators.LSHL, 11).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 128);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 136);
+ oV = iV.lanewise(VectorOperators.LSHL, 21).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 144);
+ oV = iV.lanewise(VectorOperators.LSHL, 26).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 152);
+ oV = iV.lanewise(VectorOperators.LSHL, 31).or(oV);
+ // The load from inpos + 152 was shifted by 31, so only its lowest bit fit; flush the third packed word.
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Carry: shifting the load from inpos + 152 right by 1 puts the part that did not fit at bit 0 of the fourth word.
+ oV = iV.lanewise(VectorOperators.LSHR, 1);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 160);
+ oV = iV.lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 168);
+ oV = iV.lanewise(VectorOperators.LSHL, 9).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 176);
+ oV = iV.lanewise(VectorOperators.LSHL, 14).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 184);
+ oV = iV.lanewise(VectorOperators.LSHL, 19).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 192);
+ oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 200);
+ oV = iV.lanewise(VectorOperators.LSHL, 29).or(oV);
+ // The load from inpos + 200 was shifted by 29, so only its low 3 bits fit; flush the fourth packed word.
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Carry: shifting the load from inpos + 200 right by 3 puts the part that did not fit at bit 0 of the fifth word.
+ oV = iV.lanewise(VectorOperators.LSHR, 3);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 208);
+ oV = iV.lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 216);
+ oV = iV.lanewise(VectorOperators.LSHL, 7).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 224);
+ oV = iV.lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 232);
+ oV = iV.lanewise(VectorOperators.LSHL, 17).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 240);
+ oV = iV.lanewise(VectorOperators.LSHL, 22).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 248);
+ oV = iV.lanewise(VectorOperators.LSHL, 27).or(oV);
+ // 27 + 5 == 32: the fifth word ends exactly on a value boundary, so nothing is carried. outpos is not advanced because it is not read again.
+ oV.intoArray(out, outpos);
+ }
+
+ static void fastunpack5(final int[] in, int inpos, final int[] out, int outpos) { // Expands 5 packed vectors (in[inpos], +8, +16, +24, +32) into 32 vectors stored consecutively in out.
+ var iV = IntVector.fromArray(SPECIES_256, in, inpos);
+ iV.and(MASK_5).intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Whole fields of this word: shift right in steps of 5, then mask with MASK_5 (presumably the low-5-bit mask; it is defined outside this view - confirm).
+ iV.lanewise(VectorOperators.LSHR, 5).and(MASK_5).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 10).and(MASK_5).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 15).and(MASK_5).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 20).and(MASK_5).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 25).and(MASK_5).intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Straddling value: the top 2 bits of this word (>>> 30) joined with the low 3 bits of the next word, moved up by 2.
+ var oV = iV.lanewise(VectorOperators.LSHR, 30).and(MASK_5);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 8);
+ oV = iV.and(7).lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 3).and(MASK_5).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 8).and(MASK_5).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 13).and(MASK_5).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 18).and(MASK_5).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 23).and(MASK_5).intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Straddling value: the top 4 bits of this word (>>> 28) joined with the low 1 bit of the next word, moved up by 4.
+ oV = iV.lanewise(VectorOperators.LSHR, 28).and(MASK_5);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 16);
+ oV = iV.and(1).lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 1).and(MASK_5).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 6).and(MASK_5).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 11).and(MASK_5).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 16).and(MASK_5).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 21).and(MASK_5).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 26).and(MASK_5).intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Straddling value: the top 1 bit of this word (>>> 31) joined with the low 4 bits of the next word, moved up by 1.
+ oV = iV.lanewise(VectorOperators.LSHR, 31).and(MASK_5);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 24);
+ oV = iV.and(15).lanewise(VectorOperators.LSHL, 1).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 4).and(MASK_5).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 9).and(MASK_5).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 14).and(MASK_5).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 19).and(MASK_5).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 24).and(MASK_5).intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Straddling value: the top 3 bits of this word (>>> 29) joined with the low 2 bits of the next word, moved up by 3.
+ oV = iV.lanewise(VectorOperators.LSHR, 29).and(MASK_5);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 32);
+ oV = iV.and(3).lanewise(VectorOperators.LSHL, 3).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Last input word: it ends on a field boundary, so nothing is carried; the final store does not advance outpos.
+ iV.lanewise(VectorOperators.LSHR, 2).and(MASK_5).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 7).and(MASK_5).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 12).and(MASK_5).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 17).and(MASK_5).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 22).and(MASK_5).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 27).and(MASK_5).intoArray(out, outpos);
+ }
+
+ static void fastpack6(final int[] in, int inpos, final int[] out, int outpos) { // Packs 32 input vectors (in[inpos + 8*k], k = 0..31) into 6 output vectors, masking each input with MASK_6 first.
+ var iV = IntVector.fromArray(SPECIES_256, in, inpos);
+ var oV = iV.and(MASK_6);
+ // Each further input is masked (MASK_6 is presumably the low-6-bit mask; defined outside this view - confirm), shifted to the next 6-bit slot and OR-ed in.
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 8);
+ oV = iV.and(MASK_6).lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 16);
+ oV = iV.and(MASK_6).lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 24);
+ oV = iV.and(MASK_6).lanewise(VectorOperators.LSHL, 18).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 32);
+ oV = iV.and(MASK_6).lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 40);
+ oV = iV.and(MASK_6).lanewise(VectorOperators.LSHL, 30).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // The input placed at shift 30 only had room for 2 bits in the stored word; the rest of it (>>> 2) seeds the next word.
+ oV = iV.and(MASK_6).lanewise(VectorOperators.LSHR, 2);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 48);
+ oV = iV.and(MASK_6).lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 56);
+ oV = iV.and(MASK_6).lanewise(VectorOperators.LSHL, 10).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 64);
+ oV = iV.and(MASK_6).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 72);
+ oV = iV.and(MASK_6).lanewise(VectorOperators.LSHL, 22).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 80);
+ oV = iV.and(MASK_6).lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // The input placed at shift 28 only had room for 4 bits; the rest of it (>>> 4) seeds the next word.
+ oV = iV.and(MASK_6).lanewise(VectorOperators.LSHR, 4);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 88);
+ oV = iV.and(MASK_6).lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 96);
+ oV = iV.and(MASK_6).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 104);
+ oV = iV.and(MASK_6).lanewise(VectorOperators.LSHL, 14).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 112);
+ oV = iV.and(MASK_6).lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 120);
+ oV = iV.and(MASK_6).lanewise(VectorOperators.LSHL, 26).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // The first 16 inputs filled 3 output words with nothing left over, so the second half restarts with the same shift pattern.
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 128);
+ oV = iV.and(MASK_6);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 136);
+ oV = iV.and(MASK_6).lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 144);
+ oV = iV.and(MASK_6).lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 152);
+ oV = iV.and(MASK_6).lanewise(VectorOperators.LSHL, 18).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 160);
+ oV = iV.and(MASK_6).lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 168);
+ oV = iV.and(MASK_6).lanewise(VectorOperators.LSHL, 30).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Carry the bits of the shift-30 input that did not fit (>>> 2) into the next word.
+ oV = iV.and(MASK_6).lanewise(VectorOperators.LSHR, 2);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 176);
+ oV = iV.and(MASK_6).lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 184);
+ oV = iV.and(MASK_6).lanewise(VectorOperators.LSHL, 10).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 192);
+ oV = iV.and(MASK_6).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 200);
+ oV = iV.and(MASK_6).lanewise(VectorOperators.LSHL, 22).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 208);
+ oV = iV.and(MASK_6).lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Carry the bits of the shift-28 input that did not fit (>>> 4) into the next word.
+ oV = iV.and(MASK_6).lanewise(VectorOperators.LSHR, 4);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 216);
+ oV = iV.and(MASK_6).lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 224);
+ oV = iV.and(MASK_6).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 232);
+ oV = iV.and(MASK_6).lanewise(VectorOperators.LSHL, 14).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 240);
+ oV = iV.and(MASK_6).lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 248);
+ oV = iV.and(MASK_6).lanewise(VectorOperators.LSHL, 26).or(oV);
+ // Sixth and final output word; outpos is not advanced after this store.
+ oV.intoArray(out, outpos);
+ }
+
+ static void fastpackNoMask6(final int[] in, int inpos, final int[] out, int outpos) { // Packs 32 input vectors (in[inpos + 8*k], k = 0..31) into 6 output vectors at 6-bit slots, without masking the inputs.
+ var iV = IntVector.fromArray(SPECIES_256, in, inpos);
+ var oV = iV;
+ // NOTE(review): inputs are OR-ed in unmasked, so any bit above bit 5 would spill into neighbouring slots - presumably callers guarantee 6-bit values; confirm.
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 8);
+ oV = iV.lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 16);
+ oV = iV.lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 24);
+ oV = iV.lanewise(VectorOperators.LSHL, 18).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 32);
+ oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 40);
+ oV = iV.lanewise(VectorOperators.LSHL, 30).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // The input placed at shift 30 only had room for 2 bits in the stored word; the rest of it (>>> 2) seeds the next word.
+ oV = iV.lanewise(VectorOperators.LSHR, 2);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 48);
+ oV = iV.lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 56);
+ oV = iV.lanewise(VectorOperators.LSHL, 10).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 64);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 72);
+ oV = iV.lanewise(VectorOperators.LSHL, 22).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 80);
+ oV = iV.lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // The input placed at shift 28 only had room for 4 bits; the rest of it (>>> 4) seeds the next word.
+ oV = iV.lanewise(VectorOperators.LSHR, 4);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 88);
+ oV = iV.lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 96);
+ oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 104);
+ oV = iV.lanewise(VectorOperators.LSHL, 14).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 112);
+ oV = iV.lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 120);
+ oV = iV.lanewise(VectorOperators.LSHL, 26).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // The first 16 inputs filled 3 output words with nothing left over, so the second half restarts with the same shift pattern.
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 128);
+ oV = iV;
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 136);
+ oV = iV.lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 144);
+ oV = iV.lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 152);
+ oV = iV.lanewise(VectorOperators.LSHL, 18).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 160);
+ oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 168);
+ oV = iV.lanewise(VectorOperators.LSHL, 30).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Carry the bits of the shift-30 input that did not fit (>>> 2) into the next word.
+ oV = iV.lanewise(VectorOperators.LSHR, 2);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 176);
+ oV = iV.lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 184);
+ oV = iV.lanewise(VectorOperators.LSHL, 10).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 192);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 200);
+ oV = iV.lanewise(VectorOperators.LSHL, 22).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 208);
+ oV = iV.lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Carry the bits of the shift-28 input that did not fit (>>> 4) into the next word.
+ oV = iV.lanewise(VectorOperators.LSHR, 4);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 216);
+ oV = iV.lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 224);
+ oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 232);
+ oV = iV.lanewise(VectorOperators.LSHL, 14).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 240);
+ oV = iV.lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 248);
+ oV = iV.lanewise(VectorOperators.LSHL, 26).or(oV);
+ // Sixth and final output word; outpos is not advanced after this store.
+ oV.intoArray(out, outpos);
+ }
+
+ static void fastunpack6(final int[] in, int inpos, final int[] out, int outpos) { // Expands 6 packed vectors (in[inpos], +8, ..., +40) into 32 vectors stored consecutively in out.
+ var iV = IntVector.fromArray(SPECIES_256, in, inpos);
+ iV.and(MASK_6).intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Whole fields of this word: shift right in steps of 6, then mask with MASK_6 (presumably the low-6-bit mask; it is defined outside this view - confirm).
+ iV.lanewise(VectorOperators.LSHR, 6).and(MASK_6).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 12).and(MASK_6).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 18).and(MASK_6).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 24).and(MASK_6).intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Straddling value: the top 2 bits of this word (>>> 30) joined with the low 4 bits of the next word, moved up by 2.
+ var oV = iV.lanewise(VectorOperators.LSHR, 30).and(MASK_6);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 8);
+ oV = iV.and(15).lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 4).and(MASK_6).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 10).and(MASK_6).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 16).and(MASK_6).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 22).and(MASK_6).intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Straddling value: the top 4 bits of this word (>>> 28) joined with the low 2 bits of the next word, moved up by 4.
+ oV = iV.lanewise(VectorOperators.LSHR, 28).and(MASK_6);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 16);
+ oV = iV.and(3).lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 2).and(MASK_6).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 8).and(MASK_6).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 14).and(MASK_6).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 20).and(MASK_6).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 26).and(MASK_6).intoArray(out, outpos);
+ outpos += VLEN_256;
+ // The previous word ended exactly on a field boundary, so the fourth word starts a fresh cycle with the same shift pattern.
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 24);
+ iV.and(MASK_6).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 6).and(MASK_6).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 12).and(MASK_6).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 18).and(MASK_6).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 24).and(MASK_6).intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Straddling value: the top 2 bits of this word (>>> 30) joined with the low 4 bits of the next word, moved up by 2.
+ oV = iV.lanewise(VectorOperators.LSHR, 30).and(MASK_6);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 32);
+ oV = iV.and(15).lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 4).and(MASK_6).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 10).and(MASK_6).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 16).and(MASK_6).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 22).and(MASK_6).intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Straddling value: the top 4 bits of this word (>>> 28) joined with the low 2 bits of the next word, moved up by 4.
+ oV = iV.lanewise(VectorOperators.LSHR, 28).and(MASK_6);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 40);
+ oV = iV.and(3).lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Last input word: nothing is carried out of it; the final store does not advance outpos.
+ iV.lanewise(VectorOperators.LSHR, 2).and(MASK_6).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 8).and(MASK_6).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 14).and(MASK_6).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 20).and(MASK_6).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 26).and(MASK_6).intoArray(out, outpos);
+ }
+
+ static void fastpack7(final int[] in, int inpos, final int[] out, int outpos) { // Packs 32 input vectors (in[inpos + 8*k], k = 0..31) into 7 output vectors, masking each input with MASK_7 first.
+ var iV = IntVector.fromArray(SPECIES_256, in, inpos);
+ var oV = iV.and(MASK_7);
+ // Each further input is masked (MASK_7 is presumably the low-7-bit mask; defined outside this view - confirm), shifted to the next 7-bit slot and OR-ed in.
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 8);
+ oV = iV.and(MASK_7).lanewise(VectorOperators.LSHL, 7).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 16);
+ oV = iV.and(MASK_7).lanewise(VectorOperators.LSHL, 14).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 24);
+ oV = iV.and(MASK_7).lanewise(VectorOperators.LSHL, 21).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 32);
+ oV = iV.and(MASK_7).lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // The input placed at shift 28 only had room for 4 bits in the stored word; the rest of it (>>> 4) seeds the next word.
+ oV = iV.and(MASK_7).lanewise(VectorOperators.LSHR, 4);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 40);
+ oV = iV.and(MASK_7).lanewise(VectorOperators.LSHL, 3).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 48);
+ oV = iV.and(MASK_7).lanewise(VectorOperators.LSHL, 10).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 56);
+ oV = iV.and(MASK_7).lanewise(VectorOperators.LSHL, 17).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 64);
+ oV = iV.and(MASK_7).lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 72);
+ oV = iV.and(MASK_7).lanewise(VectorOperators.LSHL, 31).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Shift 31 left room for only 1 bit; carry the remainder (>>> 1).
+ oV = iV.and(MASK_7).lanewise(VectorOperators.LSHR, 1);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 80);
+ oV = iV.and(MASK_7).lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 88);
+ oV = iV.and(MASK_7).lanewise(VectorOperators.LSHL, 13).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 96);
+ oV = iV.and(MASK_7).lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 104);
+ oV = iV.and(MASK_7).lanewise(VectorOperators.LSHL, 27).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Shift 27 left room for only 5 bits; carry the remainder (>>> 5).
+ oV = iV.and(MASK_7).lanewise(VectorOperators.LSHR, 5);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 112);
+ oV = iV.and(MASK_7).lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 120);
+ oV = iV.and(MASK_7).lanewise(VectorOperators.LSHL, 9).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 128);
+ oV = iV.and(MASK_7).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 136);
+ oV = iV.and(MASK_7).lanewise(VectorOperators.LSHL, 23).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 144);
+ oV = iV.and(MASK_7).lanewise(VectorOperators.LSHL, 30).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Shift 30 left room for only 2 bits; carry the remainder (>>> 2).
+ oV = iV.and(MASK_7).lanewise(VectorOperators.LSHR, 2);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 152);
+ oV = iV.and(MASK_7).lanewise(VectorOperators.LSHL, 5).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 160);
+ oV = iV.and(MASK_7).lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 168);
+ oV = iV.and(MASK_7).lanewise(VectorOperators.LSHL, 19).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 176);
+ oV = iV.and(MASK_7).lanewise(VectorOperators.LSHL, 26).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Shift 26 left room for only 6 bits; carry the remainder (>>> 6).
+ oV = iV.and(MASK_7).lanewise(VectorOperators.LSHR, 6);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 184);
+ oV = iV.and(MASK_7).lanewise(VectorOperators.LSHL, 1).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 192);
+ oV = iV.and(MASK_7).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 200);
+ oV = iV.and(MASK_7).lanewise(VectorOperators.LSHL, 15).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 208);
+ oV = iV.and(MASK_7).lanewise(VectorOperators.LSHL, 22).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 216);
+ oV = iV.and(MASK_7).lanewise(VectorOperators.LSHL, 29).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Shift 29 left room for only 3 bits; carry the remainder (>>> 3).
+ oV = iV.and(MASK_7).lanewise(VectorOperators.LSHR, 3);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 224);
+ oV = iV.and(MASK_7).lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 232);
+ oV = iV.and(MASK_7).lanewise(VectorOperators.LSHL, 11).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 240);
+ oV = iV.and(MASK_7).lanewise(VectorOperators.LSHL, 18).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 248);
+ oV = iV.and(MASK_7).lanewise(VectorOperators.LSHL, 25).or(oV);
+ // Seventh and final output word; outpos is not advanced after this store.
+ oV.intoArray(out, outpos);
+ }
+
+ static void fastpackNoMask7(final int[] in, int inpos, final int[] out, int outpos) { // Packs 32 input vectors (in[inpos + 8*k], k = 0..31) into 7 output vectors at 7-bit slots, without masking the inputs.
+ var iV = IntVector.fromArray(SPECIES_256, in, inpos);
+ var oV = iV;
+ // NOTE(review): inputs are OR-ed in unmasked, so any bit above bit 6 would spill into neighbouring slots - presumably callers guarantee 7-bit values; confirm.
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 8);
+ oV = iV.lanewise(VectorOperators.LSHL, 7).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 16);
+ oV = iV.lanewise(VectorOperators.LSHL, 14).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 24);
+ oV = iV.lanewise(VectorOperators.LSHL, 21).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 32);
+ oV = iV.lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // The input placed at shift 28 only had room for 4 bits in the stored word; the rest of it (>>> 4) seeds the next word.
+ oV = iV.lanewise(VectorOperators.LSHR, 4);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 40);
+ oV = iV.lanewise(VectorOperators.LSHL, 3).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 48);
+ oV = iV.lanewise(VectorOperators.LSHL, 10).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 56);
+ oV = iV.lanewise(VectorOperators.LSHL, 17).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 64);
+ oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 72);
+ oV = iV.lanewise(VectorOperators.LSHL, 31).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Shift 31 left room for only 1 bit; carry the remainder (>>> 1).
+ oV = iV.lanewise(VectorOperators.LSHR, 1);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 80);
+ oV = iV.lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 88);
+ oV = iV.lanewise(VectorOperators.LSHL, 13).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 96);
+ oV = iV.lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 104);
+ oV = iV.lanewise(VectorOperators.LSHL, 27).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Shift 27 left room for only 5 bits; carry the remainder (>>> 5).
+ oV = iV.lanewise(VectorOperators.LSHR, 5);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 112);
+ oV = iV.lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 120);
+ oV = iV.lanewise(VectorOperators.LSHL, 9).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 128);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 136);
+ oV = iV.lanewise(VectorOperators.LSHL, 23).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 144);
+ oV = iV.lanewise(VectorOperators.LSHL, 30).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Shift 30 left room for only 2 bits; carry the remainder (>>> 2).
+ oV = iV.lanewise(VectorOperators.LSHR, 2);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 152);
+ oV = iV.lanewise(VectorOperators.LSHL, 5).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 160);
+ oV = iV.lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 168);
+ oV = iV.lanewise(VectorOperators.LSHL, 19).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 176);
+ oV = iV.lanewise(VectorOperators.LSHL, 26).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Shift 26 left room for only 6 bits; carry the remainder (>>> 6).
+ oV = iV.lanewise(VectorOperators.LSHR, 6);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 184);
+ oV = iV.lanewise(VectorOperators.LSHL, 1).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 192);
+ oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 200);
+ oV = iV.lanewise(VectorOperators.LSHL, 15).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 208);
+ oV = iV.lanewise(VectorOperators.LSHL, 22).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 216);
+ oV = iV.lanewise(VectorOperators.LSHL, 29).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Shift 29 left room for only 3 bits; carry the remainder (>>> 3).
+ oV = iV.lanewise(VectorOperators.LSHR, 3);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 224);
+ oV = iV.lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 232);
+ oV = iV.lanewise(VectorOperators.LSHL, 11).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 240);
+ oV = iV.lanewise(VectorOperators.LSHL, 18).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 248);
+ oV = iV.lanewise(VectorOperators.LSHL, 25).or(oV);
+ // Seventh and final output word; outpos is not advanced after this store.
+ oV.intoArray(out, outpos);
+ }
+
+ static void fastunpack7(final int[] in, int inpos, final int[] out, int outpos) { // Expands 7 packed vectors (in[inpos], +8, ..., +48) into 32 vectors stored consecutively in out.
+ var iV = IntVector.fromArray(SPECIES_256, in, inpos);
+ iV.and(MASK_7).intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Whole fields of this word: shift right in steps of 7, then mask with MASK_7 (presumably the low-7-bit mask; it is defined outside this view - confirm).
+ iV.lanewise(VectorOperators.LSHR, 7).and(MASK_7).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 14).and(MASK_7).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 21).and(MASK_7).intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Straddling value: the top 4 bits of this word (>>> 28) joined with the low 3 bits of the next word, moved up by 4.
+ var oV = iV.lanewise(VectorOperators.LSHR, 28).and(MASK_7);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 8);
+ oV = iV.and(7).lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 3).and(MASK_7).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 10).and(MASK_7).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 17).and(MASK_7).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 24).and(MASK_7).intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Straddling value: the top 1 bit of this word (>>> 31) joined with the low 6 bits of the next word, moved up by 1.
+ oV = iV.lanewise(VectorOperators.LSHR, 31).and(MASK_7);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 16);
+ oV = iV.and(63).lanewise(VectorOperators.LSHL, 1).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 6).and(MASK_7).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 13).and(MASK_7).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 20).and(MASK_7).intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Straddling value: the top 5 bits of this word (>>> 27) joined with the low 2 bits of the next word, moved up by 5.
+ oV = iV.lanewise(VectorOperators.LSHR, 27).and(MASK_7);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 24);
+ oV = iV.and(3).lanewise(VectorOperators.LSHL, 5).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 2).and(MASK_7).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 9).and(MASK_7).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 16).and(MASK_7).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 23).and(MASK_7).intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Straddling value: the top 2 bits of this word (>>> 30) joined with the low 5 bits of the next word, moved up by 2.
+ oV = iV.lanewise(VectorOperators.LSHR, 30).and(MASK_7);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 32);
+ oV = iV.and(31).lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 5).and(MASK_7).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 12).and(MASK_7).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 19).and(MASK_7).intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Straddling value: the top 6 bits of this word (>>> 26) joined with the low 1 bit of the next word, moved up by 6.
+ oV = iV.lanewise(VectorOperators.LSHR, 26).and(MASK_7);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 40);
+ oV = iV.and(1).lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 1).and(MASK_7).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 8).and(MASK_7).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 15).and(MASK_7).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 22).and(MASK_7).intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Straddling value: the top 3 bits of this word (>>> 29) joined with the low 4 bits of the next word, moved up by 3.
+ oV = iV.lanewise(VectorOperators.LSHR, 29).and(MASK_7);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 48);
+ oV = iV.and(15).lanewise(VectorOperators.LSHL, 3).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Last input word: nothing is carried out of it; the final store does not advance outpos.
+ iV.lanewise(VectorOperators.LSHR, 4).and(MASK_7).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 11).and(MASK_7).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 18).and(MASK_7).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 25).and(MASK_7).intoArray(out, outpos);
+ }
+
+ static void fastpack8(final int[] in, int inpos, final int[] out, int outpos) { // Packs 32 input vectors (in[inpos + 8*k], k = 0..31) into 8 output vectors, four masked inputs per output word.
+ var iV = IntVector.fromArray(SPECIES_256, in, inpos);
+ var oV = iV.and(MASK_8);
+ // Each group of four inputs is masked with MASK_8 (presumably the low-8-bit mask; defined outside this view - confirm) and placed at shifts 0, 8, 16 and 24.
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 8);
+ oV = iV.and(MASK_8).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 16);
+ oV = iV.and(MASK_8).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 24);
+ oV = iV.and(MASK_8).lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Four 8-bit slots fill a 32-bit lane exactly, so nothing is carried over: every output word starts from a fresh input.
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 32);
+ oV = iV.and(MASK_8);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 40);
+ oV = iV.and(MASK_8).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 48);
+ oV = iV.and(MASK_8).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 56);
+ oV = iV.and(MASK_8).lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Output word 3: inputs at offsets 64..88.
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 64);
+ oV = iV.and(MASK_8);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 72);
+ oV = iV.and(MASK_8).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 80);
+ oV = iV.and(MASK_8).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 88);
+ oV = iV.and(MASK_8).lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Output word 4: inputs at offsets 96..120.
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 96);
+ oV = iV.and(MASK_8);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 104);
+ oV = iV.and(MASK_8).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 112);
+ oV = iV.and(MASK_8).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 120);
+ oV = iV.and(MASK_8).lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Output word 5: inputs at offsets 128..152.
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 128);
+ oV = iV.and(MASK_8);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 136);
+ oV = iV.and(MASK_8).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 144);
+ oV = iV.and(MASK_8).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 152);
+ oV = iV.and(MASK_8).lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Output word 6: inputs at offsets 160..184.
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 160);
+ oV = iV.and(MASK_8);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 168);
+ oV = iV.and(MASK_8).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 176);
+ oV = iV.and(MASK_8).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 184);
+ oV = iV.and(MASK_8).lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Output word 7: inputs at offsets 192..216.
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 192);
+ oV = iV.and(MASK_8);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 200);
+ oV = iV.and(MASK_8).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 208);
+ oV = iV.and(MASK_8).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 216);
+ oV = iV.and(MASK_8).lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Output word 8 (last): inputs at offsets 224..248; outpos is not advanced after the final store.
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 224);
+ oV = iV.and(MASK_8);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 232);
+ oV = iV.and(MASK_8).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 240);
+ oV = iV.and(MASK_8).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 248);
+ oV = iV.and(MASK_8).lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ }
+ /** Packs 32 input vectors, loaded from in at inpos, inpos + 8, ..., inpos + 248, into 8 output vectors stored to out from outpos (stepping by VLEN_256); inputs are not masked. */
+ static void fastpackNoMask8(final int[] in, int inpos, final int[] out, int outpos) {
+ var iV = IntVector.fromArray(SPECIES_256, in, inpos);
+ var oV = iV;
+ // No AND is applied: input bits above bit 7 would be OR-ed into neighbouring fields; presumably callers guarantee 8-bit values (TODO confirm).
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 8);
+ oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 16);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 24);
+ oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Output vector 1: inputs at inpos + 32 .. inpos + 56, shifted by 0, 8, 16, 24.
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 32);
+ oV = iV;
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 40);
+ oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 48);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 56);
+ oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Output vector 2: inputs at inpos + 64 .. inpos + 88.
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 64);
+ oV = iV;
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 72);
+ oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 80);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 88);
+ oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Output vector 3: inputs at inpos + 96 .. inpos + 120.
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 96);
+ oV = iV;
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 104);
+ oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 112);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 120);
+ oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Output vector 4: inputs at inpos + 128 .. inpos + 152.
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 128);
+ oV = iV;
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 136);
+ oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 144);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 152);
+ oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Output vector 5: inputs at inpos + 160 .. inpos + 184.
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 160);
+ oV = iV;
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 168);
+ oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 176);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 184);
+ oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Output vector 6: inputs at inpos + 192 .. inpos + 216.
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 192);
+ oV = iV;
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 200);
+ oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 208);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 216);
+ oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Output vector 7 (last): inputs at inpos + 224 .. inpos + 248; outpos is not advanced afterwards.
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 224);
+ oV = iV;
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 232);
+ oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 240);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 248);
+ oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ }
+ /** Unpacks 8 packed vectors, loaded from in at inpos, inpos + 8, ..., inpos + 56, into 32 output vectors stored to out from outpos (stepping by VLEN_256), one 8-bit field per output lane. */
+ static void fastunpack8(final int[] in, int inpos, final int[] out, int outpos) {
+ var iV = IntVector.fromArray(SPECIES_256, in, inpos);
+ iV.and(MASK_8).intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Each packed vector yields four outputs: logical right shift by 0, 8, 16, 24, then AND with MASK_8 (defined outside this view; presumably the low-8-bit mask).
+ iV.lanewise(VectorOperators.LSHR, 8).and(MASK_8).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 16).and(MASK_8).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 24).and(MASK_8).intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Packed vector 1 -> output vectors 4..7.
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 8);
+ iV.and(MASK_8).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 8).and(MASK_8).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 16).and(MASK_8).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 24).and(MASK_8).intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Packed vector 2 -> output vectors 8..11.
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 16);
+ iV.and(MASK_8).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 8).and(MASK_8).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 16).and(MASK_8).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 24).and(MASK_8).intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Packed vector 3 -> output vectors 12..15.
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 24);
+ iV.and(MASK_8).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 8).and(MASK_8).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 16).and(MASK_8).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 24).and(MASK_8).intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Packed vector 4 -> output vectors 16..19.
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 32);
+ iV.and(MASK_8).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 8).and(MASK_8).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 16).and(MASK_8).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 24).and(MASK_8).intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Packed vector 5 -> output vectors 20..23.
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 40);
+ iV.and(MASK_8).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 8).and(MASK_8).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 16).and(MASK_8).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 24).and(MASK_8).intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Packed vector 6 -> output vectors 24..27.
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 48);
+ iV.and(MASK_8).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 8).and(MASK_8).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 16).and(MASK_8).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 24).and(MASK_8).intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Packed vector 7 (last) -> output vectors 28..31; outpos is not advanced after the final store.
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 56);
+ iV.and(MASK_8).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 8).and(MASK_8).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 16).and(MASK_8).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 24).and(MASK_8).intoArray(out, outpos);
+ }
+ /** Packs 32 input vectors, loaded from in at inpos, inpos + 8, ..., inpos + 248, into 9 output vectors stored to out from outpos (stepping by VLEN_256); every input is ANDed with MASK_9 before being placed. */
+ static void fastpack9(final int[] in, int inpos, final int[] out, int outpos) {
+ var iV = IntVector.fromArray(SPECIES_256, in, inpos);
+ var oV = iV.and(MASK_9);
+ // Layout: the k-th value of a lane starts at bit (9 * k) mod 32 of the running output vector. MASK_9 is defined outside this view; presumably the low-9-bit mask.
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 8);
+ oV = iV.and(MASK_9).lanewise(VectorOperators.LSHL, 9).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 16);
+ oV = iV.and(MASK_9).lanewise(VectorOperators.LSHL, 18).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 24);
+ oV = iV.and(MASK_9).lanewise(VectorOperators.LSHL, 27).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Carry: the value placed at bit 27 fit only its low 5 bits; its remaining high bits open output vector 1.
+ oV = iV.and(MASK_9).lanewise(VectorOperators.LSHR, 5);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 32);
+ oV = iV.and(MASK_9).lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 40);
+ oV = iV.and(MASK_9).lanewise(VectorOperators.LSHL, 13).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 48);
+ oV = iV.and(MASK_9).lanewise(VectorOperators.LSHL, 22).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 56);
+ oV = iV.and(MASK_9).lanewise(VectorOperators.LSHL, 31).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Carry: the value placed at bit 31 fit only its low 1 bit; its high 8 bits open output vector 2.
+ oV = iV.and(MASK_9).lanewise(VectorOperators.LSHR, 1);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 64);
+ oV = iV.and(MASK_9).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 72);
+ oV = iV.and(MASK_9).lanewise(VectorOperators.LSHL, 17).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 80);
+ oV = iV.and(MASK_9).lanewise(VectorOperators.LSHL, 26).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Carry: the value placed at bit 26 fit only its low 6 bits; its high 3 bits open output vector 3.
+ oV = iV.and(MASK_9).lanewise(VectorOperators.LSHR, 6);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 88);
+ oV = iV.and(MASK_9).lanewise(VectorOperators.LSHL, 3).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 96);
+ oV = iV.and(MASK_9).lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 104);
+ oV = iV.and(MASK_9).lanewise(VectorOperators.LSHL, 21).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 112);
+ oV = iV.and(MASK_9).lanewise(VectorOperators.LSHL, 30).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Carry: the value placed at bit 30 fit only its low 2 bits; its high 7 bits open output vector 4.
+ oV = iV.and(MASK_9).lanewise(VectorOperators.LSHR, 2);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 120);
+ oV = iV.and(MASK_9).lanewise(VectorOperators.LSHL, 7).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 128);
+ oV = iV.and(MASK_9).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 136);
+ oV = iV.and(MASK_9).lanewise(VectorOperators.LSHL, 25).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Carry: the value placed at bit 25 fit only its low 7 bits; its high 2 bits open output vector 5.
+ oV = iV.and(MASK_9).lanewise(VectorOperators.LSHR, 7);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 144);
+ oV = iV.and(MASK_9).lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 152);
+ oV = iV.and(MASK_9).lanewise(VectorOperators.LSHL, 11).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 160);
+ oV = iV.and(MASK_9).lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 168);
+ oV = iV.and(MASK_9).lanewise(VectorOperators.LSHL, 29).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Carry: the value placed at bit 29 fit only its low 3 bits; its high 6 bits open output vector 6.
+ oV = iV.and(MASK_9).lanewise(VectorOperators.LSHR, 3);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 176);
+ oV = iV.and(MASK_9).lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 184);
+ oV = iV.and(MASK_9).lanewise(VectorOperators.LSHL, 15).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 192);
+ oV = iV.and(MASK_9).lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Carry: the value placed at bit 24 fit only its low 8 bits; its top bit opens output vector 7.
+ oV = iV.and(MASK_9).lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 200);
+ oV = iV.and(MASK_9).lanewise(VectorOperators.LSHL, 1).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 208);
+ oV = iV.and(MASK_9).lanewise(VectorOperators.LSHL, 10).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 216);
+ oV = iV.and(MASK_9).lanewise(VectorOperators.LSHL, 19).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 224);
+ oV = iV.and(MASK_9).lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Carry: the value placed at bit 28 fit only its low 4 bits; its high 5 bits open output vector 8, which then ends exactly at bit 32 (23 + 9).
+ oV = iV.and(MASK_9).lanewise(VectorOperators.LSHR, 4);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 232);
+ oV = iV.and(MASK_9).lanewise(VectorOperators.LSHL, 5).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 240);
+ oV = iV.and(MASK_9).lanewise(VectorOperators.LSHL, 14).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 248);
+ oV = iV.and(MASK_9).lanewise(VectorOperators.LSHL, 23).or(oV);
+
+ oV.intoArray(out, outpos);
+ }
+ /** Packs 32 input vectors, loaded from in at inpos, inpos + 8, ..., inpos + 248, into 9 output vectors stored to out from outpos (stepping by VLEN_256), using the 9-bit layout; inputs are not masked. */
+ static void fastpackNoMask9(final int[] in, int inpos, final int[] out, int outpos) {
+ var iV = IntVector.fromArray(SPECIES_256, in, inpos);
+ var oV = iV;
+ // Layout: the k-th value of a lane starts at bit (9 * k) mod 32. No AND is applied, so stray input bits above bit 8 would leak into neighbouring fields; presumably callers guarantee 9-bit values (TODO confirm).
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 8);
+ oV = iV.lanewise(VectorOperators.LSHL, 9).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 16);
+ oV = iV.lanewise(VectorOperators.LSHL, 18).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 24);
+ oV = iV.lanewise(VectorOperators.LSHL, 27).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Carry: the value placed at bit 27 fit only its low 5 bits; the rest (value >>> 5) opens output vector 1.
+ oV = iV.lanewise(VectorOperators.LSHR, 5);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 32);
+ oV = iV.lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 40);
+ oV = iV.lanewise(VectorOperators.LSHL, 13).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 48);
+ oV = iV.lanewise(VectorOperators.LSHL, 22).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 56);
+ oV = iV.lanewise(VectorOperators.LSHL, 31).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Carry: the value placed at bit 31 fit only its low 1 bit; the rest (value >>> 1) opens output vector 2.
+ oV = iV.lanewise(VectorOperators.LSHR, 1);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 64);
+ oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 72);
+ oV = iV.lanewise(VectorOperators.LSHL, 17).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 80);
+ oV = iV.lanewise(VectorOperators.LSHL, 26).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Carry: the value placed at bit 26 fit only its low 6 bits; the rest (value >>> 6) opens output vector 3.
+ oV = iV.lanewise(VectorOperators.LSHR, 6);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 88);
+ oV = iV.lanewise(VectorOperators.LSHL, 3).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 96);
+ oV = iV.lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 104);
+ oV = iV.lanewise(VectorOperators.LSHL, 21).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 112);
+ oV = iV.lanewise(VectorOperators.LSHL, 30).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Carry: the value placed at bit 30 fit only its low 2 bits; the rest (value >>> 2) opens output vector 4.
+ oV = iV.lanewise(VectorOperators.LSHR, 2);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 120);
+ oV = iV.lanewise(VectorOperators.LSHL, 7).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 128);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 136);
+ oV = iV.lanewise(VectorOperators.LSHL, 25).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Carry: the value placed at bit 25 fit only its low 7 bits; the rest (value >>> 7) opens output vector 5.
+ oV = iV.lanewise(VectorOperators.LSHR, 7);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 144);
+ oV = iV.lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 152);
+ oV = iV.lanewise(VectorOperators.LSHL, 11).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 160);
+ oV = iV.lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 168);
+ oV = iV.lanewise(VectorOperators.LSHL, 29).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Carry: the value placed at bit 29 fit only its low 3 bits; the rest (value >>> 3) opens output vector 6.
+ oV = iV.lanewise(VectorOperators.LSHR, 3);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 176);
+ oV = iV.lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 184);
+ oV = iV.lanewise(VectorOperators.LSHL, 15).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 192);
+ oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Carry: the value placed at bit 24 fit only its low 8 bits; the rest (value >>> 8) opens output vector 7.
+ oV = iV.lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 200);
+ oV = iV.lanewise(VectorOperators.LSHL, 1).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 208);
+ oV = iV.lanewise(VectorOperators.LSHL, 10).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 216);
+ oV = iV.lanewise(VectorOperators.LSHL, 19).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 224);
+ oV = iV.lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Carry: the value placed at bit 28 fit only its low 4 bits; the rest (value >>> 4) opens output vector 8, which then ends exactly at bit 32 (23 + 9).
+ oV = iV.lanewise(VectorOperators.LSHR, 4);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 232);
+ oV = iV.lanewise(VectorOperators.LSHL, 5).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 240);
+ oV = iV.lanewise(VectorOperators.LSHL, 14).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 248);
+ oV = iV.lanewise(VectorOperators.LSHL, 23).or(oV);
+
+ oV.intoArray(out, outpos);
+ }
+ /** Unpacks 9 packed vectors, loaded from in at inpos, inpos + 8, ..., inpos + 64, into 32 output vectors of 9-bit values stored to out from outpos (stepping by VLEN_256). */
+ static void fastunpack9(final int[] in, int inpos, final int[] out, int outpos) {
+ var iV = IntVector.fromArray(SPECIES_256, in, inpos);
+ iV.and(MASK_9).intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Fields that lie wholly inside one packed vector: logical right shift to bit 0, then AND with MASK_9 (defined outside this view; presumably the low-9-bit mask).
+ iV.lanewise(VectorOperators.LSHR, 9).and(MASK_9).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 18).and(MASK_9).intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Straddling value: low 5 bits are the top of packed vector 0, high 4 bits (mask 15) are the bottom of packed vector 1.
+ var oV = iV.lanewise(VectorOperators.LSHR, 27).and(MASK_9);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 8);
+ oV = iV.and(15).lanewise(VectorOperators.LSHL, 5).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 4).and(MASK_9).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 13).and(MASK_9).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 22).and(MASK_9).intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Straddling value: low 1 bit from the top of packed vector 1, high 8 bits (mask 255) from packed vector 2.
+ oV = iV.lanewise(VectorOperators.LSHR, 31).and(MASK_9);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 16);
+ oV = iV.and(255).lanewise(VectorOperators.LSHL, 1).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 8).and(MASK_9).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 17).and(MASK_9).intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Straddling value: low 6 bits from the top of packed vector 2, high 3 bits (mask 7) from packed vector 3.
+ oV = iV.lanewise(VectorOperators.LSHR, 26).and(MASK_9);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 24);
+ oV = iV.and(7).lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 3).and(MASK_9).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 12).and(MASK_9).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 21).and(MASK_9).intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Straddling value: low 2 bits from the top of packed vector 3, high 7 bits (mask 127) from packed vector 4.
+ oV = iV.lanewise(VectorOperators.LSHR, 30).and(MASK_9);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 32);
+ oV = iV.and(127).lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 7).and(MASK_9).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 16).and(MASK_9).intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Straddling value: low 7 bits from the top of packed vector 4, high 2 bits (mask 3) from packed vector 5.
+ oV = iV.lanewise(VectorOperators.LSHR, 25).and(MASK_9);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 40);
+ oV = iV.and(3).lanewise(VectorOperators.LSHL, 7).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 2).and(MASK_9).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 11).and(MASK_9).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 20).and(MASK_9).intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Straddling value: low 3 bits from the top of packed vector 5, high 6 bits (mask 63) from packed vector 6.
+ oV = iV.lanewise(VectorOperators.LSHR, 29).and(MASK_9);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 48);
+ oV = iV.and(63).lanewise(VectorOperators.LSHL, 3).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 6).and(MASK_9).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 15).and(MASK_9).intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Straddling value: low 8 bits from the top of packed vector 6, top bit (mask 1) from packed vector 7.
+ oV = iV.lanewise(VectorOperators.LSHR, 24).and(MASK_9);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 56);
+ oV = iV.and(1).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 1).and(MASK_9).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 10).and(MASK_9).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 19).and(MASK_9).intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Straddling value: low 4 bits from the top of packed vector 7, high 5 bits (mask 31) from packed vector 8 (the last one).
+ oV = iV.lanewise(VectorOperators.LSHR, 28).and(MASK_9);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 64);
+ oV = iV.and(31).lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 5).and(MASK_9).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 14).and(MASK_9).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 23).and(MASK_9).intoArray(out, outpos);
+ }
+ /** Packs 32 input vectors, loaded from in at inpos, inpos + 8, ..., inpos + 248, into 10 output vectors stored to out from outpos (stepping by VLEN_256); every input is ANDed with MASK_10 before being placed. */
+ static void fastpack10(final int[] in, int inpos, final int[] out, int outpos) {
+ var iV = IntVector.fromArray(SPECIES_256, in, inpos);
+ var oV = iV.and(MASK_10);
+ // Layout: the k-th value of a lane starts at bit (10 * k) mod 32 of the running output vector. MASK_10 is defined outside this view; presumably the low-10-bit mask.
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 8);
+ oV = iV.and(MASK_10).lanewise(VectorOperators.LSHL, 10).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 16);
+ oV = iV.and(MASK_10).lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 24);
+ oV = iV.and(MASK_10).lanewise(VectorOperators.LSHL, 30).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Carry: the value placed at bit 30 fit only its low 2 bits; its high 8 bits open output vector 1.
+ oV = iV.and(MASK_10).lanewise(VectorOperators.LSHR, 2);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 32);
+ oV = iV.and(MASK_10).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 40);
+ oV = iV.and(MASK_10).lanewise(VectorOperators.LSHL, 18).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 48);
+ oV = iV.and(MASK_10).lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Carry: the value placed at bit 28 fit only its low 4 bits; its high 6 bits open output vector 2.
+ oV = iV.and(MASK_10).lanewise(VectorOperators.LSHR, 4);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 56);
+ oV = iV.and(MASK_10).lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 64);
+ oV = iV.and(MASK_10).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 72);
+ oV = iV.and(MASK_10).lanewise(VectorOperators.LSHL, 26).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Carry: the value placed at bit 26 fit only its low 6 bits; its high 4 bits open output vector 3.
+ oV = iV.and(MASK_10).lanewise(VectorOperators.LSHR, 6);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 80);
+ oV = iV.and(MASK_10).lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 88);
+ oV = iV.and(MASK_10).lanewise(VectorOperators.LSHL, 14).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 96);
+ oV = iV.and(MASK_10).lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Carry: the value placed at bit 24 fit only its low 8 bits; its high 2 bits open output vector 4.
+ oV = iV.and(MASK_10).lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 104);
+ oV = iV.and(MASK_10).lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 112);
+ oV = iV.and(MASK_10).lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 120);
+ oV = iV.and(MASK_10).lanewise(VectorOperators.LSHL, 22).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Realignment: 16 values x 10 bits fill exactly 5 output vectors, so the second half restarts at bit 0 with the same shift pattern.
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 128);
+ oV = iV.and(MASK_10);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 136);
+ oV = iV.and(MASK_10).lanewise(VectorOperators.LSHL, 10).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 144);
+ oV = iV.and(MASK_10).lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 152);
+ oV = iV.and(MASK_10).lanewise(VectorOperators.LSHL, 30).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Carry: low 2 bits were stored at bit 30; the high 8 bits open output vector 6.
+ oV = iV.and(MASK_10).lanewise(VectorOperators.LSHR, 2);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 160);
+ oV = iV.and(MASK_10).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 168);
+ oV = iV.and(MASK_10).lanewise(VectorOperators.LSHL, 18).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 176);
+ oV = iV.and(MASK_10).lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Carry: low 4 bits were stored at bit 28; the high 6 bits open output vector 7.
+ oV = iV.and(MASK_10).lanewise(VectorOperators.LSHR, 4);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 184);
+ oV = iV.and(MASK_10).lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 192);
+ oV = iV.and(MASK_10).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 200);
+ oV = iV.and(MASK_10).lanewise(VectorOperators.LSHL, 26).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Carry: low 6 bits were stored at bit 26; the high 4 bits open output vector 8.
+ oV = iV.and(MASK_10).lanewise(VectorOperators.LSHR, 6);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 208);
+ oV = iV.and(MASK_10).lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 216);
+ oV = iV.and(MASK_10).lanewise(VectorOperators.LSHL, 14).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 224);
+ oV = iV.and(MASK_10).lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Carry: low 8 bits were stored at bit 24; the high 2 bits open output vector 9, which then ends exactly at bit 32 (22 + 10).
+ oV = iV.and(MASK_10).lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 232);
+ oV = iV.and(MASK_10).lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 240);
+ oV = iV.and(MASK_10).lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 248);
+ oV = iV.and(MASK_10).lanewise(VectorOperators.LSHL, 22).or(oV);
+
+ oV.intoArray(out, outpos);
+ }
+ /** Packs 32 input vectors, loaded from in at inpos, inpos + 8, ..., inpos + 248, into 10 output vectors stored to out from outpos (stepping by VLEN_256), using the 10-bit layout; inputs are not masked. */
+ static void fastpackNoMask10(final int[] in, int inpos, final int[] out, int outpos) {
+ var iV = IntVector.fromArray(SPECIES_256, in, inpos);
+ var oV = iV;
+ // Layout: the k-th value of a lane starts at bit (10 * k) mod 32. No AND is applied, so stray input bits above bit 9 would leak into neighbouring fields; presumably callers guarantee 10-bit values (TODO confirm).
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 8);
+ oV = iV.lanewise(VectorOperators.LSHL, 10).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 16);
+ oV = iV.lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 24);
+ oV = iV.lanewise(VectorOperators.LSHL, 30).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Carry: the value placed at bit 30 fit only its low 2 bits; the rest (value >>> 2) opens output vector 1.
+ oV = iV.lanewise(VectorOperators.LSHR, 2);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 32);
+ oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 40);
+ oV = iV.lanewise(VectorOperators.LSHL, 18).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 48);
+ oV = iV.lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Carry: the value placed at bit 28 fit only its low 4 bits; the rest (value >>> 4) opens output vector 2.
+ oV = iV.lanewise(VectorOperators.LSHR, 4);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 56);
+ oV = iV.lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 64);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 72);
+ oV = iV.lanewise(VectorOperators.LSHL, 26).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Carry: the value placed at bit 26 fit only its low 6 bits; the rest (value >>> 6) opens output vector 3.
+ oV = iV.lanewise(VectorOperators.LSHR, 6);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 80);
+ oV = iV.lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 88);
+ oV = iV.lanewise(VectorOperators.LSHL, 14).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 96);
+ oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Carry: the value placed at bit 24 fit only its low 8 bits; the rest (value >>> 8) opens output vector 4.
+ oV = iV.lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 104);
+ oV = iV.lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 112);
+ oV = iV.lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 120);
+ oV = iV.lanewise(VectorOperators.LSHL, 22).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Realignment: 16 values x 10 bits fill exactly 5 output vectors, so the second half restarts at bit 0 with the same shift pattern.
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 128);
+ oV = iV;
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 136);
+ oV = iV.lanewise(VectorOperators.LSHL, 10).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 144);
+ oV = iV.lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 152);
+ oV = iV.lanewise(VectorOperators.LSHL, 30).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Carry: low 2 bits were stored at bit 30; value >>> 2 opens output vector 6.
+ oV = iV.lanewise(VectorOperators.LSHR, 2);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 160);
+ oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 168);
+ oV = iV.lanewise(VectorOperators.LSHL, 18).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 176);
+ oV = iV.lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Carry: low 4 bits were stored at bit 28; value >>> 4 opens output vector 7.
+ oV = iV.lanewise(VectorOperators.LSHR, 4);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 184);
+ oV = iV.lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 192);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 200);
+ oV = iV.lanewise(VectorOperators.LSHL, 26).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Carry: low 6 bits were stored at bit 26; value >>> 6 opens output vector 8.
+ oV = iV.lanewise(VectorOperators.LSHR, 6);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 208);
+ oV = iV.lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 216);
+ oV = iV.lanewise(VectorOperators.LSHL, 14).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 224);
+ oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Carry: low 8 bits were stored at bit 24; value >>> 8 opens output vector 9, which then ends exactly at bit 32 (22 + 10).
+ oV = iV.lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 232);
+ oV = iV.lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 240);
+ oV = iV.lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 248);
+ oV = iV.lanewise(VectorOperators.LSHL, 22).or(oV);
+
+ oV.intoArray(out, outpos);
+ }
+
+ static void fastunpack10(final int[] in, int inpos, final int[] out, int outpos) { // Unpacks 10-bit fields: 10 vector loads from in, 32 vector stores to out.
+ var iV = IntVector.fromArray(SPECIES_256, in, inpos);
+ iV.and(MASK_10).intoArray(out, outpos);
+ outpos += VLEN_256;
+ // NOTE(review): MASK_10 is declared outside this view; presumably the low 10 bits set in every lane - confirm.
+ iV.lanewise(VectorOperators.LSHR, 10).and(MASK_10).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 20).and(MASK_10).intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Bits 30-31 are the low 2 bits of a field that continues in the next input vector.
+ var oV = iV.lanewise(VectorOperators.LSHR, 30).and(MASK_10);
+ // The next load supplies the remaining 8 bits (and(255)), placed above the 2 bits already held.
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 8);
+ oV = iV.and(255).lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 8).and(MASK_10).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 18).and(MASK_10).intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Straddling field: 4 low bits from this vector, 6 high bits from the next load.
+ oV = iV.lanewise(VectorOperators.LSHR, 28).and(MASK_10);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 16);
+ oV = iV.and(63).lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 6).and(MASK_10).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 16).and(MASK_10).intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Straddling field: 6 low bits from this vector, 4 high bits from the next load.
+ oV = iV.lanewise(VectorOperators.LSHR, 26).and(MASK_10);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 24);
+ oV = iV.and(15).lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 4).and(MASK_10).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 14).and(MASK_10).intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Straddling field: 8 low bits from this vector, 2 high bits from the next load.
+ oV = iV.lanewise(VectorOperators.LSHR, 24).and(MASK_10);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 32);
+ oV = iV.and(3).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 2).and(MASK_10).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 12).and(MASK_10).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 22).and(MASK_10).intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Five input vectors consumed; the load at inpos + 40 starts at shift 0 and the same shift sequence repeats.
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 40);
+ iV.and(MASK_10).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 10).and(MASK_10).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 20).and(MASK_10).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 30).and(MASK_10);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 48);
+ oV = iV.and(255).lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 8).and(MASK_10).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 18).and(MASK_10).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 28).and(MASK_10);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 56);
+ oV = iV.and(63).lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 6).and(MASK_10).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 16).and(MASK_10).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 26).and(MASK_10);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 64);
+ oV = iV.and(15).lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 4).and(MASK_10).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 14).and(MASK_10).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 24).and(MASK_10);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 72);
+ oV = iV.and(3).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 2).and(MASK_10).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 12).and(MASK_10).intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Final store: outpos is a local copy, so it is not advanced after the last vector.
+ iV.lanewise(VectorOperators.LSHR, 22).and(MASK_10).intoArray(out, outpos);
+ }
+
+ static void fastpack11(final int[] in, int inpos, final int[] out, int outpos) { // Packs 11-bit fields: 32 vector loads from in (masked), 11 vector stores to out.
+ var iV = IntVector.fromArray(SPECIES_256, in, inpos);
+ var oV = iV.and(MASK_11);
+ // NOTE(review): MASK_11 is declared outside this view; presumably the low 11 bits set in every lane - confirm.
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 8);
+ oV = iV.and(MASK_11).lanewise(VectorOperators.LSHL, 11).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 16);
+ oV = iV.and(MASK_11).lanewise(VectorOperators.LSHL, 22).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // At shift 22 only 10 bits fit in the 32-bit lane; the bits shifted out are carried into the next output vector.
+ oV = iV.and(MASK_11).lanewise(VectorOperators.LSHR, 10);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 24);
+ oV = iV.and(MASK_11).lanewise(VectorOperators.LSHL, 1).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 32);
+ oV = iV.and(MASK_11).lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 40);
+ oV = iV.and(MASK_11).lanewise(VectorOperators.LSHL, 23).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Each following output vector starts with the carried high bits of the previous straddling input.
+ oV = iV.and(MASK_11).lanewise(VectorOperators.LSHR, 9);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 48);
+ oV = iV.and(MASK_11).lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 56);
+ oV = iV.and(MASK_11).lanewise(VectorOperators.LSHL, 13).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 64);
+ oV = iV.and(MASK_11).lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_11).lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 72);
+ oV = iV.and(MASK_11).lanewise(VectorOperators.LSHL, 3).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 80);
+ oV = iV.and(MASK_11).lanewise(VectorOperators.LSHL, 14).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 88);
+ oV = iV.and(MASK_11).lanewise(VectorOperators.LSHL, 25).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_11).lanewise(VectorOperators.LSHR, 7);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 96);
+ oV = iV.and(MASK_11).lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 104);
+ oV = iV.and(MASK_11).lanewise(VectorOperators.LSHL, 15).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 112);
+ oV = iV.and(MASK_11).lanewise(VectorOperators.LSHL, 26).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_11).lanewise(VectorOperators.LSHR, 6);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 120);
+ oV = iV.and(MASK_11).lanewise(VectorOperators.LSHL, 5).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 128);
+ oV = iV.and(MASK_11).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 136);
+ oV = iV.and(MASK_11).lanewise(VectorOperators.LSHL, 27).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_11).lanewise(VectorOperators.LSHR, 5);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 144);
+ oV = iV.and(MASK_11).lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 152);
+ oV = iV.and(MASK_11).lanewise(VectorOperators.LSHL, 17).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 160);
+ oV = iV.and(MASK_11).lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_11).lanewise(VectorOperators.LSHR, 4);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 168);
+ oV = iV.and(MASK_11).lanewise(VectorOperators.LSHL, 7).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 176);
+ oV = iV.and(MASK_11).lanewise(VectorOperators.LSHL, 18).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 184);
+ oV = iV.and(MASK_11).lanewise(VectorOperators.LSHL, 29).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_11).lanewise(VectorOperators.LSHR, 3);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 192);
+ oV = iV.and(MASK_11).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 200);
+ oV = iV.and(MASK_11).lanewise(VectorOperators.LSHL, 19).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 208);
+ oV = iV.and(MASK_11).lanewise(VectorOperators.LSHL, 30).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_11).lanewise(VectorOperators.LSHR, 2);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 216);
+ oV = iV.and(MASK_11).lanewise(VectorOperators.LSHL, 9).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 224);
+ oV = iV.and(MASK_11).lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 232);
+ oV = iV.and(MASK_11).lanewise(VectorOperators.LSHL, 31).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Last output vector: carried bits plus the final two inputs at shifts 10 and 21, which end exactly at bit 31.
+ oV = iV.and(MASK_11).lanewise(VectorOperators.LSHR, 1);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 240);
+ oV = iV.and(MASK_11).lanewise(VectorOperators.LSHL, 10).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 248);
+ oV = iV.and(MASK_11).lanewise(VectorOperators.LSHL, 21).or(oV);
+
+ oV.intoArray(out, outpos);
+ }
+
+ static void fastpackNoMask11(final int[] in, int inpos, final int[] out, int outpos) { // Packs at an 11-bit stride without masking: 32 vector loads from in, 11 vector stores to out.
+ var iV = IntVector.fromArray(SPECIES_256, in, inpos);
+ var oV = iV;
+ // Inputs are OR-ed in unmasked, so any bit above bit 10 lands in a neighbouring field; presumably callers guarantee none are set - confirm.
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 8);
+ oV = iV.lanewise(VectorOperators.LSHL, 11).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 16);
+ oV = iV.lanewise(VectorOperators.LSHL, 22).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // At shift 22 only 10 bits fit in the 32-bit lane; the bits shifted out are carried into the next output vector.
+ oV = iV.lanewise(VectorOperators.LSHR, 10);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 24);
+ oV = iV.lanewise(VectorOperators.LSHL, 1).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 32);
+ oV = iV.lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 40);
+ oV = iV.lanewise(VectorOperators.LSHL, 23).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Each following output vector starts with the carried high bits of the previous straddling input.
+ oV = iV.lanewise(VectorOperators.LSHR, 9);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 48);
+ oV = iV.lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 56);
+ oV = iV.lanewise(VectorOperators.LSHL, 13).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 64);
+ oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 72);
+ oV = iV.lanewise(VectorOperators.LSHL, 3).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 80);
+ oV = iV.lanewise(VectorOperators.LSHL, 14).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 88);
+ oV = iV.lanewise(VectorOperators.LSHL, 25).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 7);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 96);
+ oV = iV.lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 104);
+ oV = iV.lanewise(VectorOperators.LSHL, 15).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 112);
+ oV = iV.lanewise(VectorOperators.LSHL, 26).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 6);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 120);
+ oV = iV.lanewise(VectorOperators.LSHL, 5).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 128);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 136);
+ oV = iV.lanewise(VectorOperators.LSHL, 27).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 5);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 144);
+ oV = iV.lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 152);
+ oV = iV.lanewise(VectorOperators.LSHL, 17).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 160);
+ oV = iV.lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 4);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 168);
+ oV = iV.lanewise(VectorOperators.LSHL, 7).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 176);
+ oV = iV.lanewise(VectorOperators.LSHL, 18).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 184);
+ oV = iV.lanewise(VectorOperators.LSHL, 29).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 3);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 192);
+ oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 200);
+ oV = iV.lanewise(VectorOperators.LSHL, 19).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 208);
+ oV = iV.lanewise(VectorOperators.LSHL, 30).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 2);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 216);
+ oV = iV.lanewise(VectorOperators.LSHL, 9).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 224);
+ oV = iV.lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 232);
+ oV = iV.lanewise(VectorOperators.LSHL, 31).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Last output vector: carried bits plus the final two inputs at shifts 10 and 21.
+ oV = iV.lanewise(VectorOperators.LSHR, 1);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 240);
+ oV = iV.lanewise(VectorOperators.LSHL, 10).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 248);
+ oV = iV.lanewise(VectorOperators.LSHL, 21).or(oV);
+
+ oV.intoArray(out, outpos);
+ }
+
+ static void fastunpack11(final int[] in, int inpos, final int[] out, int outpos) { // Unpacks 11-bit fields: 11 vector loads from in, 32 vector stores to out.
+ var iV = IntVector.fromArray(SPECIES_256, in, inpos);
+ iV.and(MASK_11).intoArray(out, outpos);
+ outpos += VLEN_256;
+ // NOTE(review): MASK_11 is declared outside this view; presumably the low 11 bits set in every lane - confirm.
+ iV.lanewise(VectorOperators.LSHR, 11).and(MASK_11).intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Bits 22-31 are the low 10 bits of a field whose top bit is bit 0 of the next input vector.
+ var oV = iV.lanewise(VectorOperators.LSHR, 22).and(MASK_11);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 8);
+ oV = iV.and(1).lanewise(VectorOperators.LSHL, 10).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // From here each input vector yields two whole fields, then a straddling one finished by the next load.
+ iV.lanewise(VectorOperators.LSHR, 1).and(MASK_11).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 12).and(MASK_11).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 23).and(MASK_11);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 16);
+ oV = iV.and(3).lanewise(VectorOperators.LSHL, 9).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 2).and(MASK_11).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 13).and(MASK_11).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 24).and(MASK_11);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 24);
+ oV = iV.and(7).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 3).and(MASK_11).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 14).and(MASK_11).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 25).and(MASK_11);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 32);
+ oV = iV.and(15).lanewise(VectorOperators.LSHL, 7).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 4).and(MASK_11).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 15).and(MASK_11).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 26).and(MASK_11);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 40);
+ oV = iV.and(31).lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 5).and(MASK_11).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 16).and(MASK_11).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 27).and(MASK_11);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 48);
+ oV = iV.and(63).lanewise(VectorOperators.LSHL, 5).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 6).and(MASK_11).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 17).and(MASK_11).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 28).and(MASK_11);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 56);
+ oV = iV.and(127).lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 7).and(MASK_11).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 18).and(MASK_11).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 29).and(MASK_11);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 64);
+ oV = iV.and(255).lanewise(VectorOperators.LSHL, 3).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 8).and(MASK_11).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 19).and(MASK_11).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 30).and(MASK_11);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 72);
+ oV = iV.and(511).lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 9).and(MASK_11).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 20).and(MASK_11).intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Only bit 31 belongs to this field; the last input vector supplies its upper 10 bits.
+ oV = iV.lanewise(VectorOperators.LSHR, 31).and(MASK_11);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 80);
+ oV = iV.and(1023).lanewise(VectorOperators.LSHL, 1).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 10).and(MASK_11).intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Final store: outpos is a local copy, so it is not advanced after the last vector.
+ iV.lanewise(VectorOperators.LSHR, 21).and(MASK_11).intoArray(out, outpos);
+ }
+
+ static void fastpack12(final int[] in, int inpos, final int[] out, int outpos) { // Packs 12-bit fields: 32 vector loads from in (masked), 12 vector stores to out.
+ var iV = IntVector.fromArray(SPECIES_256, in, inpos);
+ var oV = iV.and(MASK_12);
+ // NOTE(review): MASK_12 is declared outside this view; presumably the low 12 bits set in every lane - confirm.
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 8);
+ oV = iV.and(MASK_12).lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 16);
+ oV = iV.and(MASK_12).lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // At shift 24 only 8 bits fit in the 32-bit lane; the bits shifted out are carried into the next output vector.
+ oV = iV.and(MASK_12).lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 24);
+ oV = iV.and(MASK_12).lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 32);
+ oV = iV.and(MASK_12).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 40);
+ oV = iV.and(MASK_12).lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // At shift 28 only 4 bits fit; the bits shifted out are carried over.
+ oV = iV.and(MASK_12).lanewise(VectorOperators.LSHR, 4);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 48);
+ oV = iV.and(MASK_12).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 56);
+ oV = iV.and(MASK_12).lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // 8 inputs x 12 bits fill exactly 3 output vectors, so the next group starts again at shift 0.
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 64);
+ oV = iV.and(MASK_12);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 72);
+ oV = iV.and(MASK_12).lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 80);
+ oV = iV.and(MASK_12).lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_12).lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 88);
+ oV = iV.and(MASK_12).lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 96);
+ oV = iV.and(MASK_12).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 104);
+ oV = iV.and(MASK_12).lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_12).lanewise(VectorOperators.LSHR, 4);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 112);
+ oV = iV.and(MASK_12).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 120);
+ oV = iV.and(MASK_12).lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Third group of 8 input vectors (offsets 128..184).
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 128);
+ oV = iV.and(MASK_12);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 136);
+ oV = iV.and(MASK_12).lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 144);
+ oV = iV.and(MASK_12).lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_12).lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 152);
+ oV = iV.and(MASK_12).lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 160);
+ oV = iV.and(MASK_12).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 168);
+ oV = iV.and(MASK_12).lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_12).lanewise(VectorOperators.LSHR, 4);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 176);
+ oV = iV.and(MASK_12).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 184);
+ oV = iV.and(MASK_12).lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Fourth and last group of 8 input vectors (offsets 192..248).
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 192);
+ oV = iV.and(MASK_12);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 200);
+ oV = iV.and(MASK_12).lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 208);
+ oV = iV.and(MASK_12).lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_12).lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 216);
+ oV = iV.and(MASK_12).lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 224);
+ oV = iV.and(MASK_12).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 232);
+ oV = iV.and(MASK_12).lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_12).lanewise(VectorOperators.LSHR, 4);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 240);
+ oV = iV.and(MASK_12).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 248);
+ oV = iV.and(MASK_12).lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ oV.intoArray(out, outpos);
+ }
+
+ static void fastpackNoMask12(final int[] in, int inpos, final int[] out, int outpos) { // Packs at a 12-bit stride without masking: 32 vector loads from in, 12 vector stores to out.
+ var iV = IntVector.fromArray(SPECIES_256, in, inpos);
+ var oV = iV;
+ // Inputs are OR-ed in unmasked, so any bit above bit 11 lands in a neighbouring field; presumably callers guarantee none are set - confirm.
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 8);
+ oV = iV.lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 16);
+ oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // At shift 24 only 8 bits fit in the 32-bit lane; the bits shifted out are carried into the next output vector.
+ oV = iV.lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 24);
+ oV = iV.lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 32);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 40);
+ oV = iV.lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // At shift 28 only 4 bits fit; the bits shifted out are carried over.
+ oV = iV.lanewise(VectorOperators.LSHR, 4);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 48);
+ oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 56);
+ oV = iV.lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // After 8 inputs and 3 stores the shift sequence restarts at 0 for the next group.
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 64);
+ oV = iV;
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 72);
+ oV = iV.lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 80);
+ oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 88);
+ oV = iV.lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 96);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 104);
+ oV = iV.lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 4);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 112);
+ oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 120);
+ oV = iV.lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Third group of 8 input vectors (offsets 128..184).
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 128);
+ oV = iV;
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 136);
+ oV = iV.lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 144);
+ oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 152);
+ oV = iV.lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 160);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 168);
+ oV = iV.lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 4);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 176);
+ oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 184);
+ oV = iV.lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Fourth and last group of 8 input vectors (offsets 192..248).
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 192);
+ oV = iV;
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 200);
+ oV = iV.lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 208);
+ oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 216);
+ oV = iV.lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 224);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 232);
+ oV = iV.lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 4);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 240);
+ oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 248);
+ oV = iV.lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ oV.intoArray(out, outpos);
+ }
+
+ static void fastunpack12(final int[] in, int inpos, final int[] out, int outpos) { // Unpacks 12-bit fields: 12 vector loads from in, 32 vector stores to out.
+ var iV = IntVector.fromArray(SPECIES_256, in, inpos);
+ iV.and(MASK_12).intoArray(out, outpos);
+ outpos += VLEN_256;
+ // NOTE(review): MASK_12 is declared outside this view; presumably the low 12 bits set in every lane - confirm.
+ iV.lanewise(VectorOperators.LSHR, 12).and(MASK_12).intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Bits 24-31 are the low 8 bits of a field; the next load supplies its top 4 bits (and(15)).
+ var oV = iV.lanewise(VectorOperators.LSHR, 24).and(MASK_12);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 8);
+ oV = iV.and(15).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 4).and(MASK_12).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 16).and(MASK_12).intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Bits 28-31 are the low 4 bits of a field; the next load supplies its top 8 bits (and(255)).
+ oV = iV.lanewise(VectorOperators.LSHR, 28).and(MASK_12);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 16);
+ oV = iV.and(255).lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 8).and(MASK_12).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 20).and(MASK_12).intoArray(out, outpos);
+ outpos += VLEN_256;
+ // 3 input vectors hold exactly 8 fields per lane, so the next load starts a fresh group at shift 0.
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 24);
+ iV.and(MASK_12).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 12).and(MASK_12).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 24).and(MASK_12);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 32);
+ oV = iV.and(15).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 4).and(MASK_12).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 16).and(MASK_12).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 28).and(MASK_12);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 40);
+ oV = iV.and(255).lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 8).and(MASK_12).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 20).and(MASK_12).intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Third group of 3 input vectors (offsets 48..64).
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 48);
+ iV.and(MASK_12).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 12).and(MASK_12).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 24).and(MASK_12);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 56);
+ oV = iV.and(15).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 4).and(MASK_12).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 16).and(MASK_12).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 28).and(MASK_12);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 64);
+ oV = iV.and(255).lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 8).and(MASK_12).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 20).and(MASK_12).intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Fourth and last group of 3 input vectors (offsets 72..88).
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 72);
+ iV.and(MASK_12).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 12).and(MASK_12).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 24).and(MASK_12);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 80);
+ oV = iV.and(15).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 4).and(MASK_12).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 16).and(MASK_12).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 28).and(MASK_12);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 88);
+ oV = iV.and(255).lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 8).and(MASK_12).intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Final store: outpos is a local copy, so it is not advanced after the last vector.
+ iV.lanewise(VectorOperators.LSHR, 20).and(MASK_12).intoArray(out, outpos);
+ }
+
+ static void fastpack13(final int[] in, int inpos, final int[] out, int outpos) { // Packs 32 vectors loaded from in[inpos + 0, 8, ..., 248] at 13 bits per lane into 13 vectors stored to out starting at outpos.
+ var iV = IntVector.fromArray(SPECIES_256, in, inpos);
+ var oV = iV.and(MASK_13);
+ // NOTE(review): SPECIES_256, VLEN_256 and MASK_13 are declared outside this view; the comments below assume MASK_13 == (1 << 13) - 1 and that VLEN_256 is the lane count -- confirm.
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 8);
+ oV = iV.and(MASK_13).lanewise(VectorOperators.LSHL, 13).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 16);
+ oV = iV.and(MASK_13).lanewise(VectorOperators.LSHL, 26).or(oV); // only the low 6 bits fit in bits 26..31; the rest is carried below
+
+ oV.intoArray(out, outpos); // store packed vector 1 of 13
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_13).lanewise(VectorOperators.LSHR, 6); // carry the high 7 bits of the straddling value
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 24);
+ oV = iV.and(MASK_13).lanewise(VectorOperators.LSHL, 7).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 32);
+ oV = iV.and(MASK_13).lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ oV.intoArray(out, outpos); // store packed vector 2 of 13
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_13).lanewise(VectorOperators.LSHR, 12); // carry the high 1 bit
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 40);
+ oV = iV.and(MASK_13).lanewise(VectorOperators.LSHL, 1).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 48);
+ oV = iV.and(MASK_13).lanewise(VectorOperators.LSHL, 14).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 56);
+ oV = iV.and(MASK_13).lanewise(VectorOperators.LSHL, 27).or(oV);
+
+ oV.intoArray(out, outpos); // store packed vector 3 of 13
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_13).lanewise(VectorOperators.LSHR, 5); // carry the high 8 bits
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 64);
+ oV = iV.and(MASK_13).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 72);
+ oV = iV.and(MASK_13).lanewise(VectorOperators.LSHL, 21).or(oV);
+
+ oV.intoArray(out, outpos); // store packed vector 4 of 13
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_13).lanewise(VectorOperators.LSHR, 11); // carry the high 2 bits
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 80);
+ oV = iV.and(MASK_13).lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 88);
+ oV = iV.and(MASK_13).lanewise(VectorOperators.LSHL, 15).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 96);
+ oV = iV.and(MASK_13).lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ oV.intoArray(out, outpos); // store packed vector 5 of 13
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_13).lanewise(VectorOperators.LSHR, 4); // carry the high 9 bits
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 104);
+ oV = iV.and(MASK_13).lanewise(VectorOperators.LSHL, 9).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 112);
+ oV = iV.and(MASK_13).lanewise(VectorOperators.LSHL, 22).or(oV);
+
+ oV.intoArray(out, outpos); // store packed vector 6 of 13
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_13).lanewise(VectorOperators.LSHR, 10); // carry the high 3 bits
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 120);
+ oV = iV.and(MASK_13).lanewise(VectorOperators.LSHL, 3).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 128);
+ oV = iV.and(MASK_13).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 136);
+ oV = iV.and(MASK_13).lanewise(VectorOperators.LSHL, 29).or(oV);
+
+ oV.intoArray(out, outpos); // store packed vector 7 of 13
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_13).lanewise(VectorOperators.LSHR, 3); // carry the high 10 bits
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 144);
+ oV = iV.and(MASK_13).lanewise(VectorOperators.LSHL, 10).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 152);
+ oV = iV.and(MASK_13).lanewise(VectorOperators.LSHL, 23).or(oV);
+
+ oV.intoArray(out, outpos); // store packed vector 8 of 13
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_13).lanewise(VectorOperators.LSHR, 9); // carry the high 4 bits
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 160);
+ oV = iV.and(MASK_13).lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 168);
+ oV = iV.and(MASK_13).lanewise(VectorOperators.LSHL, 17).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 176);
+ oV = iV.and(MASK_13).lanewise(VectorOperators.LSHL, 30).or(oV);
+
+ oV.intoArray(out, outpos); // store packed vector 9 of 13
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_13).lanewise(VectorOperators.LSHR, 2); // carry the high 11 bits
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 184);
+ oV = iV.and(MASK_13).lanewise(VectorOperators.LSHL, 11).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 192);
+ oV = iV.and(MASK_13).lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos); // store packed vector 10 of 13
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_13).lanewise(VectorOperators.LSHR, 8); // carry the high 5 bits
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 200);
+ oV = iV.and(MASK_13).lanewise(VectorOperators.LSHL, 5).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 208);
+ oV = iV.and(MASK_13).lanewise(VectorOperators.LSHL, 18).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 216);
+ oV = iV.and(MASK_13).lanewise(VectorOperators.LSHL, 31).or(oV);
+
+ oV.intoArray(out, outpos); // store packed vector 11 of 13
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_13).lanewise(VectorOperators.LSHR, 1); // carry the high 12 bits
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 224);
+ oV = iV.and(MASK_13).lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 232);
+ oV = iV.and(MASK_13).lanewise(VectorOperators.LSHL, 25).or(oV);
+
+ oV.intoArray(out, outpos); // store packed vector 12 of 13
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_13).lanewise(VectorOperators.LSHR, 7); // carry the high 6 bits
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 240);
+ oV = iV.and(MASK_13).lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 248);
+ oV = iV.and(MASK_13).lanewise(VectorOperators.LSHL, 19).or(oV); // 19 + 13 == 32: the last value ends exactly at bit 31, nothing is carried
+
+ oV.intoArray(out, outpos); // store packed vector 13 of 13; outpos is a local copy, so it is not advanced again
+ }
+
+ static void fastpackNoMask13(final int[] in, int inpos, final int[] out, int outpos) { // Same 13-bit layout as fastpack13 (32 vectors from in[inpos + 0, 8, ..., 248] into 13 vectors at out[outpos...]) but input lanes are ORed in without masking.
+ var iV = IntVector.fromArray(SPECIES_256, in, inpos);
+ var oV = iV;
+ // NOTE(review): because nothing is masked, any input bit above bit 12 would be ORed into a neighbouring field -- presumably callers guarantee 13-bit inputs; confirm. SPECIES_256 / VLEN_256 are declared outside this view.
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 8);
+ oV = iV.lanewise(VectorOperators.LSHL, 13).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 16);
+ oV = iV.lanewise(VectorOperators.LSHL, 26).or(oV); // only the low 6 bits land in bits 26..31; the rest is carried below
+
+ oV.intoArray(out, outpos); // store packed vector 1 of 13
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 6); // carry bits 6 and up of the straddling value
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 24);
+ oV = iV.lanewise(VectorOperators.LSHL, 7).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 32);
+ oV = iV.lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ oV.intoArray(out, outpos); // store packed vector 2 of 13
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 12); // carry bits 12 and up
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 40);
+ oV = iV.lanewise(VectorOperators.LSHL, 1).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 48);
+ oV = iV.lanewise(VectorOperators.LSHL, 14).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 56);
+ oV = iV.lanewise(VectorOperators.LSHL, 27).or(oV);
+
+ oV.intoArray(out, outpos); // store packed vector 3 of 13
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 5); // carry bits 5 and up
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 64);
+ oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 72);
+ oV = iV.lanewise(VectorOperators.LSHL, 21).or(oV);
+
+ oV.intoArray(out, outpos); // store packed vector 4 of 13
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 11); // carry bits 11 and up
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 80);
+ oV = iV.lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 88);
+ oV = iV.lanewise(VectorOperators.LSHL, 15).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 96);
+ oV = iV.lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ oV.intoArray(out, outpos); // store packed vector 5 of 13
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 4); // carry bits 4 and up
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 104);
+ oV = iV.lanewise(VectorOperators.LSHL, 9).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 112);
+ oV = iV.lanewise(VectorOperators.LSHL, 22).or(oV);
+
+ oV.intoArray(out, outpos); // store packed vector 6 of 13
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 10); // carry bits 10 and up
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 120);
+ oV = iV.lanewise(VectorOperators.LSHL, 3).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 128);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 136);
+ oV = iV.lanewise(VectorOperators.LSHL, 29).or(oV);
+
+ oV.intoArray(out, outpos); // store packed vector 7 of 13
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 3); // carry bits 3 and up
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 144);
+ oV = iV.lanewise(VectorOperators.LSHL, 10).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 152);
+ oV = iV.lanewise(VectorOperators.LSHL, 23).or(oV);
+
+ oV.intoArray(out, outpos); // store packed vector 8 of 13
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 9); // carry bits 9 and up
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 160);
+ oV = iV.lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 168);
+ oV = iV.lanewise(VectorOperators.LSHL, 17).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 176);
+ oV = iV.lanewise(VectorOperators.LSHL, 30).or(oV);
+
+ oV.intoArray(out, outpos); // store packed vector 9 of 13
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 2); // carry bits 2 and up
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 184);
+ oV = iV.lanewise(VectorOperators.LSHL, 11).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 192);
+ oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos); // store packed vector 10 of 13
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 8); // carry bits 8 and up
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 200);
+ oV = iV.lanewise(VectorOperators.LSHL, 5).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 208);
+ oV = iV.lanewise(VectorOperators.LSHL, 18).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 216);
+ oV = iV.lanewise(VectorOperators.LSHL, 31).or(oV);
+
+ oV.intoArray(out, outpos); // store packed vector 11 of 13
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 1); // carry bits 1 and up
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 224);
+ oV = iV.lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 232);
+ oV = iV.lanewise(VectorOperators.LSHL, 25).or(oV);
+
+ oV.intoArray(out, outpos); // store packed vector 12 of 13
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 7); // carry bits 7 and up
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 240);
+ oV = iV.lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 248);
+ oV = iV.lanewise(VectorOperators.LSHL, 19).or(oV); // 19 + 13 == 32: the last field ends at bit 31, nothing is carried
+
+ oV.intoArray(out, outpos); // store packed vector 13 of 13
+ }
+
+ static void fastunpack13(final int[] in, int inpos, final int[] out, int outpos) { // Unpacks 13 packed vectors loaded from in[inpos + 0, 8, ..., 96] into 32 vectors of 13-bit values stored to out starting at outpos.
+ var iV = IntVector.fromArray(SPECIES_256, in, inpos);
+ iV.and(MASK_13).intoArray(out, outpos); // value in bits 0..12
+ outpos += VLEN_256;
+ // NOTE(review): SPECIES_256, VLEN_256 and MASK_13 are declared outside this view; the comments below assume MASK_13 == (1 << 13) - 1 and that VLEN_256 is the lane count -- confirm.
+ iV.lanewise(VectorOperators.LSHR, 13).and(MASK_13).intoArray(out, outpos); // value in bits 13..25
+ outpos += VLEN_256;
+
+ var oV = iV.lanewise(VectorOperators.LSHR, 26).and(MASK_13); // low 6 bits of a value that straddles two packed vectors
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 8);
+ oV = iV.and(127).lanewise(VectorOperators.LSHL, 6).or(oV); // splice: low 7 bits of the next packed vector become bits 6..12
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 7).and(MASK_13).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 20).and(MASK_13);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 16);
+ oV = iV.and(1).lanewise(VectorOperators.LSHL, 12).or(oV); // splice: low 1 bit of the next packed vector becomes bit 12
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 1).and(MASK_13).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 14).and(MASK_13).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 27).and(MASK_13);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 24);
+ oV = iV.and(255).lanewise(VectorOperators.LSHL, 5).or(oV); // splice: low 8 bits become bits 5..12
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 8).and(MASK_13).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 21).and(MASK_13);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 32);
+ oV = iV.and(3).lanewise(VectorOperators.LSHL, 11).or(oV); // splice: low 2 bits become bits 11..12
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 2).and(MASK_13).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 15).and(MASK_13).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 28).and(MASK_13);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 40);
+ oV = iV.and(511).lanewise(VectorOperators.LSHL, 4).or(oV); // splice: low 9 bits become bits 4..12
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 9).and(MASK_13).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 22).and(MASK_13);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 48);
+ oV = iV.and(7).lanewise(VectorOperators.LSHL, 10).or(oV); // splice: low 3 bits become bits 10..12
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 3).and(MASK_13).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 16).and(MASK_13).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 29).and(MASK_13);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 56);
+ oV = iV.and(1023).lanewise(VectorOperators.LSHL, 3).or(oV); // splice: low 10 bits become bits 3..12
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 10).and(MASK_13).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 23).and(MASK_13);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 64);
+ oV = iV.and(15).lanewise(VectorOperators.LSHL, 9).or(oV); // splice: low 4 bits become bits 9..12
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 4).and(MASK_13).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 17).and(MASK_13).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 30).and(MASK_13);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 72);
+ oV = iV.and(2047).lanewise(VectorOperators.LSHL, 2).or(oV); // splice: low 11 bits become bits 2..12
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 11).and(MASK_13).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 24).and(MASK_13);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 80);
+ oV = iV.and(31).lanewise(VectorOperators.LSHL, 8).or(oV); // splice: low 5 bits become bits 8..12
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 5).and(MASK_13).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 18).and(MASK_13).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 31).and(MASK_13);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 88);
+ oV = iV.and(4095).lanewise(VectorOperators.LSHL, 1).or(oV); // splice: low 12 bits become bits 1..12
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 12).and(MASK_13).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 25).and(MASK_13);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 96);
+ oV = iV.and(63).lanewise(VectorOperators.LSHL, 7).or(oV); // splice: low 6 bits become bits 7..12
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 6).and(MASK_13).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 19).and(MASK_13).intoArray(out, outpos); // 32nd and last output vector: bits 19..31 of the 13th packed vector
+ }
+
+ static void fastpack14(final int[] in, int inpos, final int[] out, int outpos) { // Packs 32 vectors loaded from in[inpos + 0, 8, ..., 248] at 14 bits per lane into 14 vectors stored to out starting at outpos.
+ var iV = IntVector.fromArray(SPECIES_256, in, inpos);
+ var oV = iV.and(MASK_14);
+ // NOTE(review): SPECIES_256, VLEN_256 and MASK_14 are declared outside this view; the comments below assume MASK_14 == (1 << 14) - 1 and that VLEN_256 is the lane count -- confirm.
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 8);
+ oV = iV.and(MASK_14).lanewise(VectorOperators.LSHL, 14).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 16);
+ oV = iV.and(MASK_14).lanewise(VectorOperators.LSHL, 28).or(oV); // only the low 4 bits fit in bits 28..31; the rest is carried below
+
+ oV.intoArray(out, outpos); // store packed vector 1 of 14
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_14).lanewise(VectorOperators.LSHR, 4); // carry the high 10 bits of the straddling value
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 24);
+ oV = iV.and(MASK_14).lanewise(VectorOperators.LSHL, 10).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 32);
+ oV = iV.and(MASK_14).lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos); // store packed vector 2 of 14
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_14).lanewise(VectorOperators.LSHR, 8); // carry the high 6 bits
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 40);
+ oV = iV.and(MASK_14).lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 48);
+ oV = iV.and(MASK_14).lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ oV.intoArray(out, outpos); // store packed vector 3 of 14
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_14).lanewise(VectorOperators.LSHR, 12); // carry the high 2 bits
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 56);
+ oV = iV.and(MASK_14).lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 64);
+ oV = iV.and(MASK_14).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 72);
+ oV = iV.and(MASK_14).lanewise(VectorOperators.LSHL, 30).or(oV);
+
+ oV.intoArray(out, outpos); // store packed vector 4 of 14
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_14).lanewise(VectorOperators.LSHR, 2); // carry the high 12 bits
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 80);
+ oV = iV.and(MASK_14).lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 88);
+ oV = iV.and(MASK_14).lanewise(VectorOperators.LSHL, 26).or(oV);
+
+ oV.intoArray(out, outpos); // store packed vector 5 of 14
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_14).lanewise(VectorOperators.LSHR, 6); // carry the high 8 bits
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 96);
+ oV = iV.and(MASK_14).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 104);
+ oV = iV.and(MASK_14).lanewise(VectorOperators.LSHL, 22).or(oV);
+
+ oV.intoArray(out, outpos); // store packed vector 6 of 14
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_14).lanewise(VectorOperators.LSHR, 10); // carry the high 4 bits
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 112);
+ oV = iV.and(MASK_14).lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 120);
+ oV = iV.and(MASK_14).lanewise(VectorOperators.LSHL, 18).or(oV); // 18 + 14 == 32: the field ends exactly at bit 31, nothing is carried
+
+ oV.intoArray(out, outpos); // store packed vector 7 of 14
+ outpos += VLEN_256;
+ // Second half: 16 values * 14 bits == 7 * 32 bits, so the layout realigns to bit 0 here and the shift sequence above repeats for inputs 128..248.
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 128);
+ oV = iV.and(MASK_14);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 136);
+ oV = iV.and(MASK_14).lanewise(VectorOperators.LSHL, 14).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 144);
+ oV = iV.and(MASK_14).lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ oV.intoArray(out, outpos); // store packed vector 8 of 14
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_14).lanewise(VectorOperators.LSHR, 4); // carry the high 10 bits
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 152);
+ oV = iV.and(MASK_14).lanewise(VectorOperators.LSHL, 10).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 160);
+ oV = iV.and(MASK_14).lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos); // store packed vector 9 of 14
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_14).lanewise(VectorOperators.LSHR, 8); // carry the high 6 bits
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 168);
+ oV = iV.and(MASK_14).lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 176);
+ oV = iV.and(MASK_14).lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ oV.intoArray(out, outpos); // store packed vector 10 of 14
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_14).lanewise(VectorOperators.LSHR, 12); // carry the high 2 bits
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 184);
+ oV = iV.and(MASK_14).lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 192);
+ oV = iV.and(MASK_14).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 200);
+ oV = iV.and(MASK_14).lanewise(VectorOperators.LSHL, 30).or(oV);
+
+ oV.intoArray(out, outpos); // store packed vector 11 of 14
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_14).lanewise(VectorOperators.LSHR, 2); // carry the high 12 bits
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 208);
+ oV = iV.and(MASK_14).lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 216);
+ oV = iV.and(MASK_14).lanewise(VectorOperators.LSHL, 26).or(oV);
+
+ oV.intoArray(out, outpos); // store packed vector 12 of 14
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_14).lanewise(VectorOperators.LSHR, 6); // carry the high 8 bits
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 224);
+ oV = iV.and(MASK_14).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 232);
+ oV = iV.and(MASK_14).lanewise(VectorOperators.LSHL, 22).or(oV);
+
+ oV.intoArray(out, outpos); // store packed vector 13 of 14
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_14).lanewise(VectorOperators.LSHR, 10); // carry the high 4 bits
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 240);
+ oV = iV.and(MASK_14).lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 248);
+ oV = iV.and(MASK_14).lanewise(VectorOperators.LSHL, 18).or(oV);
+
+ oV.intoArray(out, outpos); // store packed vector 14 of 14
+ }
+
+ static void fastpackNoMask14(final int[] in, int inpos, final int[] out, int outpos) { // Same 14-bit layout as fastpack14 (32 vectors from in[inpos + 0, 8, ..., 248] into 14 vectors at out[outpos...]) but input lanes are ORed in without masking.
+ var iV = IntVector.fromArray(SPECIES_256, in, inpos);
+ var oV = iV;
+ // NOTE(review): because nothing is masked, any input bit above bit 13 would be ORed into a neighbouring field -- presumably callers guarantee 14-bit inputs; confirm. SPECIES_256 / VLEN_256 are declared outside this view.
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 8);
+ oV = iV.lanewise(VectorOperators.LSHL, 14).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 16);
+ oV = iV.lanewise(VectorOperators.LSHL, 28).or(oV); // only the low 4 bits land in bits 28..31; the rest is carried below
+
+ oV.intoArray(out, outpos); // store packed vector 1 of 14
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 4); // carry bits 4 and up of the straddling value
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 24);
+ oV = iV.lanewise(VectorOperators.LSHL, 10).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 32);
+ oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos); // store packed vector 2 of 14
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 8); // carry bits 8 and up
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 40);
+ oV = iV.lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 48);
+ oV = iV.lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ oV.intoArray(out, outpos); // store packed vector 3 of 14
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 12); // carry bits 12 and up
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 56);
+ oV = iV.lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 64);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 72);
+ oV = iV.lanewise(VectorOperators.LSHL, 30).or(oV);
+
+ oV.intoArray(out, outpos); // store packed vector 4 of 14
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 2); // carry bits 2 and up
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 80);
+ oV = iV.lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 88);
+ oV = iV.lanewise(VectorOperators.LSHL, 26).or(oV);
+
+ oV.intoArray(out, outpos); // store packed vector 5 of 14
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 6); // carry bits 6 and up
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 96);
+ oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 104);
+ oV = iV.lanewise(VectorOperators.LSHL, 22).or(oV);
+
+ oV.intoArray(out, outpos); // store packed vector 6 of 14
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 10); // carry bits 10 and up
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 112);
+ oV = iV.lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 120);
+ oV = iV.lanewise(VectorOperators.LSHL, 18).or(oV); // 18 + 14 == 32: the field ends at bit 31, nothing is carried
+
+ oV.intoArray(out, outpos); // store packed vector 7 of 14
+ outpos += VLEN_256;
+ // Second half: 16 values * 14 bits == 7 * 32 bits, so the layout realigns to bit 0 here and the shift sequence above repeats for inputs 128..248.
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 128);
+ oV = iV;
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 136);
+ oV = iV.lanewise(VectorOperators.LSHL, 14).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 144);
+ oV = iV.lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ oV.intoArray(out, outpos); // store packed vector 8 of 14
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 4); // carry bits 4 and up
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 152);
+ oV = iV.lanewise(VectorOperators.LSHL, 10).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 160);
+ oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos); // store packed vector 9 of 14
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 8); // carry bits 8 and up
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 168);
+ oV = iV.lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 176);
+ oV = iV.lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ oV.intoArray(out, outpos); // store packed vector 10 of 14
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 12); // carry bits 12 and up
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 184);
+ oV = iV.lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 192);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 200);
+ oV = iV.lanewise(VectorOperators.LSHL, 30).or(oV);
+
+ oV.intoArray(out, outpos); // store packed vector 11 of 14
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 2); // carry bits 2 and up
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 208);
+ oV = iV.lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 216);
+ oV = iV.lanewise(VectorOperators.LSHL, 26).or(oV);
+
+ oV.intoArray(out, outpos); // store packed vector 12 of 14
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 6); // carry bits 6 and up
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 224);
+ oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 232);
+ oV = iV.lanewise(VectorOperators.LSHL, 22).or(oV);
+
+ oV.intoArray(out, outpos); // store packed vector 13 of 14
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 10); // carry bits 10 and up
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 240);
+ oV = iV.lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 248);
+ oV = iV.lanewise(VectorOperators.LSHL, 18).or(oV);
+
+ oV.intoArray(out, outpos); // store packed vector 14 of 14
+ }
+
+ static void fastunpack14(final int[] in, int inpos, final int[] out, int outpos) { // Unpacks 14 packed vectors loaded from in[inpos + 0, 8, ..., 104] into 32 vectors of 14-bit values stored to out starting at outpos.
+ var iV = IntVector.fromArray(SPECIES_256, in, inpos);
+ iV.and(MASK_14).intoArray(out, outpos); // value in bits 0..13
+ outpos += VLEN_256;
+ // NOTE(review): SPECIES_256, VLEN_256 and MASK_14 are declared outside this view; the comments below assume MASK_14 == (1 << 14) - 1 and that VLEN_256 is the lane count -- confirm.
+ iV.lanewise(VectorOperators.LSHR, 14).and(MASK_14).intoArray(out, outpos); // value in bits 14..27
+ outpos += VLEN_256;
+
+ var oV = iV.lanewise(VectorOperators.LSHR, 28).and(MASK_14); // low 4 bits of a value that straddles two packed vectors
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 8);
+ oV = iV.and(1023).lanewise(VectorOperators.LSHL, 4).or(oV); // splice: low 10 bits of the next packed vector become bits 4..13
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 10).and(MASK_14).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 24).and(MASK_14);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 16);
+ oV = iV.and(63).lanewise(VectorOperators.LSHL, 8).or(oV); // splice: low 6 bits become bits 8..13
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 6).and(MASK_14).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 20).and(MASK_14);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 24);
+ oV = iV.and(3).lanewise(VectorOperators.LSHL, 12).or(oV); // splice: low 2 bits become bits 12..13
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 2).and(MASK_14).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 16).and(MASK_14).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 30).and(MASK_14);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 32);
+ oV = iV.and(4095).lanewise(VectorOperators.LSHL, 2).or(oV); // splice: low 12 bits become bits 2..13
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 12).and(MASK_14).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 26).and(MASK_14);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 40);
+ oV = iV.and(255).lanewise(VectorOperators.LSHL, 6).or(oV); // splice: low 8 bits become bits 6..13
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 8).and(MASK_14).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 22).and(MASK_14);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 48);
+ oV = iV.and(15).lanewise(VectorOperators.LSHL, 10).or(oV); // splice: low 4 bits become bits 10..13
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 4).and(MASK_14).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 18).and(MASK_14).intoArray(out, outpos); // bits 18..31: the 16th value ends exactly at the word boundary
+ outpos += VLEN_256;
+ // Second half: 16 values * 14 bits == 7 * 32 bits, so the layout realigns to bit 0 here and the steps above repeat for packed vectors 56..104.
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 56);
+ iV.and(MASK_14).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 14).and(MASK_14).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 28).and(MASK_14);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 64);
+ oV = iV.and(1023).lanewise(VectorOperators.LSHL, 4).or(oV); // splice: low 10 bits become bits 4..13
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 10).and(MASK_14).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 24).and(MASK_14);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 72);
+ oV = iV.and(63).lanewise(VectorOperators.LSHL, 8).or(oV); // splice: low 6 bits become bits 8..13
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 6).and(MASK_14).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 20).and(MASK_14);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 80);
+ oV = iV.and(3).lanewise(VectorOperators.LSHL, 12).or(oV); // splice: low 2 bits become bits 12..13
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 2).and(MASK_14).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 16).and(MASK_14).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 30).and(MASK_14);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 88);
+ oV = iV.and(4095).lanewise(VectorOperators.LSHL, 2).or(oV); // splice: low 12 bits become bits 2..13
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 12).and(MASK_14).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 26).and(MASK_14);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 96);
+ oV = iV.and(255).lanewise(VectorOperators.LSHL, 6).or(oV); // splice: low 8 bits become bits 6..13
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 8).and(MASK_14).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 22).and(MASK_14);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 104);
+ oV = iV.and(15).lanewise(VectorOperators.LSHL, 10).or(oV); // splice: low 4 bits become bits 10..13
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 4).and(MASK_14).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 18).and(MASK_14).intoArray(out, outpos); // 32nd and last output vector: bits 18..31 of the 14th packed vector
+ }
+
+ static void fastpack15(final int[] in, int inpos, final int[] out, int outpos) { // Packs 32 input vectors into 15 output vectors, 15 bits per value.
+ var iV = IntVector.fromArray(SPECIES_256, in, inpos);
+ var oV = iV.and(MASK_15);
+ // "Input k" is the vector loaded at in[inpos + 8 * k]; every step is lane-wise, so lanes never mix.
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 8);
+ oV = iV.and(MASK_15).lanewise(VectorOperators.LSHL, 15).or(oV);
+ // Assumes MASK_15 keeps the low 15 bits (defined outside this view - confirm); fields sit at bit offsets 0, 15, 30, ...
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 16);
+ oV = iV.and(MASK_15).lanewise(VectorOperators.LSHL, 30).or(oV);
+ // Word 0 is complete; only the low 2 bits of input 2 fit (30 + 15 exceeds 32).
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Word 1 starts with the 13 high bits of input 2.
+ oV = iV.and(MASK_15).lanewise(VectorOperators.LSHR, 2);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 24);
+ oV = iV.and(MASK_15).lanewise(VectorOperators.LSHL, 13).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 32);
+ oV = iV.and(MASK_15).lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Word 2 starts with the 11 high bits of input 4.
+ oV = iV.and(MASK_15).lanewise(VectorOperators.LSHR, 4);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 40);
+ oV = iV.and(MASK_15).lanewise(VectorOperators.LSHL, 11).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 48);
+ oV = iV.and(MASK_15).lanewise(VectorOperators.LSHL, 26).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Word 3 starts with the 9 high bits of input 6.
+ oV = iV.and(MASK_15).lanewise(VectorOperators.LSHR, 6);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 56);
+ oV = iV.and(MASK_15).lanewise(VectorOperators.LSHL, 9).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 64);
+ oV = iV.and(MASK_15).lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Word 4 starts with the 7 high bits of input 8.
+ oV = iV.and(MASK_15).lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 72);
+ oV = iV.and(MASK_15).lanewise(VectorOperators.LSHL, 7).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 80);
+ oV = iV.and(MASK_15).lanewise(VectorOperators.LSHL, 22).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Word 5 starts with the 5 high bits of input 10.
+ oV = iV.and(MASK_15).lanewise(VectorOperators.LSHR, 10);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 88);
+ oV = iV.and(MASK_15).lanewise(VectorOperators.LSHL, 5).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 96);
+ oV = iV.and(MASK_15).lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Word 6 starts with the 3 high bits of input 12.
+ oV = iV.and(MASK_15).lanewise(VectorOperators.LSHR, 12);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 104);
+ oV = iV.and(MASK_15).lanewise(VectorOperators.LSHL, 3).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 112);
+ oV = iV.and(MASK_15).lanewise(VectorOperators.LSHL, 18).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Word 7 starts with the top bit of input 14, then takes three inputs (offsets 1, 16, 31).
+ oV = iV.and(MASK_15).lanewise(VectorOperators.LSHR, 14);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 120);
+ oV = iV.and(MASK_15).lanewise(VectorOperators.LSHL, 1).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 128);
+ oV = iV.and(MASK_15).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 136);
+ oV = iV.and(MASK_15).lanewise(VectorOperators.LSHL, 31).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Word 8 starts with the 14 high bits of input 17.
+ oV = iV.and(MASK_15).lanewise(VectorOperators.LSHR, 1);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 144);
+ oV = iV.and(MASK_15).lanewise(VectorOperators.LSHL, 14).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 152);
+ oV = iV.and(MASK_15).lanewise(VectorOperators.LSHL, 29).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Word 9 starts with the 12 high bits of input 19.
+ oV = iV.and(MASK_15).lanewise(VectorOperators.LSHR, 3);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 160);
+ oV = iV.and(MASK_15).lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 168);
+ oV = iV.and(MASK_15).lanewise(VectorOperators.LSHL, 27).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Word 10 starts with the 10 high bits of input 21.
+ oV = iV.and(MASK_15).lanewise(VectorOperators.LSHR, 5);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 176);
+ oV = iV.and(MASK_15).lanewise(VectorOperators.LSHL, 10).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 184);
+ oV = iV.and(MASK_15).lanewise(VectorOperators.LSHL, 25).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Word 11 starts with the 8 high bits of input 23.
+ oV = iV.and(MASK_15).lanewise(VectorOperators.LSHR, 7);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 192);
+ oV = iV.and(MASK_15).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 200);
+ oV = iV.and(MASK_15).lanewise(VectorOperators.LSHL, 23).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Word 12 starts with the 6 high bits of input 25.
+ oV = iV.and(MASK_15).lanewise(VectorOperators.LSHR, 9);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 208);
+ oV = iV.and(MASK_15).lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 216);
+ oV = iV.and(MASK_15).lanewise(VectorOperators.LSHL, 21).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Word 13 starts with the 4 high bits of input 27.
+ oV = iV.and(MASK_15).lanewise(VectorOperators.LSHR, 11);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 224);
+ oV = iV.and(MASK_15).lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 232);
+ oV = iV.and(MASK_15).lanewise(VectorOperators.LSHL, 19).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Word 14 starts with the 2 high bits of input 29.
+ oV = iV.and(MASK_15).lanewise(VectorOperators.LSHR, 13);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 240);
+ oV = iV.and(MASK_15).lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 248);
+ oV = iV.and(MASK_15).lanewise(VectorOperators.LSHL, 17).or(oV);
+ // Word 14 ends exactly at bit 32 (17 + 15), so nothing is carried.
+ oV.intoArray(out, outpos);
+ }
+
+ static void fastpackNoMask15(final int[] in, int inpos, final int[] out, int outpos) { // Packs 32 input vectors into 15 output vectors at 15-bit offsets, without masking the inputs.
+ var iV = IntVector.fromArray(SPECIES_256, in, inpos);
+ var oV = iV;
+ // "Input k" is the vector loaded at in[inpos + 8 * k]; every step is lane-wise, so lanes never mix.
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 8);
+ oV = iV.lanewise(VectorOperators.LSHL, 15).or(oV);
+ // No mask is applied: any bit above bit 14 of an input lane is OR-ed into the neighbouring field, so inputs presumably must already fit in 15 bits.
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 16);
+ oV = iV.lanewise(VectorOperators.LSHL, 30).or(oV);
+ // Word 0 is complete; only the low 2 bits of input 2 fit (30 + 15 exceeds 32).
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Word 1 starts with bits 2 and up of input 2.
+ oV = iV.lanewise(VectorOperators.LSHR, 2);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 24);
+ oV = iV.lanewise(VectorOperators.LSHL, 13).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 32);
+ oV = iV.lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Word 2 starts with bits 4 and up of input 4.
+ oV = iV.lanewise(VectorOperators.LSHR, 4);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 40);
+ oV = iV.lanewise(VectorOperators.LSHL, 11).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 48);
+ oV = iV.lanewise(VectorOperators.LSHL, 26).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Word 3 starts with bits 6 and up of input 6.
+ oV = iV.lanewise(VectorOperators.LSHR, 6);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 56);
+ oV = iV.lanewise(VectorOperators.LSHL, 9).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 64);
+ oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Word 4 starts with bits 8 and up of input 8.
+ oV = iV.lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 72);
+ oV = iV.lanewise(VectorOperators.LSHL, 7).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 80);
+ oV = iV.lanewise(VectorOperators.LSHL, 22).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Word 5 starts with bits 10 and up of input 10.
+ oV = iV.lanewise(VectorOperators.LSHR, 10);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 88);
+ oV = iV.lanewise(VectorOperators.LSHL, 5).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 96);
+ oV = iV.lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Word 6 starts with bits 12 and up of input 12.
+ oV = iV.lanewise(VectorOperators.LSHR, 12);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 104);
+ oV = iV.lanewise(VectorOperators.LSHL, 3).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 112);
+ oV = iV.lanewise(VectorOperators.LSHL, 18).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Word 7 starts with bits 14 and up of input 14, then takes three inputs (offsets 1, 16, 31).
+ oV = iV.lanewise(VectorOperators.LSHR, 14);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 120);
+ oV = iV.lanewise(VectorOperators.LSHL, 1).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 128);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 136);
+ oV = iV.lanewise(VectorOperators.LSHL, 31).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Word 8 starts with bits 1 and up of input 17.
+ oV = iV.lanewise(VectorOperators.LSHR, 1);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 144);
+ oV = iV.lanewise(VectorOperators.LSHL, 14).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 152);
+ oV = iV.lanewise(VectorOperators.LSHL, 29).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Word 9 starts with bits 3 and up of input 19.
+ oV = iV.lanewise(VectorOperators.LSHR, 3);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 160);
+ oV = iV.lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 168);
+ oV = iV.lanewise(VectorOperators.LSHL, 27).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Word 10 starts with bits 5 and up of input 21.
+ oV = iV.lanewise(VectorOperators.LSHR, 5);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 176);
+ oV = iV.lanewise(VectorOperators.LSHL, 10).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 184);
+ oV = iV.lanewise(VectorOperators.LSHL, 25).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Word 11 starts with bits 7 and up of input 23.
+ oV = iV.lanewise(VectorOperators.LSHR, 7);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 192);
+ oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 200);
+ oV = iV.lanewise(VectorOperators.LSHL, 23).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Word 12 starts with bits 9 and up of input 25.
+ oV = iV.lanewise(VectorOperators.LSHR, 9);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 208);
+ oV = iV.lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 216);
+ oV = iV.lanewise(VectorOperators.LSHL, 21).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Word 13 starts with bits 11 and up of input 27.
+ oV = iV.lanewise(VectorOperators.LSHR, 11);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 224);
+ oV = iV.lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 232);
+ oV = iV.lanewise(VectorOperators.LSHL, 19).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Word 14 starts with bits 13 and up of input 29.
+ oV = iV.lanewise(VectorOperators.LSHR, 13);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 240);
+ oV = iV.lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 248);
+ oV = iV.lanewise(VectorOperators.LSHL, 17).or(oV);
+ // Last field sits at bit offset 17 (17 + 15 = 32), so nothing is carried.
+ oV.intoArray(out, outpos);
+ }
+
+ static void fastunpack15(final int[] in, int inpos, final int[] out, int outpos) { // Unpacks 15 packed input vectors into 32 output vectors of 15-bit values.
+ var iV = IntVector.fromArray(SPECIES_256, in, inpos);
+ iV.and(MASK_15).intoArray(out, outpos);
+ outpos += VLEN_256;
+ // "Word w" is the vector loaded at in[inpos + 8 * w]; "value k" is the k-th vector stored to out. Assumes MASK_15 keeps the low 15 bits (defined outside this view - confirm).
+ iV.lanewise(VectorOperators.LSHR, 15).and(MASK_15).intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Value 2 straddles words 0 and 1: low 2 bits here, high 13 bits (literal mask 8191) from the next word.
+ var oV = iV.lanewise(VectorOperators.LSHR, 30).and(MASK_15);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 8);
+ oV = iV.and(8191).lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 13).and(MASK_15).intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Value 4: low 4 bits from word 1, high 11 bits from word 2.
+ oV = iV.lanewise(VectorOperators.LSHR, 28).and(MASK_15);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 16);
+ oV = iV.and(2047).lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 11).and(MASK_15).intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Value 6: low 6 bits from word 2, high 9 bits from word 3.
+ oV = iV.lanewise(VectorOperators.LSHR, 26).and(MASK_15);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 24);
+ oV = iV.and(511).lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 9).and(MASK_15).intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Value 8: low 8 bits from word 3, high 7 bits from word 4.
+ oV = iV.lanewise(VectorOperators.LSHR, 24).and(MASK_15);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 32);
+ oV = iV.and(127).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 7).and(MASK_15).intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Value 10: low 10 bits from word 4, high 5 bits from word 5.
+ oV = iV.lanewise(VectorOperators.LSHR, 22).and(MASK_15);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 40);
+ oV = iV.and(31).lanewise(VectorOperators.LSHL, 10).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 5).and(MASK_15).intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Value 12: low 12 bits from word 5, high 3 bits from word 6.
+ oV = iV.lanewise(VectorOperators.LSHR, 20).and(MASK_15);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 48);
+ oV = iV.and(7).lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 3).and(MASK_15).intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Value 14: low 14 bits from word 6, top bit from word 7.
+ oV = iV.lanewise(VectorOperators.LSHR, 18).and(MASK_15);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 56);
+ oV = iV.and(1).lanewise(VectorOperators.LSHL, 14).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 1).and(MASK_15).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 16).and(MASK_15).intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Value 17: low 1 bit from word 7, high 14 bits from word 8.
+ oV = iV.lanewise(VectorOperators.LSHR, 31).and(MASK_15);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 64);
+ oV = iV.and(16383).lanewise(VectorOperators.LSHL, 1).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 14).and(MASK_15).intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Value 19: low 3 bits from word 8, high 12 bits from word 9.
+ oV = iV.lanewise(VectorOperators.LSHR, 29).and(MASK_15);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 72);
+ oV = iV.and(4095).lanewise(VectorOperators.LSHL, 3).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 12).and(MASK_15).intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Value 21: low 5 bits from word 9, high 10 bits from word 10.
+ oV = iV.lanewise(VectorOperators.LSHR, 27).and(MASK_15);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 80);
+ oV = iV.and(1023).lanewise(VectorOperators.LSHL, 5).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 10).and(MASK_15).intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Value 23: low 7 bits from word 10, high 8 bits from word 11.
+ oV = iV.lanewise(VectorOperators.LSHR, 25).and(MASK_15);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 88);
+ oV = iV.and(255).lanewise(VectorOperators.LSHL, 7).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 8).and(MASK_15).intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Value 25: low 9 bits from word 11, high 6 bits from word 12.
+ oV = iV.lanewise(VectorOperators.LSHR, 23).and(MASK_15);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 96);
+ oV = iV.and(63).lanewise(VectorOperators.LSHL, 9).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 6).and(MASK_15).intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Value 27: low 11 bits from word 12, high 4 bits from word 13.
+ oV = iV.lanewise(VectorOperators.LSHR, 21).and(MASK_15);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 104);
+ oV = iV.and(15).lanewise(VectorOperators.LSHL, 11).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 4).and(MASK_15).intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Value 29: low 13 bits from word 13, high 2 bits from word 14.
+ oV = iV.lanewise(VectorOperators.LSHR, 19).and(MASK_15);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 112);
+ oV = iV.and(3).lanewise(VectorOperators.LSHL, 13).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 2).and(MASK_15).intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Value 31 is the top 15 bits of word 14 (bits 17 to 31).
+ iV.lanewise(VectorOperators.LSHR, 17).and(MASK_15).intoArray(out, outpos);
+ }
+
+ static void fastpack16(final int[] in, int inpos, final int[] out, int outpos) { // Packs 32 input vectors into 16 output vectors, two 16-bit values per lane.
+ var iV = IntVector.fromArray(SPECIES_256, in, inpos);
+ var oV = iV.and(MASK_16);
+ // "Input k" is the vector loaded at in[inpos + 8 * k]; even inputs fill the low half-word, odd inputs the high half-word.
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 8);
+ oV = iV.and(MASK_16).lanewise(VectorOperators.LSHL, 16).or(oV);
+ // Assumes MASK_16 keeps the low 16 bits (defined outside this view - confirm). Word 0: inputs 0 and 1.
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Word 1: inputs 2 and 3.
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 16);
+ oV = iV.and(MASK_16);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 24);
+ oV = iV.and(MASK_16).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Word 2: inputs 4 and 5.
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 32);
+ oV = iV.and(MASK_16);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 40);
+ oV = iV.and(MASK_16).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Word 3: inputs 6 and 7.
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 48);
+ oV = iV.and(MASK_16);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 56);
+ oV = iV.and(MASK_16).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Word 4: inputs 8 and 9.
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 64);
+ oV = iV.and(MASK_16);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 72);
+ oV = iV.and(MASK_16).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Word 5: inputs 10 and 11.
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 80);
+ oV = iV.and(MASK_16);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 88);
+ oV = iV.and(MASK_16).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Word 6: inputs 12 and 13.
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 96);
+ oV = iV.and(MASK_16);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 104);
+ oV = iV.and(MASK_16).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Word 7: inputs 14 and 15.
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 112);
+ oV = iV.and(MASK_16);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 120);
+ oV = iV.and(MASK_16).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Word 8: inputs 16 and 17.
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 128);
+ oV = iV.and(MASK_16);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 136);
+ oV = iV.and(MASK_16).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Word 9: inputs 18 and 19.
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 144);
+ oV = iV.and(MASK_16);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 152);
+ oV = iV.and(MASK_16).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Word 10: inputs 20 and 21.
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 160);
+ oV = iV.and(MASK_16);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 168);
+ oV = iV.and(MASK_16).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Word 11: inputs 22 and 23.
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 176);
+ oV = iV.and(MASK_16);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 184);
+ oV = iV.and(MASK_16).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Word 12: inputs 24 and 25.
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 192);
+ oV = iV.and(MASK_16);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 200);
+ oV = iV.and(MASK_16).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Word 13: inputs 26 and 27.
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 208);
+ oV = iV.and(MASK_16);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 216);
+ oV = iV.and(MASK_16).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Word 14: inputs 28 and 29.
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 224);
+ oV = iV.and(MASK_16);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 232);
+ oV = iV.and(MASK_16).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Word 15: inputs 30 and 31.
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 240);
+ oV = iV.and(MASK_16);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 248);
+ oV = iV.and(MASK_16).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ }
+
+ static void fastpackNoMask16(final int[] in, int inpos, final int[] out, int outpos) { // Packs 32 input vectors into 16 output vectors at 16-bit offsets, without masking the inputs.
+ var iV = IntVector.fromArray(SPECIES_256, in, inpos);
+ var oV = iV;
+ // "Input k" is the vector loaded at in[inpos + 8 * k]; even inputs are taken as-is, odd inputs are shifted into the high half-word.
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 8);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
+ // No mask is applied: bits 16 and up of an even input are OR-ed over the odd input's field, so inputs presumably must already fit in 16 bits.
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Word 1: inputs 2 and 3.
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 16);
+ oV = iV;
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 24);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Word 2: inputs 4 and 5.
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 32);
+ oV = iV;
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 40);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Word 3: inputs 6 and 7.
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 48);
+ oV = iV;
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 56);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Word 4: inputs 8 and 9.
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 64);
+ oV = iV;
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 72);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Word 5: inputs 10 and 11.
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 80);
+ oV = iV;
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 88);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Word 6: inputs 12 and 13.
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 96);
+ oV = iV;
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 104);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Word 7: inputs 14 and 15.
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 112);
+ oV = iV;
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 120);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Word 8: inputs 16 and 17.
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 128);
+ oV = iV;
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 136);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Word 9: inputs 18 and 19.
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 144);
+ oV = iV;
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 152);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Word 10: inputs 20 and 21.
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 160);
+ oV = iV;
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 168);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Word 11: inputs 22 and 23.
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 176);
+ oV = iV;
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 184);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Word 12: inputs 24 and 25.
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 192);
+ oV = iV;
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 200);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Word 13: inputs 26 and 27.
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 208);
+ oV = iV;
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 216);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Word 14: inputs 28 and 29.
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 224);
+ oV = iV;
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 232);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Word 15: inputs 30 and 31.
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 240);
+ oV = iV;
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 248);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ }
+
+ static void fastunpack16(final int[] in, int inpos, final int[] out, int outpos) { // Unpacks 16 packed input vectors into 32 output vectors of 16-bit values.
+ var iV = IntVector.fromArray(SPECIES_256, in, inpos);
+ iV.and(MASK_16).intoArray(out, outpos);
+ outpos += VLEN_256;
+ // "Word w" is the vector loaded at in[inpos + 8 * w]; its low half-word is emitted first, then its high half-word. Assumes MASK_16 keeps the low 16 bits (defined outside this view - confirm).
+ iV.lanewise(VectorOperators.LSHR, 16).and(MASK_16).intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Word 1 yields values 2 and 3.
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 8);
+ iV.and(MASK_16).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 16).and(MASK_16).intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Word 2 yields values 4 and 5.
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 16);
+ iV.and(MASK_16).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 16).and(MASK_16).intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Word 3 yields values 6 and 7.
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 24);
+ iV.and(MASK_16).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 16).and(MASK_16).intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Word 4 yields values 8 and 9.
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 32);
+ iV.and(MASK_16).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 16).and(MASK_16).intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Word 5 yields values 10 and 11.
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 40);
+ iV.and(MASK_16).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 16).and(MASK_16).intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Word 6 yields values 12 and 13.
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 48);
+ iV.and(MASK_16).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 16).and(MASK_16).intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Word 7 yields values 14 and 15.
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 56);
+ iV.and(MASK_16).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 16).and(MASK_16).intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Word 8 yields values 16 and 17.
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 64);
+ iV.and(MASK_16).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 16).and(MASK_16).intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Word 9 yields values 18 and 19.
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 72);
+ iV.and(MASK_16).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 16).and(MASK_16).intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Word 10 yields values 20 and 21.
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 80);
+ iV.and(MASK_16).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 16).and(MASK_16).intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Word 11 yields values 22 and 23.
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 88);
+ iV.and(MASK_16).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 16).and(MASK_16).intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Word 12 yields values 24 and 25.
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 96);
+ iV.and(MASK_16).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 16).and(MASK_16).intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Word 13 yields values 26 and 27.
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 104);
+ iV.and(MASK_16).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 16).and(MASK_16).intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Word 14 yields values 28 and 29.
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 112);
+ iV.and(MASK_16).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 16).and(MASK_16).intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Word 15 yields values 30 and 31.
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 120);
+ iV.and(MASK_16).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 16).and(MASK_16).intoArray(out, outpos);
+ }
+
+ static void fastpack17(final int[] in, int inpos, final int[] out, int outpos) { // Packs 32 input vectors into 17 output vectors, 17 bits per value.
+ var iV = IntVector.fromArray(SPECIES_256, in, inpos);
+ var oV = iV.and(MASK_17);
+ // "Input k" is the vector loaded at in[inpos + 8 * k]; every step is lane-wise, so lanes never mix.
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 8);
+ oV = iV.and(MASK_17).lanewise(VectorOperators.LSHL, 17).or(oV);
+ // Assumes MASK_17 keeps the low 17 bits (defined outside this view - confirm). Word 0 holds input 0 and the low 15 bits of input 1.
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Word 1 starts with the 2 high bits of input 1.
+ oV = iV.and(MASK_17).lanewise(VectorOperators.LSHR, 15);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 16);
+ oV = iV.and(MASK_17).lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 24);
+ oV = iV.and(MASK_17).lanewise(VectorOperators.LSHL, 19).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Word 2 starts with the 4 high bits of input 3.
+ oV = iV.and(MASK_17).lanewise(VectorOperators.LSHR, 13);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 32);
+ oV = iV.and(MASK_17).lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 40);
+ oV = iV.and(MASK_17).lanewise(VectorOperators.LSHL, 21).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Word 3 starts with the 6 high bits of input 5.
+ oV = iV.and(MASK_17).lanewise(VectorOperators.LSHR, 11);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 48);
+ oV = iV.and(MASK_17).lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 56);
+ oV = iV.and(MASK_17).lanewise(VectorOperators.LSHL, 23).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Word 4 starts with the 8 high bits of input 7.
+ oV = iV.and(MASK_17).lanewise(VectorOperators.LSHR, 9);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 64);
+ oV = iV.and(MASK_17).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 72);
+ oV = iV.and(MASK_17).lanewise(VectorOperators.LSHL, 25).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Word 5 starts with the 10 high bits of input 9.
+ oV = iV.and(MASK_17).lanewise(VectorOperators.LSHR, 7);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 80);
+ oV = iV.and(MASK_17).lanewise(VectorOperators.LSHL, 10).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 88);
+ oV = iV.and(MASK_17).lanewise(VectorOperators.LSHL, 27).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Word 6 starts with the 12 high bits of input 11.
+ oV = iV.and(MASK_17).lanewise(VectorOperators.LSHR, 5);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 96);
+ oV = iV.and(MASK_17).lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 104);
+ oV = iV.and(MASK_17).lanewise(VectorOperators.LSHL, 29).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Word 7 starts with the 14 high bits of input 13.
+ oV = iV.and(MASK_17).lanewise(VectorOperators.LSHR, 3);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 112);
+ oV = iV.and(MASK_17).lanewise(VectorOperators.LSHL, 14).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 120);
+ oV = iV.and(MASK_17).lanewise(VectorOperators.LSHL, 31).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Word 8 starts with the 16 high bits of input 15 and takes only one more input (its low 16 bits).
+ oV = iV.and(MASK_17).lanewise(VectorOperators.LSHR, 1);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 128);
+ oV = iV.and(MASK_17).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Word 9 starts with the top bit of input 16.
+ oV = iV.and(MASK_17).lanewise(VectorOperators.LSHR, 16);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 136);
+ oV = iV.and(MASK_17).lanewise(VectorOperators.LSHL, 1).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 144);
+ oV = iV.and(MASK_17).lanewise(VectorOperators.LSHL, 18).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Word 10 starts with the 3 high bits of input 18.
+ oV = iV.and(MASK_17).lanewise(VectorOperators.LSHR, 14);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 152);
+ oV = iV.and(MASK_17).lanewise(VectorOperators.LSHL, 3).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 160);
+ oV = iV.and(MASK_17).lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Word 11 starts with the 5 high bits of input 20.
+ oV = iV.and(MASK_17).lanewise(VectorOperators.LSHR, 12);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 168);
+ oV = iV.and(MASK_17).lanewise(VectorOperators.LSHL, 5).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 176);
+ oV = iV.and(MASK_17).lanewise(VectorOperators.LSHL, 22).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Word 12 starts with the 7 high bits of input 22.
+ oV = iV.and(MASK_17).lanewise(VectorOperators.LSHR, 10);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 184);
+ oV = iV.and(MASK_17).lanewise(VectorOperators.LSHL, 7).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 192);
+ oV = iV.and(MASK_17).lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Word 13 starts with the 9 high bits of input 24.
+ oV = iV.and(MASK_17).lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 200);
+ oV = iV.and(MASK_17).lanewise(VectorOperators.LSHL, 9).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 208);
+ oV = iV.and(MASK_17).lanewise(VectorOperators.LSHL, 26).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Word 14 starts with the 11 high bits of input 26.
+ oV = iV.and(MASK_17).lanewise(VectorOperators.LSHR, 6);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 216);
+ oV = iV.and(MASK_17).lanewise(VectorOperators.LSHL, 11).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 224);
+ oV = iV.and(MASK_17).lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Word 15 starts with the 13 high bits of input 28.
+ oV = iV.and(MASK_17).lanewise(VectorOperators.LSHR, 4);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 232);
+ oV = iV.and(MASK_17).lanewise(VectorOperators.LSHL, 13).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 240);
+ oV = iV.and(MASK_17).lanewise(VectorOperators.LSHL, 30).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Word 16 starts with the 15 high bits of input 30.
+ oV = iV.and(MASK_17).lanewise(VectorOperators.LSHR, 2);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 248);
+ oV = iV.and(MASK_17).lanewise(VectorOperators.LSHL, 15).or(oV);
+ // Word 16 ends exactly at bit 32 (15 + 17), so nothing is carried.
+ oV.intoArray(out, outpos);
+ }
+
+ static void fastpackNoMask17(final int[] in, int inpos, final int[] out, int outpos) { // Bit-packs 32 input vectors (loads at in[inpos + 0, 8, ..., 248]) at 17 bits per lane into 17 output vectors stored from out[outpos], without masking the inputs.
+ var iV = IntVector.fromArray(SPECIES_256, in, inpos);
+ var oV = iV;
+ // oV is the output accumulator. Input 0 is taken unmasked, so any bits above bit 16 would overlap the next field (NOTE(review): callers presumably guarantee values fit in 17 bits - confirm).
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 8);
+ oV = iV.lanewise(VectorOperators.LSHL, 17).or(oV);
+ // First output vector is complete: input 0 from bit 0, input 1 from bit 17 (only its low 15 bits survive the shift).
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Carry: the part of input 1 that was shifted out above (value >>> 15) seeds the next accumulator. The same carry/shift/or/store pattern repeats below.
+ oV = iV.lanewise(VectorOperators.LSHR, 15);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 16);
+ oV = iV.lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 24);
+ oV = iV.lanewise(VectorOperators.LSHL, 19).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 13);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 32);
+ oV = iV.lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 40);
+ oV = iV.lanewise(VectorOperators.LSHL, 21).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 11);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 48);
+ oV = iV.lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 56);
+ oV = iV.lanewise(VectorOperators.LSHL, 23).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 9);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 64);
+ oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 72);
+ oV = iV.lanewise(VectorOperators.LSHL, 25).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 7);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 80);
+ oV = iV.lanewise(VectorOperators.LSHL, 10).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 88);
+ oV = iV.lanewise(VectorOperators.LSHL, 27).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 5);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 96);
+ oV = iV.lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 104);
+ oV = iV.lanewise(VectorOperators.LSHL, 29).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 3);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 112);
+ oV = iV.lanewise(VectorOperators.LSHL, 14).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 120);
+ oV = iV.lanewise(VectorOperators.LSHL, 31).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 1);
+ // The carry from the input at offset 120 plus the single input at offset 128 (placed at bit 16) completes this word, so only one load precedes the store.
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 128);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // The input at offset 128 straddles the word boundary at bit 16; its upper part (value >>> 16) starts the next output vector.
+ oV = iV.lanewise(VectorOperators.LSHR, 16);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 136);
+ oV = iV.lanewise(VectorOperators.LSHL, 1).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 144);
+ oV = iV.lanewise(VectorOperators.LSHL, 18).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 14);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 152);
+ oV = iV.lanewise(VectorOperators.LSHL, 3).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 160);
+ oV = iV.lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 12);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 168);
+ oV = iV.lanewise(VectorOperators.LSHL, 5).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 176);
+ oV = iV.lanewise(VectorOperators.LSHL, 22).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 10);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 184);
+ oV = iV.lanewise(VectorOperators.LSHL, 7).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 192);
+ oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 200);
+ oV = iV.lanewise(VectorOperators.LSHL, 9).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 208);
+ oV = iV.lanewise(VectorOperators.LSHL, 26).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 6);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 216);
+ oV = iV.lanewise(VectorOperators.LSHL, 11).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 224);
+ oV = iV.lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 4);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 232);
+ oV = iV.lanewise(VectorOperators.LSHL, 13).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 240);
+ oV = iV.lanewise(VectorOperators.LSHL, 30).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 2);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 248);
+ oV = iV.lanewise(VectorOperators.LSHL, 15).or(oV);
+ // Final (17th) store; the last input ends exactly at bit 31, so nothing is carried and outpos is not advanced again.
+ oV.intoArray(out, outpos);
+ }
+
+ static void fastunpack17(final int[] in, int inpos, final int[] out, int outpos) { // Unpacks 17 packed vectors (loads at in[inpos + 0, 8, ..., 128]) into 32 output vectors stored from out[outpos], stepping outpos by VLEN_256 per store.
+ var iV = IntVector.fromArray(SPECIES_256, in, inpos);
+ iV.and(MASK_17).intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Value 1 straddles two packed words: first take what remains above bit 17 of the current word (NOTE(review): MASK_17 is declared elsewhere, presumably (1 << 17) - 1 - confirm)...
+ var oV = iV.lanewise(VectorOperators.LSHR, 17).and(MASK_17);
+ // ...then append the low 2 bits of the next word at bit 15 (15 + 2 = 17 bits).
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 8);
+ oV = iV.and(3).lanewise(VectorOperators.LSHL, 15).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Value 2 lies wholly inside the current word, starting at bit 2. The same in-word / straddling alternation repeats below with shifting offsets.
+ iV.lanewise(VectorOperators.LSHR, 2).and(MASK_17).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 19).and(MASK_17);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 16);
+ oV = iV.and(15).lanewise(VectorOperators.LSHL, 13).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 4).and(MASK_17).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 21).and(MASK_17);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 24);
+ oV = iV.and(63).lanewise(VectorOperators.LSHL, 11).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 6).and(MASK_17).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 23).and(MASK_17);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 32);
+ oV = iV.and(255).lanewise(VectorOperators.LSHL, 9).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 8).and(MASK_17).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 25).and(MASK_17);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 40);
+ oV = iV.and(1023).lanewise(VectorOperators.LSHL, 7).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 10).and(MASK_17).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 27).and(MASK_17);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 48);
+ oV = iV.and(4095).lanewise(VectorOperators.LSHL, 5).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 12).and(MASK_17).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 29).and(MASK_17);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 56);
+ oV = iV.and(16383).lanewise(VectorOperators.LSHL, 3).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 14).and(MASK_17).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 31).and(MASK_17);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 64);
+ oV = iV.and(65535).lanewise(VectorOperators.LSHL, 1).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // No value fits wholly inside the word at offset 64: its bits 0-15 finished the previous value and its bits 16-31 start the next one, so two straddling values occur back to back.
+ oV = iV.lanewise(VectorOperators.LSHR, 16).and(MASK_17);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 72);
+ oV = iV.and(1).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 1).and(MASK_17).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 18).and(MASK_17);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 80);
+ oV = iV.and(7).lanewise(VectorOperators.LSHL, 14).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 3).and(MASK_17).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 20).and(MASK_17);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 88);
+ oV = iV.and(31).lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 5).and(MASK_17).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 22).and(MASK_17);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 96);
+ oV = iV.and(127).lanewise(VectorOperators.LSHL, 10).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 7).and(MASK_17).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 24).and(MASK_17);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 104);
+ oV = iV.and(511).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 9).and(MASK_17).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 26).and(MASK_17);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 112);
+ oV = iV.and(2047).lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 11).and(MASK_17).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 28).and(MASK_17);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 120);
+ oV = iV.and(8191).lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 13).and(MASK_17).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 30).and(MASK_17);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 128);
+ oV = iV.and(32767).lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Last (32nd) value: the top 17 bits of the final packed word.
+ iV.lanewise(VectorOperators.LSHR, 15).and(MASK_17).intoArray(out, outpos);
+ }
+
+ static void fastpack18(final int[] in, int inpos, final int[] out, int outpos) { // Bit-packs 32 input vectors (loads at in[inpos + 0, 8, ..., 248]) at 18 bits per lane into 18 output vectors stored from out[outpos]; every input is and-ed with MASK_18 first.
+ var iV = IntVector.fromArray(SPECIES_256, in, inpos);
+ var oV = iV.and(MASK_18);
+ // oV is the output accumulator. NOTE(review): MASK_18 is declared outside this method; presumably it broadcasts (1 << 18) - 1 - confirm.
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 8);
+ oV = iV.and(MASK_18).lanewise(VectorOperators.LSHL, 18).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Carry: the part of the masked input that was shifted out of the stored word (masked value >>> 14) seeds the next accumulator. The same pattern repeats below.
+ oV = iV.and(MASK_18).lanewise(VectorOperators.LSHR, 14);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 16);
+ oV = iV.and(MASK_18).lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 24);
+ oV = iV.and(MASK_18).lanewise(VectorOperators.LSHL, 22).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_18).lanewise(VectorOperators.LSHR, 10);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 32);
+ oV = iV.and(MASK_18).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 40);
+ oV = iV.and(MASK_18).lanewise(VectorOperators.LSHL, 26).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_18).lanewise(VectorOperators.LSHR, 6);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 48);
+ oV = iV.and(MASK_18).lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 56);
+ oV = iV.and(MASK_18).lanewise(VectorOperators.LSHL, 30).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_18).lanewise(VectorOperators.LSHR, 2);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 64);
+ oV = iV.and(MASK_18).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_18).lanewise(VectorOperators.LSHR, 16);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 72);
+ oV = iV.and(MASK_18).lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 80);
+ oV = iV.and(MASK_18).lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_18).lanewise(VectorOperators.LSHR, 12);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 88);
+ oV = iV.and(MASK_18).lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 96);
+ oV = iV.and(MASK_18).lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_18).lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 104);
+ oV = iV.and(MASK_18).lanewise(VectorOperators.LSHL, 10).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 112);
+ oV = iV.and(MASK_18).lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_18).lanewise(VectorOperators.LSHR, 4);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 120);
+ oV = iV.and(MASK_18).lanewise(VectorOperators.LSHL, 14).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // The first 16 inputs filled exactly 9 output vectors (16 * 18 = 9 * 32 bits), so the second half restarts at bit 0 with no carry and repeats the same shift schedule.
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 128);
+ oV = iV.and(MASK_18);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 136);
+ oV = iV.and(MASK_18).lanewise(VectorOperators.LSHL, 18).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_18).lanewise(VectorOperators.LSHR, 14);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 144);
+ oV = iV.and(MASK_18).lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 152);
+ oV = iV.and(MASK_18).lanewise(VectorOperators.LSHL, 22).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_18).lanewise(VectorOperators.LSHR, 10);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 160);
+ oV = iV.and(MASK_18).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 168);
+ oV = iV.and(MASK_18).lanewise(VectorOperators.LSHL, 26).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_18).lanewise(VectorOperators.LSHR, 6);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 176);
+ oV = iV.and(MASK_18).lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 184);
+ oV = iV.and(MASK_18).lanewise(VectorOperators.LSHL, 30).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_18).lanewise(VectorOperators.LSHR, 2);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 192);
+ oV = iV.and(MASK_18).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_18).lanewise(VectorOperators.LSHR, 16);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 200);
+ oV = iV.and(MASK_18).lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 208);
+ oV = iV.and(MASK_18).lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_18).lanewise(VectorOperators.LSHR, 12);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 216);
+ oV = iV.and(MASK_18).lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 224);
+ oV = iV.and(MASK_18).lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_18).lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 232);
+ oV = iV.and(MASK_18).lanewise(VectorOperators.LSHL, 10).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 240);
+ oV = iV.and(MASK_18).lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_18).lanewise(VectorOperators.LSHR, 4);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 248);
+ oV = iV.and(MASK_18).lanewise(VectorOperators.LSHL, 14).or(oV);
+ // Final (18th) store; outpos is not advanced again.
+ oV.intoArray(out, outpos);
+ }
+
+ static void fastpackNoMask18(final int[] in, int inpos, final int[] out, int outpos) { // Bit-packs 32 input vectors (loads at in[inpos + 0, 8, ..., 248]) at 18 bits per lane into 18 output vectors stored from out[outpos], without masking the inputs.
+ var iV = IntVector.fromArray(SPECIES_256, in, inpos);
+ var oV = iV;
+ // oV is the output accumulator. Inputs are used unmasked, so bits above bit 17 would overlap neighbouring fields (NOTE(review): callers presumably guarantee values fit in 18 bits - confirm).
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 8);
+ oV = iV.lanewise(VectorOperators.LSHL, 18).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Carry: the part of the input that was shifted out of the stored word (value >>> 14) seeds the next accumulator. The same pattern repeats below.
+ oV = iV.lanewise(VectorOperators.LSHR, 14);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 16);
+ oV = iV.lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 24);
+ oV = iV.lanewise(VectorOperators.LSHL, 22).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 10);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 32);
+ oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 40);
+ oV = iV.lanewise(VectorOperators.LSHL, 26).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 6);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 48);
+ oV = iV.lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 56);
+ oV = iV.lanewise(VectorOperators.LSHL, 30).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 2);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 64);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 16);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 72);
+ oV = iV.lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 80);
+ oV = iV.lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 12);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 88);
+ oV = iV.lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 96);
+ oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 104);
+ oV = iV.lanewise(VectorOperators.LSHL, 10).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 112);
+ oV = iV.lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 4);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 120);
+ oV = iV.lanewise(VectorOperators.LSHL, 14).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // The first 16 inputs filled exactly 9 output vectors (16 * 18 = 9 * 32 bits), so the second half restarts at bit 0 with no carry and repeats the same shift schedule.
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 128);
+ oV = iV;
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 136);
+ oV = iV.lanewise(VectorOperators.LSHL, 18).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 14);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 144);
+ oV = iV.lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 152);
+ oV = iV.lanewise(VectorOperators.LSHL, 22).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 10);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 160);
+ oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 168);
+ oV = iV.lanewise(VectorOperators.LSHL, 26).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 6);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 176);
+ oV = iV.lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 184);
+ oV = iV.lanewise(VectorOperators.LSHL, 30).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 2);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 192);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 16);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 200);
+ oV = iV.lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 208);
+ oV = iV.lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 12);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 216);
+ oV = iV.lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 224);
+ oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 232);
+ oV = iV.lanewise(VectorOperators.LSHL, 10).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 240);
+ oV = iV.lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 4);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 248);
+ oV = iV.lanewise(VectorOperators.LSHL, 14).or(oV);
+ // Final (18th) store; outpos is not advanced again.
+ oV.intoArray(out, outpos);
+ }
+
+ static void fastunpack18(final int[] in, int inpos, final int[] out, int outpos) { // Unpacks 18 packed vectors (loads at in[inpos + 0, 8, ..., 136]) into 32 output vectors stored from out[outpos], stepping outpos by VLEN_256 per store.
+ var iV = IntVector.fromArray(SPECIES_256, in, inpos);
+ iV.and(MASK_18).intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Value 1 straddles two packed words: first take what remains above bit 18 of the current word (NOTE(review): MASK_18 is declared elsewhere, presumably (1 << 18) - 1 - confirm)...
+ var oV = iV.lanewise(VectorOperators.LSHR, 18).and(MASK_18);
+ // ...then append the low 4 bits of the next word at bit 14 (14 + 4 = 18 bits).
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 8);
+ oV = iV.and(15).lanewise(VectorOperators.LSHL, 14).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Value 2 lies wholly inside the current word, starting at bit 4. The same in-word / straddling alternation repeats below with shifting offsets.
+ iV.lanewise(VectorOperators.LSHR, 4).and(MASK_18).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 22).and(MASK_18);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 16);
+ oV = iV.and(255).lanewise(VectorOperators.LSHL, 10).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 8).and(MASK_18).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 26).and(MASK_18);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 24);
+ oV = iV.and(4095).lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 12).and(MASK_18).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 30).and(MASK_18);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 32);
+ oV = iV.and(65535).lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // No value fits wholly inside the word at offset 32: its bits 0-15 finished the previous value and its bits 16-31 start the next one.
+ oV = iV.lanewise(VectorOperators.LSHR, 16).and(MASK_18);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 40);
+ oV = iV.and(3).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 2).and(MASK_18).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 20).and(MASK_18);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 48);
+ oV = iV.and(63).lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 6).and(MASK_18).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 24).and(MASK_18);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 56);
+ oV = iV.and(1023).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 10).and(MASK_18).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 28).and(MASK_18);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 64);
+ oV = iV.and(16383).lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 14).and(MASK_18).intoArray(out, outpos);
+ outpos += VLEN_256;
+ // The word at offset 64 ended exactly on a value boundary (its top 18 bits were the previous value), so the word at offset 72 starts a fresh value at bit 0 and the schedule repeats.
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 72);
+ iV.and(MASK_18).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 18).and(MASK_18);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 80);
+ oV = iV.and(15).lanewise(VectorOperators.LSHL, 14).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 4).and(MASK_18).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 22).and(MASK_18);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 88);
+ oV = iV.and(255).lanewise(VectorOperators.LSHL, 10).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 8).and(MASK_18).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 26).and(MASK_18);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 96);
+ oV = iV.and(4095).lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 12).and(MASK_18).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 30).and(MASK_18);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 104);
+ oV = iV.and(65535).lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 16).and(MASK_18);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 112);
+ oV = iV.and(3).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 2).and(MASK_18).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 20).and(MASK_18);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 120);
+ oV = iV.and(63).lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 6).and(MASK_18).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 24).and(MASK_18);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 128);
+ oV = iV.and(1023).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 10).and(MASK_18).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 28).and(MASK_18);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 136);
+ oV = iV.and(16383).lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Last (32nd) value: the top 18 bits of the final packed word.
+ iV.lanewise(VectorOperators.LSHR, 14).and(MASK_18).intoArray(out, outpos);
+ }
+
+ static void fastpack19(final int[] in, int inpos, final int[] out, int outpos) { // Bit-packs 32 input vectors (loads at in[inpos + 0, 8, ..., 248]) at 19 bits per lane into 19 output vectors stored from out[outpos]; every input is and-ed with MASK_19 first.
+ var iV = IntVector.fromArray(SPECIES_256, in, inpos);
+ var oV = iV.and(MASK_19);
+ // oV is the output accumulator. NOTE(review): MASK_19 is declared outside this method; presumably it broadcasts (1 << 19) - 1 - confirm.
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 8);
+ oV = iV.and(MASK_19).lanewise(VectorOperators.LSHL, 19).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Carry: the part of the masked input that was shifted out of the stored word (masked value >>> 13) seeds the next accumulator. The same pattern repeats below.
+ oV = iV.and(MASK_19).lanewise(VectorOperators.LSHR, 13);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 16);
+ oV = iV.and(MASK_19).lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 24);
+ oV = iV.and(MASK_19).lanewise(VectorOperators.LSHL, 25).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_19).lanewise(VectorOperators.LSHR, 7);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 32);
+ oV = iV.and(MASK_19).lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 40);
+ oV = iV.and(MASK_19).lanewise(VectorOperators.LSHL, 31).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_19).lanewise(VectorOperators.LSHR, 1);
+ // The carry occupies bits 0-17 here (assuming MASK_19 keeps 19 bits), so a single further input placed at bit 18 completes the word and only one load precedes the store.
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 48);
+ oV = iV.and(MASK_19).lanewise(VectorOperators.LSHL, 18).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_19).lanewise(VectorOperators.LSHR, 14);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 56);
+ oV = iV.and(MASK_19).lanewise(VectorOperators.LSHL, 5).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 64);
+ oV = iV.and(MASK_19).lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_19).lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 72);
+ oV = iV.and(MASK_19).lanewise(VectorOperators.LSHL, 11).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 80);
+ oV = iV.and(MASK_19).lanewise(VectorOperators.LSHL, 30).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_19).lanewise(VectorOperators.LSHR, 2);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 88);
+ oV = iV.and(MASK_19).lanewise(VectorOperators.LSHL, 17).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_19).lanewise(VectorOperators.LSHR, 15);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 96);
+ oV = iV.and(MASK_19).lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 104);
+ oV = iV.and(MASK_19).lanewise(VectorOperators.LSHL, 23).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_19).lanewise(VectorOperators.LSHR, 9);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 112);
+ oV = iV.and(MASK_19).lanewise(VectorOperators.LSHL, 10).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 120);
+ oV = iV.and(MASK_19).lanewise(VectorOperators.LSHL, 29).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_19).lanewise(VectorOperators.LSHR, 3);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 128);
+ oV = iV.and(MASK_19).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_19).lanewise(VectorOperators.LSHR, 16);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 136);
+ oV = iV.and(MASK_19).lanewise(VectorOperators.LSHL, 3).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 144);
+ oV = iV.and(MASK_19).lanewise(VectorOperators.LSHL, 22).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_19).lanewise(VectorOperators.LSHR, 10);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 152);
+ oV = iV.and(MASK_19).lanewise(VectorOperators.LSHL, 9).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 160);
+ oV = iV.and(MASK_19).lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_19).lanewise(VectorOperators.LSHR, 4);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 168);
+ oV = iV.and(MASK_19).lanewise(VectorOperators.LSHL, 15).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_19).lanewise(VectorOperators.LSHR, 17);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 176);
+ oV = iV.and(MASK_19).lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 184);
+ oV = iV.and(MASK_19).lanewise(VectorOperators.LSHL, 21).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_19).lanewise(VectorOperators.LSHR, 11);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 192);
+ oV = iV.and(MASK_19).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 200);
+ oV = iV.and(MASK_19).lanewise(VectorOperators.LSHL, 27).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_19).lanewise(VectorOperators.LSHR, 5);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 208);
+ oV = iV.and(MASK_19).lanewise(VectorOperators.LSHL, 14).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_19).lanewise(VectorOperators.LSHR, 18);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 216);
+ oV = iV.and(MASK_19).lanewise(VectorOperators.LSHL, 1).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 224);
+ oV = iV.and(MASK_19).lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_19).lanewise(VectorOperators.LSHR, 12);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 232);
+ oV = iV.and(MASK_19).lanewise(VectorOperators.LSHL, 7).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 240);
+ oV = iV.and(MASK_19).lanewise(VectorOperators.LSHL, 26).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_19).lanewise(VectorOperators.LSHR, 6);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 248);
+ oV = iV.and(MASK_19).lanewise(VectorOperators.LSHL, 13).or(oV);
+ // Final (19th) store; outpos is not advanced again.
+ oV.intoArray(out, outpos);
+ }
+
+ static void fastpackNoMask19(final int[] in, int inpos, final int[] out, int outpos) {
+ var iV = IntVector.fromArray(SPECIES_256, in, inpos);
+ var oV = iV;
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 8);
+ oV = iV.lanewise(VectorOperators.LSHL, 19).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 13);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 16);
+ oV = iV.lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 24);
+ oV = iV.lanewise(VectorOperators.LSHL, 25).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 7);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 32);
+ oV = iV.lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 40);
+ oV = iV.lanewise(VectorOperators.LSHL, 31).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 1);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 48);
+ oV = iV.lanewise(VectorOperators.LSHL, 18).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 14);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 56);
+ oV = iV.lanewise(VectorOperators.LSHL, 5).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 64);
+ oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 72);
+ oV = iV.lanewise(VectorOperators.LSHL, 11).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 80);
+ oV = iV.lanewise(VectorOperators.LSHL, 30).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 2);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 88);
+ oV = iV.lanewise(VectorOperators.LSHL, 17).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 15);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 96);
+ oV = iV.lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 104);
+ oV = iV.lanewise(VectorOperators.LSHL, 23).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 9);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 112);
+ oV = iV.lanewise(VectorOperators.LSHL, 10).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 120);
+ oV = iV.lanewise(VectorOperators.LSHL, 29).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 3);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 128);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 16);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 136);
+ oV = iV.lanewise(VectorOperators.LSHL, 3).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 144);
+ oV = iV.lanewise(VectorOperators.LSHL, 22).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 10);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 152);
+ oV = iV.lanewise(VectorOperators.LSHL, 9).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 160);
+ oV = iV.lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 4);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 168);
+ oV = iV.lanewise(VectorOperators.LSHL, 15).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 17);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 176);
+ oV = iV.lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 184);
+ oV = iV.lanewise(VectorOperators.LSHL, 21).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 11);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 192);
+ oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 200);
+ oV = iV.lanewise(VectorOperators.LSHL, 27).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 5);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 208);
+ oV = iV.lanewise(VectorOperators.LSHL, 14).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 18);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 216);
+ oV = iV.lanewise(VectorOperators.LSHL, 1).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 224);
+ oV = iV.lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 12);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 232);
+ oV = iV.lanewise(VectorOperators.LSHL, 7).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 240);
+ oV = iV.lanewise(VectorOperators.LSHL, 26).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 6);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 248);
+ oV = iV.lanewise(VectorOperators.LSHL, 13).or(oV);
+
+ oV.intoArray(out, outpos);
+ }
+
+    /**
+     * Unpacks 19 packed vectors into 32 output vectors of 19-bit values.
+     *
+     * <p>Packed vectors are loaded from {@code in} at {@code inpos},
+     * {@code inpos + 8}, ..., {@code inpos + 144}. Each decoded vector is stored
+     * to {@code out} starting at {@code outpos}, which advances by
+     * {@code VLEN_256} after every store except the last. A value that straddles
+     * two packed vectors is rebuilt by OR-ing the high bits of the first with
+     * the shifted low bits of the second. Fields are isolated with
+     * {@code MASK_19}, which is defined outside this method (presumably the
+     * low-19-bit mask).
+     *
+     * @param in     array holding the packed data
+     * @param inpos  index in {@code in} of the first packed word
+     * @param out    destination array for the decoded values
+     * @param outpos index in {@code out} of the first decoded value
+     */
+    static void fastunpack19(final int[] in, int inpos, final int[] out, int outpos) {
+        // Packed vector 0.
+        IntVector word = IntVector.fromArray(SPECIES_256, in, inpos);
+        word.and(MASK_19).intoArray(out, outpos);
+        outpos += VLEN_256;
+        IntVector split = word.lanewise(VectorOperators.LSHR, 19).and(MASK_19);
+
+        // Packed vector 1.
+        word = IntVector.fromArray(SPECIES_256, in, inpos + 8);
+        split = split.or(word.and(63).lanewise(VectorOperators.LSHL, 13));
+        split.intoArray(out, outpos);
+        outpos += VLEN_256;
+        word.lanewise(VectorOperators.LSHR, 6).and(MASK_19).intoArray(out, outpos);
+        outpos += VLEN_256;
+        split = word.lanewise(VectorOperators.LSHR, 25).and(MASK_19);
+
+        // Packed vector 2.
+        word = IntVector.fromArray(SPECIES_256, in, inpos + 16);
+        split = split.or(word.and(4095).lanewise(VectorOperators.LSHL, 7));
+        split.intoArray(out, outpos);
+        outpos += VLEN_256;
+        word.lanewise(VectorOperators.LSHR, 12).and(MASK_19).intoArray(out, outpos);
+        outpos += VLEN_256;
+        split = word.lanewise(VectorOperators.LSHR, 31).and(MASK_19);
+
+        // Packed vector 3.
+        word = IntVector.fromArray(SPECIES_256, in, inpos + 24);
+        split = split.or(word.and(262143).lanewise(VectorOperators.LSHL, 1));
+        split.intoArray(out, outpos);
+        outpos += VLEN_256;
+        split = word.lanewise(VectorOperators.LSHR, 18).and(MASK_19);
+
+        // Packed vector 4.
+        word = IntVector.fromArray(SPECIES_256, in, inpos + 32);
+        split = split.or(word.and(31).lanewise(VectorOperators.LSHL, 14));
+        split.intoArray(out, outpos);
+        outpos += VLEN_256;
+        word.lanewise(VectorOperators.LSHR, 5).and(MASK_19).intoArray(out, outpos);
+        outpos += VLEN_256;
+        split = word.lanewise(VectorOperators.LSHR, 24).and(MASK_19);
+
+        // Packed vector 5.
+        word = IntVector.fromArray(SPECIES_256, in, inpos + 40);
+        split = split.or(word.and(2047).lanewise(VectorOperators.LSHL, 8));
+        split.intoArray(out, outpos);
+        outpos += VLEN_256;
+        word.lanewise(VectorOperators.LSHR, 11).and(MASK_19).intoArray(out, outpos);
+        outpos += VLEN_256;
+        split = word.lanewise(VectorOperators.LSHR, 30).and(MASK_19);
+
+        // Packed vector 6.
+        word = IntVector.fromArray(SPECIES_256, in, inpos + 48);
+        split = split.or(word.and(131071).lanewise(VectorOperators.LSHL, 2));
+        split.intoArray(out, outpos);
+        outpos += VLEN_256;
+        split = word.lanewise(VectorOperators.LSHR, 17).and(MASK_19);
+
+        // Packed vector 7.
+        word = IntVector.fromArray(SPECIES_256, in, inpos + 56);
+        split = split.or(word.and(15).lanewise(VectorOperators.LSHL, 15));
+        split.intoArray(out, outpos);
+        outpos += VLEN_256;
+        word.lanewise(VectorOperators.LSHR, 4).and(MASK_19).intoArray(out, outpos);
+        outpos += VLEN_256;
+        split = word.lanewise(VectorOperators.LSHR, 23).and(MASK_19);
+
+        // Packed vector 8.
+        word = IntVector.fromArray(SPECIES_256, in, inpos + 64);
+        split = split.or(word.and(1023).lanewise(VectorOperators.LSHL, 9));
+        split.intoArray(out, outpos);
+        outpos += VLEN_256;
+        word.lanewise(VectorOperators.LSHR, 10).and(MASK_19).intoArray(out, outpos);
+        outpos += VLEN_256;
+        split = word.lanewise(VectorOperators.LSHR, 29).and(MASK_19);
+
+        // Packed vector 9.
+        word = IntVector.fromArray(SPECIES_256, in, inpos + 72);
+        split = split.or(word.and(65535).lanewise(VectorOperators.LSHL, 3));
+        split.intoArray(out, outpos);
+        outpos += VLEN_256;
+        split = word.lanewise(VectorOperators.LSHR, 16).and(MASK_19);
+
+        // Packed vector 10.
+        word = IntVector.fromArray(SPECIES_256, in, inpos + 80);
+        split = split.or(word.and(7).lanewise(VectorOperators.LSHL, 16));
+        split.intoArray(out, outpos);
+        outpos += VLEN_256;
+        word.lanewise(VectorOperators.LSHR, 3).and(MASK_19).intoArray(out, outpos);
+        outpos += VLEN_256;
+        split = word.lanewise(VectorOperators.LSHR, 22).and(MASK_19);
+
+        // Packed vector 11.
+        word = IntVector.fromArray(SPECIES_256, in, inpos + 88);
+        split = split.or(word.and(511).lanewise(VectorOperators.LSHL, 10));
+        split.intoArray(out, outpos);
+        outpos += VLEN_256;
+        word.lanewise(VectorOperators.LSHR, 9).and(MASK_19).intoArray(out, outpos);
+        outpos += VLEN_256;
+        split = word.lanewise(VectorOperators.LSHR, 28).and(MASK_19);
+
+        // Packed vector 12.
+        word = IntVector.fromArray(SPECIES_256, in, inpos + 96);
+        split = split.or(word.and(32767).lanewise(VectorOperators.LSHL, 4));
+        split.intoArray(out, outpos);
+        outpos += VLEN_256;
+        split = word.lanewise(VectorOperators.LSHR, 15).and(MASK_19);
+
+        // Packed vector 13.
+        word = IntVector.fromArray(SPECIES_256, in, inpos + 104);
+        split = split.or(word.and(3).lanewise(VectorOperators.LSHL, 17));
+        split.intoArray(out, outpos);
+        outpos += VLEN_256;
+        word.lanewise(VectorOperators.LSHR, 2).and(MASK_19).intoArray(out, outpos);
+        outpos += VLEN_256;
+        split = word.lanewise(VectorOperators.LSHR, 21).and(MASK_19);
+
+        // Packed vector 14.
+        word = IntVector.fromArray(SPECIES_256, in, inpos + 112);
+        split = split.or(word.and(255).lanewise(VectorOperators.LSHL, 11));
+        split.intoArray(out, outpos);
+        outpos += VLEN_256;
+        word.lanewise(VectorOperators.LSHR, 8).and(MASK_19).intoArray(out, outpos);
+        outpos += VLEN_256;
+        split = word.lanewise(VectorOperators.LSHR, 27).and(MASK_19);
+
+        // Packed vector 15.
+        word = IntVector.fromArray(SPECIES_256, in, inpos + 120);
+        split = split.or(word.and(16383).lanewise(VectorOperators.LSHL, 5));
+        split.intoArray(out, outpos);
+        outpos += VLEN_256;
+        split = word.lanewise(VectorOperators.LSHR, 14).and(MASK_19);
+
+        // Packed vector 16.
+        word = IntVector.fromArray(SPECIES_256, in, inpos + 128);
+        split = split.or(word.and(1).lanewise(VectorOperators.LSHL, 18));
+        split.intoArray(out, outpos);
+        outpos += VLEN_256;
+        word.lanewise(VectorOperators.LSHR, 1).and(MASK_19).intoArray(out, outpos);
+        outpos += VLEN_256;
+        split = word.lanewise(VectorOperators.LSHR, 20).and(MASK_19);
+
+        // Packed vector 17.
+        word = IntVector.fromArray(SPECIES_256, in, inpos + 136);
+        split = split.or(word.and(127).lanewise(VectorOperators.LSHL, 12));
+        split.intoArray(out, outpos);
+        outpos += VLEN_256;
+        word.lanewise(VectorOperators.LSHR, 7).and(MASK_19).intoArray(out, outpos);
+        outpos += VLEN_256;
+        split = word.lanewise(VectorOperators.LSHR, 26).and(MASK_19);
+
+        // Packed vector 18 (last one; outpos is not advanced after the final store).
+        word = IntVector.fromArray(SPECIES_256, in, inpos + 144);
+        split = split.or(word.and(8191).lanewise(VectorOperators.LSHL, 6));
+        split.intoArray(out, outpos);
+        outpos += VLEN_256;
+        word.lanewise(VectorOperators.LSHR, 13).and(MASK_19).intoArray(out, outpos);
+    }
+
+ static void fastpack20(final int[] in, int inpos, final int[] out, int outpos) {
+ var iV = IntVector.fromArray(SPECIES_256, in, inpos);
+ var oV = iV.and(MASK_20);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 8);
+ oV = iV.and(MASK_20).lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_20).lanewise(VectorOperators.LSHR, 12);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 16);
+ oV = iV.and(MASK_20).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 24);
+ oV = iV.and(MASK_20).lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_20).lanewise(VectorOperators.LSHR, 4);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 32);
+ oV = iV.and(MASK_20).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_20).lanewise(VectorOperators.LSHR, 16);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 40);
+ oV = iV.and(MASK_20).lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 48);
+ oV = iV.and(MASK_20).lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_20).lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 56);
+ oV = iV.and(MASK_20).lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 64);
+ oV = iV.and(MASK_20);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 72);
+ oV = iV.and(MASK_20).lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_20).lanewise(VectorOperators.LSHR, 12);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 80);
+ oV = iV.and(MASK_20).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 88);
+ oV = iV.and(MASK_20).lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_20).lanewise(VectorOperators.LSHR, 4);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 96);
+ oV = iV.and(MASK_20).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_20).lanewise(VectorOperators.LSHR, 16);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 104);
+ oV = iV.and(MASK_20).lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 112);
+ oV = iV.and(MASK_20).lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_20).lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 120);
+ oV = iV.and(MASK_20).lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 128);
+ oV = iV.and(MASK_20);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 136);
+ oV = iV.and(MASK_20).lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_20).lanewise(VectorOperators.LSHR, 12);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 144);
+ oV = iV.and(MASK_20).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 152);
+ oV = iV.and(MASK_20).lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_20).lanewise(VectorOperators.LSHR, 4);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 160);
+ oV = iV.and(MASK_20).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_20).lanewise(VectorOperators.LSHR, 16);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 168);
+ oV = iV.and(MASK_20).lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 176);
+ oV = iV.and(MASK_20).lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_20).lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 184);
+ oV = iV.and(MASK_20).lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 192);
+ oV = iV.and(MASK_20);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 200);
+ oV = iV.and(MASK_20).lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_20).lanewise(VectorOperators.LSHR, 12);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 208);
+ oV = iV.and(MASK_20).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 216);
+ oV = iV.and(MASK_20).lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_20).lanewise(VectorOperators.LSHR, 4);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 224);
+ oV = iV.and(MASK_20).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_20).lanewise(VectorOperators.LSHR, 16);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 232);
+ oV = iV.and(MASK_20).lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 240);
+ oV = iV.and(MASK_20).lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_20).lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 248);
+ oV = iV.and(MASK_20).lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ oV.intoArray(out, outpos);
+ }
+
+    /**
+     * Bit-packs 32 input vectors into 20 output vectors (20 bits per value)
+     * without masking the inputs first.
+     *
+     * <p>Input vectors are loaded from {@code in} at {@code inpos},
+     * {@code inpos + 8}, ..., {@code inpos + 248}. Output vectors are stored to
+     * {@code out} starting at {@code outpos}, which advances by {@code VLEN_256}
+     * after every store except the last. The bit layout repeats every 8 inputs
+     * (5 outputs), so the body is the same pattern written out four times.
+     * Because no mask is applied, bits above bit 19 of an input are not cleared
+     * and end up inside neighbouring fields.
+     *
+     * @param in     source array
+     * @param inpos  index in {@code in} of the first value to pack
+     * @param out    destination array
+     * @param outpos index in {@code out} of the first packed word
+     */
+    static void fastpackNoMask20(final int[] in, int inpos, final int[] out, int outpos) {
+        // ---- Inputs 0..7 -> output vectors 0..4 ----
+        IntVector src = IntVector.fromArray(SPECIES_256, in, inpos);
+        IntVector acc = src;
+        src = IntVector.fromArray(SPECIES_256, in, inpos + 8);
+        acc = acc.or(src.lanewise(VectorOperators.LSHL, 20));
+        acc.intoArray(out, outpos);
+        outpos += VLEN_256;
+
+        acc = src.lanewise(VectorOperators.LSHR, 12);
+        src = IntVector.fromArray(SPECIES_256, in, inpos + 16);
+        acc = acc.or(src.lanewise(VectorOperators.LSHL, 8));
+        src = IntVector.fromArray(SPECIES_256, in, inpos + 24);
+        acc = acc.or(src.lanewise(VectorOperators.LSHL, 28));
+        acc.intoArray(out, outpos);
+        outpos += VLEN_256;
+
+        acc = src.lanewise(VectorOperators.LSHR, 4);
+        src = IntVector.fromArray(SPECIES_256, in, inpos + 32);
+        acc = acc.or(src.lanewise(VectorOperators.LSHL, 16));
+        acc.intoArray(out, outpos);
+        outpos += VLEN_256;
+
+        acc = src.lanewise(VectorOperators.LSHR, 16);
+        src = IntVector.fromArray(SPECIES_256, in, inpos + 40);
+        acc = acc.or(src.lanewise(VectorOperators.LSHL, 4));
+        src = IntVector.fromArray(SPECIES_256, in, inpos + 48);
+        acc = acc.or(src.lanewise(VectorOperators.LSHL, 24));
+        acc.intoArray(out, outpos);
+        outpos += VLEN_256;
+
+        acc = src.lanewise(VectorOperators.LSHR, 8);
+        src = IntVector.fromArray(SPECIES_256, in, inpos + 56);
+        acc = acc.or(src.lanewise(VectorOperators.LSHL, 12));
+        acc.intoArray(out, outpos);
+        outpos += VLEN_256;
+
+        // ---- Inputs 8..15 -> output vectors 5..9 ----
+        src = IntVector.fromArray(SPECIES_256, in, inpos + 64);
+        acc = src;
+        src = IntVector.fromArray(SPECIES_256, in, inpos + 72);
+        acc = acc.or(src.lanewise(VectorOperators.LSHL, 20));
+        acc.intoArray(out, outpos);
+        outpos += VLEN_256;
+
+        acc = src.lanewise(VectorOperators.LSHR, 12);
+        src = IntVector.fromArray(SPECIES_256, in, inpos + 80);
+        acc = acc.or(src.lanewise(VectorOperators.LSHL, 8));
+        src = IntVector.fromArray(SPECIES_256, in, inpos + 88);
+        acc = acc.or(src.lanewise(VectorOperators.LSHL, 28));
+        acc.intoArray(out, outpos);
+        outpos += VLEN_256;
+
+        acc = src.lanewise(VectorOperators.LSHR, 4);
+        src = IntVector.fromArray(SPECIES_256, in, inpos + 96);
+        acc = acc.or(src.lanewise(VectorOperators.LSHL, 16));
+        acc.intoArray(out, outpos);
+        outpos += VLEN_256;
+
+        acc = src.lanewise(VectorOperators.LSHR, 16);
+        src = IntVector.fromArray(SPECIES_256, in, inpos + 104);
+        acc = acc.or(src.lanewise(VectorOperators.LSHL, 4));
+        src = IntVector.fromArray(SPECIES_256, in, inpos + 112);
+        acc = acc.or(src.lanewise(VectorOperators.LSHL, 24));
+        acc.intoArray(out, outpos);
+        outpos += VLEN_256;
+
+        acc = src.lanewise(VectorOperators.LSHR, 8);
+        src = IntVector.fromArray(SPECIES_256, in, inpos + 120);
+        acc = acc.or(src.lanewise(VectorOperators.LSHL, 12));
+        acc.intoArray(out, outpos);
+        outpos += VLEN_256;
+
+        // ---- Inputs 16..23 -> output vectors 10..14 ----
+        src = IntVector.fromArray(SPECIES_256, in, inpos + 128);
+        acc = src;
+        src = IntVector.fromArray(SPECIES_256, in, inpos + 136);
+        acc = acc.or(src.lanewise(VectorOperators.LSHL, 20));
+        acc.intoArray(out, outpos);
+        outpos += VLEN_256;
+
+        acc = src.lanewise(VectorOperators.LSHR, 12);
+        src = IntVector.fromArray(SPECIES_256, in, inpos + 144);
+        acc = acc.or(src.lanewise(VectorOperators.LSHL, 8));
+        src = IntVector.fromArray(SPECIES_256, in, inpos + 152);
+        acc = acc.or(src.lanewise(VectorOperators.LSHL, 28));
+        acc.intoArray(out, outpos);
+        outpos += VLEN_256;
+
+        acc = src.lanewise(VectorOperators.LSHR, 4);
+        src = IntVector.fromArray(SPECIES_256, in, inpos + 160);
+        acc = acc.or(src.lanewise(VectorOperators.LSHL, 16));
+        acc.intoArray(out, outpos);
+        outpos += VLEN_256;
+
+        acc = src.lanewise(VectorOperators.LSHR, 16);
+        src = IntVector.fromArray(SPECIES_256, in, inpos + 168);
+        acc = acc.or(src.lanewise(VectorOperators.LSHL, 4));
+        src = IntVector.fromArray(SPECIES_256, in, inpos + 176);
+        acc = acc.or(src.lanewise(VectorOperators.LSHL, 24));
+        acc.intoArray(out, outpos);
+        outpos += VLEN_256;
+
+        acc = src.lanewise(VectorOperators.LSHR, 8);
+        src = IntVector.fromArray(SPECIES_256, in, inpos + 184);
+        acc = acc.or(src.lanewise(VectorOperators.LSHL, 12));
+        acc.intoArray(out, outpos);
+        outpos += VLEN_256;
+
+        // ---- Inputs 24..31 -> output vectors 15..19 ----
+        src = IntVector.fromArray(SPECIES_256, in, inpos + 192);
+        acc = src;
+        src = IntVector.fromArray(SPECIES_256, in, inpos + 200);
+        acc = acc.or(src.lanewise(VectorOperators.LSHL, 20));
+        acc.intoArray(out, outpos);
+        outpos += VLEN_256;
+
+        acc = src.lanewise(VectorOperators.LSHR, 12);
+        src = IntVector.fromArray(SPECIES_256, in, inpos + 208);
+        acc = acc.or(src.lanewise(VectorOperators.LSHL, 8));
+        src = IntVector.fromArray(SPECIES_256, in, inpos + 216);
+        acc = acc.or(src.lanewise(VectorOperators.LSHL, 28));
+        acc.intoArray(out, outpos);
+        outpos += VLEN_256;
+
+        acc = src.lanewise(VectorOperators.LSHR, 4);
+        src = IntVector.fromArray(SPECIES_256, in, inpos + 224);
+        acc = acc.or(src.lanewise(VectorOperators.LSHL, 16));
+        acc.intoArray(out, outpos);
+        outpos += VLEN_256;
+
+        acc = src.lanewise(VectorOperators.LSHR, 16);
+        src = IntVector.fromArray(SPECIES_256, in, inpos + 232);
+        acc = acc.or(src.lanewise(VectorOperators.LSHL, 4));
+        src = IntVector.fromArray(SPECIES_256, in, inpos + 240);
+        acc = acc.or(src.lanewise(VectorOperators.LSHL, 24));
+        acc.intoArray(out, outpos);
+        outpos += VLEN_256;
+
+        // Final output vector; outpos is not advanced afterwards.
+        acc = src.lanewise(VectorOperators.LSHR, 8);
+        src = IntVector.fromArray(SPECIES_256, in, inpos + 248);
+        acc = acc.or(src.lanewise(VectorOperators.LSHL, 12));
+        acc.intoArray(out, outpos);
+    }
+
+    /**
+     * Unpacks 20 packed vectors into 32 output vectors of 20-bit values.
+     *
+     * <p>Packed vectors are loaded from {@code in} at {@code inpos},
+     * {@code inpos + 8}, ..., {@code inpos + 152}. Each decoded vector is stored
+     * to {@code out} starting at {@code outpos}, which advances by
+     * {@code VLEN_256} after every store except the last. The bit layout repeats
+     * every 5 packed vectors (8 outputs), so the body is the same pattern
+     * written out four times. Fields are isolated with {@code MASK_20}, which is
+     * defined outside this method (presumably the low-20-bit mask).
+     *
+     * @param in     array holding the packed data
+     * @param inpos  index in {@code in} of the first packed word
+     * @param out    destination array for the decoded values
+     * @param outpos index in {@code out} of the first decoded value
+     */
+    static void fastunpack20(final int[] in, int inpos, final int[] out, int outpos) {
+        // ---- Packed vectors 0..4 -> outputs 0..7 ----
+        IntVector word = IntVector.fromArray(SPECIES_256, in, inpos);
+        word.and(MASK_20).intoArray(out, outpos);
+        outpos += VLEN_256;
+        IntVector split = word.lanewise(VectorOperators.LSHR, 20).and(MASK_20);
+
+        word = IntVector.fromArray(SPECIES_256, in, inpos + 8);
+        split = split.or(word.and(255).lanewise(VectorOperators.LSHL, 12));
+        split.intoArray(out, outpos);
+        outpos += VLEN_256;
+        word.lanewise(VectorOperators.LSHR, 8).and(MASK_20).intoArray(out, outpos);
+        outpos += VLEN_256;
+        split = word.lanewise(VectorOperators.LSHR, 28).and(MASK_20);
+
+        word = IntVector.fromArray(SPECIES_256, in, inpos + 16);
+        split = split.or(word.and(65535).lanewise(VectorOperators.LSHL, 4));
+        split.intoArray(out, outpos);
+        outpos += VLEN_256;
+        split = word.lanewise(VectorOperators.LSHR, 16).and(MASK_20);
+
+        word = IntVector.fromArray(SPECIES_256, in, inpos + 24);
+        split = split.or(word.and(15).lanewise(VectorOperators.LSHL, 16));
+        split.intoArray(out, outpos);
+        outpos += VLEN_256;
+        word.lanewise(VectorOperators.LSHR, 4).and(MASK_20).intoArray(out, outpos);
+        outpos += VLEN_256;
+        split = word.lanewise(VectorOperators.LSHR, 24).and(MASK_20);
+
+        word = IntVector.fromArray(SPECIES_256, in, inpos + 32);
+        split = split.or(word.and(4095).lanewise(VectorOperators.LSHL, 8));
+        split.intoArray(out, outpos);
+        outpos += VLEN_256;
+        word.lanewise(VectorOperators.LSHR, 12).and(MASK_20).intoArray(out, outpos);
+        outpos += VLEN_256;
+
+        // ---- Packed vectors 5..9 -> outputs 8..15 ----
+        word = IntVector.fromArray(SPECIES_256, in, inpos + 40);
+        word.and(MASK_20).intoArray(out, outpos);
+        outpos += VLEN_256;
+        split = word.lanewise(VectorOperators.LSHR, 20).and(MASK_20);
+
+        word = IntVector.fromArray(SPECIES_256, in, inpos + 48);
+        split = split.or(word.and(255).lanewise(VectorOperators.LSHL, 12));
+        split.intoArray(out, outpos);
+        outpos += VLEN_256;
+        word.lanewise(VectorOperators.LSHR, 8).and(MASK_20).intoArray(out, outpos);
+        outpos += VLEN_256;
+        split = word.lanewise(VectorOperators.LSHR, 28).and(MASK_20);
+
+        word = IntVector.fromArray(SPECIES_256, in, inpos + 56);
+        split = split.or(word.and(65535).lanewise(VectorOperators.LSHL, 4));
+        split.intoArray(out, outpos);
+        outpos += VLEN_256;
+        split = word.lanewise(VectorOperators.LSHR, 16).and(MASK_20);
+
+        word = IntVector.fromArray(SPECIES_256, in, inpos + 64);
+        split = split.or(word.and(15).lanewise(VectorOperators.LSHL, 16));
+        split.intoArray(out, outpos);
+        outpos += VLEN_256;
+        word.lanewise(VectorOperators.LSHR, 4).and(MASK_20).intoArray(out, outpos);
+        outpos += VLEN_256;
+        split = word.lanewise(VectorOperators.LSHR, 24).and(MASK_20);
+
+        word = IntVector.fromArray(SPECIES_256, in, inpos + 72);
+        split = split.or(word.and(4095).lanewise(VectorOperators.LSHL, 8));
+        split.intoArray(out, outpos);
+        outpos += VLEN_256;
+        word.lanewise(VectorOperators.LSHR, 12).and(MASK_20).intoArray(out, outpos);
+        outpos += VLEN_256;
+
+        // ---- Packed vectors 10..14 -> outputs 16..23 ----
+        word = IntVector.fromArray(SPECIES_256, in, inpos + 80);
+        word.and(MASK_20).intoArray(out, outpos);
+        outpos += VLEN_256;
+        split = word.lanewise(VectorOperators.LSHR, 20).and(MASK_20);
+
+        word = IntVector.fromArray(SPECIES_256, in, inpos + 88);
+        split = split.or(word.and(255).lanewise(VectorOperators.LSHL, 12));
+        split.intoArray(out, outpos);
+        outpos += VLEN_256;
+        word.lanewise(VectorOperators.LSHR, 8).and(MASK_20).intoArray(out, outpos);
+        outpos += VLEN_256;
+        split = word.lanewise(VectorOperators.LSHR, 28).and(MASK_20);
+
+        word = IntVector.fromArray(SPECIES_256, in, inpos + 96);
+        split = split.or(word.and(65535).lanewise(VectorOperators.LSHL, 4));
+        split.intoArray(out, outpos);
+        outpos += VLEN_256;
+        split = word.lanewise(VectorOperators.LSHR, 16).and(MASK_20);
+
+        word = IntVector.fromArray(SPECIES_256, in, inpos + 104);
+        split = split.or(word.and(15).lanewise(VectorOperators.LSHL, 16));
+        split.intoArray(out, outpos);
+        outpos += VLEN_256;
+        word.lanewise(VectorOperators.LSHR, 4).and(MASK_20).intoArray(out, outpos);
+        outpos += VLEN_256;
+        split = word.lanewise(VectorOperators.LSHR, 24).and(MASK_20);
+
+        word = IntVector.fromArray(SPECIES_256, in, inpos + 112);
+        split = split.or(word.and(4095).lanewise(VectorOperators.LSHL, 8));
+        split.intoArray(out, outpos);
+        outpos += VLEN_256;
+        word.lanewise(VectorOperators.LSHR, 12).and(MASK_20).intoArray(out, outpos);
+        outpos += VLEN_256;
+
+        // ---- Packed vectors 15..19 -> outputs 24..31 ----
+        word = IntVector.fromArray(SPECIES_256, in, inpos + 120);
+        word.and(MASK_20).intoArray(out, outpos);
+        outpos += VLEN_256;
+        split = word.lanewise(VectorOperators.LSHR, 20).and(MASK_20);
+
+        word = IntVector.fromArray(SPECIES_256, in, inpos + 128);
+        split = split.or(word.and(255).lanewise(VectorOperators.LSHL, 12));
+        split.intoArray(out, outpos);
+        outpos += VLEN_256;
+        word.lanewise(VectorOperators.LSHR, 8).and(MASK_20).intoArray(out, outpos);
+        outpos += VLEN_256;
+        split = word.lanewise(VectorOperators.LSHR, 28).and(MASK_20);
+
+        word = IntVector.fromArray(SPECIES_256, in, inpos + 136);
+        split = split.or(word.and(65535).lanewise(VectorOperators.LSHL, 4));
+        split.intoArray(out, outpos);
+        outpos += VLEN_256;
+        split = word.lanewise(VectorOperators.LSHR, 16).and(MASK_20);
+
+        word = IntVector.fromArray(SPECIES_256, in, inpos + 144);
+        split = split.or(word.and(15).lanewise(VectorOperators.LSHL, 16));
+        split.intoArray(out, outpos);
+        outpos += VLEN_256;
+        word.lanewise(VectorOperators.LSHR, 4).and(MASK_20).intoArray(out, outpos);
+        outpos += VLEN_256;
+        split = word.lanewise(VectorOperators.LSHR, 24).and(MASK_20);
+
+        // Last packed vector; outpos is not advanced after the final store.
+        word = IntVector.fromArray(SPECIES_256, in, inpos + 152);
+        split = split.or(word.and(4095).lanewise(VectorOperators.LSHL, 8));
+        split.intoArray(out, outpos);
+        outpos += VLEN_256;
+        word.lanewise(VectorOperators.LSHR, 12).and(MASK_20).intoArray(out, outpos);
+    }
+
+ static void fastpack21(final int[] in, int inpos, final int[] out, int outpos) {
+ var iV = IntVector.fromArray(SPECIES_256, in, inpos);
+ var oV = iV.and(MASK_21);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 8);
+ oV = iV.and(MASK_21).lanewise(VectorOperators.LSHL, 21).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_21).lanewise(VectorOperators.LSHR, 11);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 16);
+ oV = iV.and(MASK_21).lanewise(VectorOperators.LSHL, 10).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 24);
+ oV = iV.and(MASK_21).lanewise(VectorOperators.LSHL, 31).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_21).lanewise(VectorOperators.LSHR, 1);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 32);
+ oV = iV.and(MASK_21).lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_21).lanewise(VectorOperators.LSHR, 12);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 40);
+ oV = iV.and(MASK_21).lanewise(VectorOperators.LSHL, 9).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 48);
+ oV = iV.and(MASK_21).lanewise(VectorOperators.LSHL, 30).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_21).lanewise(VectorOperators.LSHR, 2);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 56);
+ oV = iV.and(MASK_21).lanewise(VectorOperators.LSHL, 19).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_21).lanewise(VectorOperators.LSHR, 13);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 64);
+ oV = iV.and(MASK_21).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 72);
+ oV = iV.and(MASK_21).lanewise(VectorOperators.LSHL, 29).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_21).lanewise(VectorOperators.LSHR, 3);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 80);
+ oV = iV.and(MASK_21).lanewise(VectorOperators.LSHL, 18).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_21).lanewise(VectorOperators.LSHR, 14);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 88);
+ oV = iV.and(MASK_21).lanewise(VectorOperators.LSHL, 7).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 96);
+ oV = iV.and(MASK_21).lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_21).lanewise(VectorOperators.LSHR, 4);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 104);
+ oV = iV.and(MASK_21).lanewise(VectorOperators.LSHL, 17).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_21).lanewise(VectorOperators.LSHR, 15);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 112);
+ oV = iV.and(MASK_21).lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 120);
+ oV = iV.and(MASK_21).lanewise(VectorOperators.LSHL, 27).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_21).lanewise(VectorOperators.LSHR, 5);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 128);
+ oV = iV.and(MASK_21).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_21).lanewise(VectorOperators.LSHR, 16);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 136);
+ oV = iV.and(MASK_21).lanewise(VectorOperators.LSHL, 5).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 144);
+ oV = iV.and(MASK_21).lanewise(VectorOperators.LSHL, 26).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_21).lanewise(VectorOperators.LSHR, 6);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 152);
+ oV = iV.and(MASK_21).lanewise(VectorOperators.LSHL, 15).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_21).lanewise(VectorOperators.LSHR, 17);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 160);
+ oV = iV.and(MASK_21).lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 168);
+ oV = iV.and(MASK_21).lanewise(VectorOperators.LSHL, 25).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_21).lanewise(VectorOperators.LSHR, 7);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 176);
+ oV = iV.and(MASK_21).lanewise(VectorOperators.LSHL, 14).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_21).lanewise(VectorOperators.LSHR, 18);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 184);
+ oV = iV.and(MASK_21).lanewise(VectorOperators.LSHL, 3).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 192);
+ oV = iV.and(MASK_21).lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_21).lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 200);
+ oV = iV.and(MASK_21).lanewise(VectorOperators.LSHL, 13).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_21).lanewise(VectorOperators.LSHR, 19);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 208);
+ oV = iV.and(MASK_21).lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 216);
+ oV = iV.and(MASK_21).lanewise(VectorOperators.LSHL, 23).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_21).lanewise(VectorOperators.LSHR, 9);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 224);
+ oV = iV.and(MASK_21).lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_21).lanewise(VectorOperators.LSHR, 20);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 232);
+ oV = iV.and(MASK_21).lanewise(VectorOperators.LSHL, 1).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 240);
+ oV = iV.and(MASK_21).lanewise(VectorOperators.LSHL, 22).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_21).lanewise(VectorOperators.LSHR, 10);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 248);
+ oV = iV.and(MASK_21).lanewise(VectorOperators.LSHL, 11).or(oV);
+
+ oV.intoArray(out, outpos);
+ }
+
+ /**
+  * Packs 32 input vectors of 21-bit values into 21 output vectors, lane by lane, without
+  * masking the inputs.
+  *
+  * <p>Input vectors are loaded from {@code in} at {@code inpos}, {@code inpos + 8}, ...,
+  * {@code inpos + 248}. Each one is shifted to the next free bit offset of the output vector
+  * being assembled and ORed in; when a value straddles the 32-bit boundary, its upper bits
+  * seed the following output vector. Since nothing is masked, any bits set above bit 20 of
+  * an input are ORed into the neighbouring fields.
+  *
+  * @param in     array holding the values to pack
+  * @param inpos  index in {@code in} of the first value to read
+  * @param out    array receiving the packed words
+  * @param outpos index in {@code out} of the first packed vector; successive vectors are
+  *               stored {@code VLEN_256} apart
+  */
+ static void fastpackNoMask21(final int[] in, int inpos, final int[] out, int outpos) {
+     IntVector acc = IntVector.fromArray(SPECIES_256, in, inpos);
+     IntVector src = IntVector.fromArray(SPECIES_256, in, inpos + 8);
+     acc = acc.or(src.lanewise(VectorOperators.LSHL, 21));
+     acc.intoArray(out, outpos);
+     outpos += VLEN_256;
+
+     // The upper bits of the value that straddled the boundary open the next output vector.
+     acc = src.lanewise(VectorOperators.LSHR, 11);
+     src = IntVector.fromArray(SPECIES_256, in, inpos + 16);
+     acc = acc.or(src.lanewise(VectorOperators.LSHL, 10));
+     src = IntVector.fromArray(SPECIES_256, in, inpos + 24);
+     acc = acc.or(src.lanewise(VectorOperators.LSHL, 31));
+     acc.intoArray(out, outpos);
+     outpos += VLEN_256;
+
+     acc = src.lanewise(VectorOperators.LSHR, 1);
+     src = IntVector.fromArray(SPECIES_256, in, inpos + 32);
+     acc = acc.or(src.lanewise(VectorOperators.LSHL, 20));
+     acc.intoArray(out, outpos);
+     outpos += VLEN_256;
+
+     acc = src.lanewise(VectorOperators.LSHR, 12);
+     src = IntVector.fromArray(SPECIES_256, in, inpos + 40);
+     acc = acc.or(src.lanewise(VectorOperators.LSHL, 9));
+     src = IntVector.fromArray(SPECIES_256, in, inpos + 48);
+     acc = acc.or(src.lanewise(VectorOperators.LSHL, 30));
+     acc.intoArray(out, outpos);
+     outpos += VLEN_256;
+
+     acc = src.lanewise(VectorOperators.LSHR, 2);
+     src = IntVector.fromArray(SPECIES_256, in, inpos + 56);
+     acc = acc.or(src.lanewise(VectorOperators.LSHL, 19));
+     acc.intoArray(out, outpos);
+     outpos += VLEN_256;
+
+     acc = src.lanewise(VectorOperators.LSHR, 13);
+     src = IntVector.fromArray(SPECIES_256, in, inpos + 64);
+     acc = acc.or(src.lanewise(VectorOperators.LSHL, 8));
+     src = IntVector.fromArray(SPECIES_256, in, inpos + 72);
+     acc = acc.or(src.lanewise(VectorOperators.LSHL, 29));
+     acc.intoArray(out, outpos);
+     outpos += VLEN_256;
+
+     acc = src.lanewise(VectorOperators.LSHR, 3);
+     src = IntVector.fromArray(SPECIES_256, in, inpos + 80);
+     acc = acc.or(src.lanewise(VectorOperators.LSHL, 18));
+     acc.intoArray(out, outpos);
+     outpos += VLEN_256;
+
+     acc = src.lanewise(VectorOperators.LSHR, 14);
+     src = IntVector.fromArray(SPECIES_256, in, inpos + 88);
+     acc = acc.or(src.lanewise(VectorOperators.LSHL, 7));
+     src = IntVector.fromArray(SPECIES_256, in, inpos + 96);
+     acc = acc.or(src.lanewise(VectorOperators.LSHL, 28));
+     acc.intoArray(out, outpos);
+     outpos += VLEN_256;
+
+     acc = src.lanewise(VectorOperators.LSHR, 4);
+     src = IntVector.fromArray(SPECIES_256, in, inpos + 104);
+     acc = acc.or(src.lanewise(VectorOperators.LSHL, 17));
+     acc.intoArray(out, outpos);
+     outpos += VLEN_256;
+
+     acc = src.lanewise(VectorOperators.LSHR, 15);
+     src = IntVector.fromArray(SPECIES_256, in, inpos + 112);
+     acc = acc.or(src.lanewise(VectorOperators.LSHL, 6));
+     src = IntVector.fromArray(SPECIES_256, in, inpos + 120);
+     acc = acc.or(src.lanewise(VectorOperators.LSHL, 27));
+     acc.intoArray(out, outpos);
+     outpos += VLEN_256;
+
+     acc = src.lanewise(VectorOperators.LSHR, 5);
+     src = IntVector.fromArray(SPECIES_256, in, inpos + 128);
+     acc = acc.or(src.lanewise(VectorOperators.LSHL, 16));
+     acc.intoArray(out, outpos);
+     outpos += VLEN_256;
+
+     acc = src.lanewise(VectorOperators.LSHR, 16);
+     src = IntVector.fromArray(SPECIES_256, in, inpos + 136);
+     acc = acc.or(src.lanewise(VectorOperators.LSHL, 5));
+     src = IntVector.fromArray(SPECIES_256, in, inpos + 144);
+     acc = acc.or(src.lanewise(VectorOperators.LSHL, 26));
+     acc.intoArray(out, outpos);
+     outpos += VLEN_256;
+
+     acc = src.lanewise(VectorOperators.LSHR, 6);
+     src = IntVector.fromArray(SPECIES_256, in, inpos + 152);
+     acc = acc.or(src.lanewise(VectorOperators.LSHL, 15));
+     acc.intoArray(out, outpos);
+     outpos += VLEN_256;
+
+     acc = src.lanewise(VectorOperators.LSHR, 17);
+     src = IntVector.fromArray(SPECIES_256, in, inpos + 160);
+     acc = acc.or(src.lanewise(VectorOperators.LSHL, 4));
+     src = IntVector.fromArray(SPECIES_256, in, inpos + 168);
+     acc = acc.or(src.lanewise(VectorOperators.LSHL, 25));
+     acc.intoArray(out, outpos);
+     outpos += VLEN_256;
+
+     acc = src.lanewise(VectorOperators.LSHR, 7);
+     src = IntVector.fromArray(SPECIES_256, in, inpos + 176);
+     acc = acc.or(src.lanewise(VectorOperators.LSHL, 14));
+     acc.intoArray(out, outpos);
+     outpos += VLEN_256;
+
+     acc = src.lanewise(VectorOperators.LSHR, 18);
+     src = IntVector.fromArray(SPECIES_256, in, inpos + 184);
+     acc = acc.or(src.lanewise(VectorOperators.LSHL, 3));
+     src = IntVector.fromArray(SPECIES_256, in, inpos + 192);
+     acc = acc.or(src.lanewise(VectorOperators.LSHL, 24));
+     acc.intoArray(out, outpos);
+     outpos += VLEN_256;
+
+     acc = src.lanewise(VectorOperators.LSHR, 8);
+     src = IntVector.fromArray(SPECIES_256, in, inpos + 200);
+     acc = acc.or(src.lanewise(VectorOperators.LSHL, 13));
+     acc.intoArray(out, outpos);
+     outpos += VLEN_256;
+
+     acc = src.lanewise(VectorOperators.LSHR, 19);
+     src = IntVector.fromArray(SPECIES_256, in, inpos + 208);
+     acc = acc.or(src.lanewise(VectorOperators.LSHL, 2));
+     src = IntVector.fromArray(SPECIES_256, in, inpos + 216);
+     acc = acc.or(src.lanewise(VectorOperators.LSHL, 23));
+     acc.intoArray(out, outpos);
+     outpos += VLEN_256;
+
+     acc = src.lanewise(VectorOperators.LSHR, 9);
+     src = IntVector.fromArray(SPECIES_256, in, inpos + 224);
+     acc = acc.or(src.lanewise(VectorOperators.LSHL, 12));
+     acc.intoArray(out, outpos);
+     outpos += VLEN_256;
+
+     acc = src.lanewise(VectorOperators.LSHR, 20);
+     src = IntVector.fromArray(SPECIES_256, in, inpos + 232);
+     acc = acc.or(src.lanewise(VectorOperators.LSHL, 1));
+     src = IntVector.fromArray(SPECIES_256, in, inpos + 240);
+     acc = acc.or(src.lanewise(VectorOperators.LSHL, 22));
+     acc.intoArray(out, outpos);
+     outpos += VLEN_256;
+
+     // Last output vector: the final input lands exactly on the 32-bit boundary.
+     acc = src.lanewise(VectorOperators.LSHR, 10);
+     src = IntVector.fromArray(SPECIES_256, in, inpos + 248);
+     acc = acc.or(src.lanewise(VectorOperators.LSHL, 11));
+     acc.intoArray(out, outpos);
+ }
+
+ static void fastunpack21(final int[] in, int inpos, final int[] out, int outpos) {
+ var iV = IntVector.fromArray(SPECIES_256, in, inpos);
+ iV.and(MASK_21).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ var oV = iV.lanewise(VectorOperators.LSHR, 21).and(MASK_21);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 8);
+ oV = iV.and(1023).lanewise(VectorOperators.LSHL, 11).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 10).and(MASK_21).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 31).and(MASK_21);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 16);
+ oV = iV.and(1048575).lanewise(VectorOperators.LSHL, 1).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 20).and(MASK_21);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 24);
+ oV = iV.and(511).lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 9).and(MASK_21).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 30).and(MASK_21);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 32);
+ oV = iV.and(524287).lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 19).and(MASK_21);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 40);
+ oV = iV.and(255).lanewise(VectorOperators.LSHL, 13).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 8).and(MASK_21).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 29).and(MASK_21);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 48);
+ oV = iV.and(262143).lanewise(VectorOperators.LSHL, 3).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 18).and(MASK_21);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 56);
+ oV = iV.and(127).lanewise(VectorOperators.LSHL, 14).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 7).and(MASK_21).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 28).and(MASK_21);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 64);
+ oV = iV.and(131071).lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 17).and(MASK_21);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 72);
+ oV = iV.and(63).lanewise(VectorOperators.LSHL, 15).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 6).and(MASK_21).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 27).and(MASK_21);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 80);
+ oV = iV.and(65535).lanewise(VectorOperators.LSHL, 5).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 16).and(MASK_21);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 88);
+ oV = iV.and(31).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 5).and(MASK_21).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 26).and(MASK_21);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 96);
+ oV = iV.and(32767).lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 15).and(MASK_21);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 104);
+ oV = iV.and(15).lanewise(VectorOperators.LSHL, 17).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 4).and(MASK_21).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 25).and(MASK_21);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 112);
+ oV = iV.and(16383).lanewise(VectorOperators.LSHL, 7).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 14).and(MASK_21);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 120);
+ oV = iV.and(7).lanewise(VectorOperators.LSHL, 18).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 3).and(MASK_21).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 24).and(MASK_21);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 128);
+ oV = iV.and(8191).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 13).and(MASK_21);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 136);
+ oV = iV.and(3).lanewise(VectorOperators.LSHL, 19).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 2).and(MASK_21).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 23).and(MASK_21);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 144);
+ oV = iV.and(4095).lanewise(VectorOperators.LSHL, 9).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 12).and(MASK_21);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 152);
+ oV = iV.and(1).lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 1).and(MASK_21).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 22).and(MASK_21);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 160);
+ oV = iV.and(2047).lanewise(VectorOperators.LSHL, 10).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 11).and(MASK_21).intoArray(out, outpos);
+ }
+
+ /**
+  * Packs 32 input vectors into 22 output vectors, lane by lane, keeping only the bits of
+  * each input selected by {@code MASK_22}.
+  *
+  * <p>Sixteen 22-bit values fill exactly eleven 32-bit words, so the same shift schedule is
+  * applied twice: once to the input vectors at {@code inpos .. inpos + 120} and once to those
+  * at {@code inpos + 128 .. inpos + 248}. Within a pass each masked input is shifted to the
+  * next free bit offset and ORed into the output vector being assembled; when a value
+  * straddles the 32-bit boundary, its upper bits seed the following output vector.
+  *
+  * @param in     array holding the values to pack
+  * @param inpos  index in {@code in} of the first value to read
+  * @param out    array receiving the packed words
+  * @param outpos index in {@code out} of the first packed vector; successive vectors are
+  *               stored {@code VLEN_256} apart
+  */
+ static void fastpack22(final int[] in, int inpos, final int[] out, int outpos) {
+     for (int pass = 0; pass < 2; pass++) {
+         // Inputs are masked once on load; both the shifted-in part and the carried part
+         // are derived from the masked vector.
+         IntVector acc = IntVector.fromArray(SPECIES_256, in, inpos).and(MASK_22);
+         IntVector src = IntVector.fromArray(SPECIES_256, in, inpos + 8).and(MASK_22);
+         acc = acc.or(src.lanewise(VectorOperators.LSHL, 22));
+         acc.intoArray(out, outpos);
+         outpos += VLEN_256;
+
+         acc = src.lanewise(VectorOperators.LSHR, 10);
+         src = IntVector.fromArray(SPECIES_256, in, inpos + 16).and(MASK_22);
+         acc = acc.or(src.lanewise(VectorOperators.LSHL, 12));
+         acc.intoArray(out, outpos);
+         outpos += VLEN_256;
+
+         acc = src.lanewise(VectorOperators.LSHR, 20);
+         src = IntVector.fromArray(SPECIES_256, in, inpos + 24).and(MASK_22);
+         acc = acc.or(src.lanewise(VectorOperators.LSHL, 2));
+         src = IntVector.fromArray(SPECIES_256, in, inpos + 32).and(MASK_22);
+         acc = acc.or(src.lanewise(VectorOperators.LSHL, 24));
+         acc.intoArray(out, outpos);
+         outpos += VLEN_256;
+
+         acc = src.lanewise(VectorOperators.LSHR, 8);
+         src = IntVector.fromArray(SPECIES_256, in, inpos + 40).and(MASK_22);
+         acc = acc.or(src.lanewise(VectorOperators.LSHL, 14));
+         acc.intoArray(out, outpos);
+         outpos += VLEN_256;
+
+         acc = src.lanewise(VectorOperators.LSHR, 18);
+         src = IntVector.fromArray(SPECIES_256, in, inpos + 48).and(MASK_22);
+         acc = acc.or(src.lanewise(VectorOperators.LSHL, 4));
+         src = IntVector.fromArray(SPECIES_256, in, inpos + 56).and(MASK_22);
+         acc = acc.or(src.lanewise(VectorOperators.LSHL, 26));
+         acc.intoArray(out, outpos);
+         outpos += VLEN_256;
+
+         acc = src.lanewise(VectorOperators.LSHR, 6);
+         src = IntVector.fromArray(SPECIES_256, in, inpos + 64).and(MASK_22);
+         acc = acc.or(src.lanewise(VectorOperators.LSHL, 16));
+         acc.intoArray(out, outpos);
+         outpos += VLEN_256;
+
+         acc = src.lanewise(VectorOperators.LSHR, 16);
+         src = IntVector.fromArray(SPECIES_256, in, inpos + 72).and(MASK_22);
+         acc = acc.or(src.lanewise(VectorOperators.LSHL, 6));
+         src = IntVector.fromArray(SPECIES_256, in, inpos + 80).and(MASK_22);
+         acc = acc.or(src.lanewise(VectorOperators.LSHL, 28));
+         acc.intoArray(out, outpos);
+         outpos += VLEN_256;
+
+         acc = src.lanewise(VectorOperators.LSHR, 4);
+         src = IntVector.fromArray(SPECIES_256, in, inpos + 88).and(MASK_22);
+         acc = acc.or(src.lanewise(VectorOperators.LSHL, 18));
+         acc.intoArray(out, outpos);
+         outpos += VLEN_256;
+
+         acc = src.lanewise(VectorOperators.LSHR, 14);
+         src = IntVector.fromArray(SPECIES_256, in, inpos + 96).and(MASK_22);
+         acc = acc.or(src.lanewise(VectorOperators.LSHL, 8));
+         src = IntVector.fromArray(SPECIES_256, in, inpos + 104).and(MASK_22);
+         acc = acc.or(src.lanewise(VectorOperators.LSHL, 30));
+         acc.intoArray(out, outpos);
+         outpos += VLEN_256;
+
+         acc = src.lanewise(VectorOperators.LSHR, 2);
+         src = IntVector.fromArray(SPECIES_256, in, inpos + 112).and(MASK_22);
+         acc = acc.or(src.lanewise(VectorOperators.LSHL, 20));
+         acc.intoArray(out, outpos);
+         outpos += VLEN_256;
+
+         acc = src.lanewise(VectorOperators.LSHR, 12);
+         src = IntVector.fromArray(SPECIES_256, in, inpos + 120).and(MASK_22);
+         acc = acc.or(src.lanewise(VectorOperators.LSHL, 10));
+         acc.intoArray(out, outpos);
+         outpos += VLEN_256;
+
+         // Second pass consumes the next sixteen input vectors.
+         inpos += 128;
+     }
+ }
+
+ /**
+  * Packs 32 input vectors of 22-bit values into 22 output vectors, lane by lane, without
+  * masking the inputs.
+  *
+  * <p>Sixteen 22-bit values fill exactly eleven 32-bit words, so the same shift schedule is
+  * applied twice: once to the input vectors at {@code inpos .. inpos + 120} and once to those
+  * at {@code inpos + 128 .. inpos + 248}, each pass starting from a fresh accumulator. Since
+  * nothing is masked, any bits set above bit 21 of an input are ORed into the neighbouring
+  * fields of the same pass.
+  *
+  * @param in     array holding the values to pack
+  * @param inpos  index in {@code in} of the first value to read
+  * @param out    array receiving the packed words
+  * @param outpos index in {@code out} of the first packed vector; successive vectors are
+  *               stored {@code VLEN_256} apart
+  */
+ static void fastpackNoMask22(final int[] in, int inpos, final int[] out, int outpos) {
+     for (int pass = 0; pass < 2; pass++) {
+         IntVector acc = IntVector.fromArray(SPECIES_256, in, inpos);
+         IntVector src = IntVector.fromArray(SPECIES_256, in, inpos + 8);
+         acc = acc.or(src.lanewise(VectorOperators.LSHL, 22));
+         acc.intoArray(out, outpos);
+         outpos += VLEN_256;
+
+         // The upper bits of the straddling value open the next output vector.
+         acc = src.lanewise(VectorOperators.LSHR, 10);
+         src = IntVector.fromArray(SPECIES_256, in, inpos + 16);
+         acc = acc.or(src.lanewise(VectorOperators.LSHL, 12));
+         acc.intoArray(out, outpos);
+         outpos += VLEN_256;
+
+         acc = src.lanewise(VectorOperators.LSHR, 20);
+         src = IntVector.fromArray(SPECIES_256, in, inpos + 24);
+         acc = acc.or(src.lanewise(VectorOperators.LSHL, 2));
+         src = IntVector.fromArray(SPECIES_256, in, inpos + 32);
+         acc = acc.or(src.lanewise(VectorOperators.LSHL, 24));
+         acc.intoArray(out, outpos);
+         outpos += VLEN_256;
+
+         acc = src.lanewise(VectorOperators.LSHR, 8);
+         src = IntVector.fromArray(SPECIES_256, in, inpos + 40);
+         acc = acc.or(src.lanewise(VectorOperators.LSHL, 14));
+         acc.intoArray(out, outpos);
+         outpos += VLEN_256;
+
+         acc = src.lanewise(VectorOperators.LSHR, 18);
+         src = IntVector.fromArray(SPECIES_256, in, inpos + 48);
+         acc = acc.or(src.lanewise(VectorOperators.LSHL, 4));
+         src = IntVector.fromArray(SPECIES_256, in, inpos + 56);
+         acc = acc.or(src.lanewise(VectorOperators.LSHL, 26));
+         acc.intoArray(out, outpos);
+         outpos += VLEN_256;
+
+         acc = src.lanewise(VectorOperators.LSHR, 6);
+         src = IntVector.fromArray(SPECIES_256, in, inpos + 64);
+         acc = acc.or(src.lanewise(VectorOperators.LSHL, 16));
+         acc.intoArray(out, outpos);
+         outpos += VLEN_256;
+
+         acc = src.lanewise(VectorOperators.LSHR, 16);
+         src = IntVector.fromArray(SPECIES_256, in, inpos + 72);
+         acc = acc.or(src.lanewise(VectorOperators.LSHL, 6));
+         src = IntVector.fromArray(SPECIES_256, in, inpos + 80);
+         acc = acc.or(src.lanewise(VectorOperators.LSHL, 28));
+         acc.intoArray(out, outpos);
+         outpos += VLEN_256;
+
+         acc = src.lanewise(VectorOperators.LSHR, 4);
+         src = IntVector.fromArray(SPECIES_256, in, inpos + 88);
+         acc = acc.or(src.lanewise(VectorOperators.LSHL, 18));
+         acc.intoArray(out, outpos);
+         outpos += VLEN_256;
+
+         acc = src.lanewise(VectorOperators.LSHR, 14);
+         src = IntVector.fromArray(SPECIES_256, in, inpos + 96);
+         acc = acc.or(src.lanewise(VectorOperators.LSHL, 8));
+         src = IntVector.fromArray(SPECIES_256, in, inpos + 104);
+         acc = acc.or(src.lanewise(VectorOperators.LSHL, 30));
+         acc.intoArray(out, outpos);
+         outpos += VLEN_256;
+
+         acc = src.lanewise(VectorOperators.LSHR, 2);
+         src = IntVector.fromArray(SPECIES_256, in, inpos + 112);
+         acc = acc.or(src.lanewise(VectorOperators.LSHL, 20));
+         acc.intoArray(out, outpos);
+         outpos += VLEN_256;
+
+         acc = src.lanewise(VectorOperators.LSHR, 12);
+         src = IntVector.fromArray(SPECIES_256, in, inpos + 120);
+         acc = acc.or(src.lanewise(VectorOperators.LSHL, 10));
+         acc.intoArray(out, outpos);
+         outpos += VLEN_256;
+
+         // Second pass consumes the next sixteen input vectors.
+         inpos += 128;
+     }
+ }
+
+ static void fastunpack22(final int[] in, int inpos, final int[] out, int outpos) {
+ var iV = IntVector.fromArray(SPECIES_256, in, inpos);
+ iV.and(MASK_22).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ var oV = iV.lanewise(VectorOperators.LSHR, 22).and(MASK_22);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 8);
+ oV = iV.and(4095).lanewise(VectorOperators.LSHL, 10).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 12).and(MASK_22);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 16);
+ oV = iV.and(3).lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 2).and(MASK_22).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 24).and(MASK_22);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 24);
+ oV = iV.and(16383).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 14).and(MASK_22);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 32);
+ oV = iV.and(15).lanewise(VectorOperators.LSHL, 18).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 4).and(MASK_22).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 26).and(MASK_22);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 40);
+ oV = iV.and(65535).lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 16).and(MASK_22);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 48);
+ oV = iV.and(63).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 6).and(MASK_22).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 28).and(MASK_22);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 56);
+ oV = iV.and(262143).lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 18).and(MASK_22);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 64);
+ oV = iV.and(255).lanewise(VectorOperators.LSHL, 14).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 8).and(MASK_22).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 30).and(MASK_22);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 72);
+ oV = iV.and(1048575).lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 20).and(MASK_22);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 80);
+ oV = iV.and(1023).lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 10).and(MASK_22).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 88);
+ iV.and(MASK_22).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 22).and(MASK_22);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 96);
+ oV = iV.and(4095).lanewise(VectorOperators.LSHL, 10).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 12).and(MASK_22);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 104);
+ oV = iV.and(3).lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 2).and(MASK_22).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 24).and(MASK_22);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 112);
+ oV = iV.and(16383).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 14).and(MASK_22);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 120);
+ oV = iV.and(15).lanewise(VectorOperators.LSHL, 18).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 4).and(MASK_22).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 26).and(MASK_22);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 128);
+ oV = iV.and(65535).lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 16).and(MASK_22);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 136);
+ oV = iV.and(63).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 6).and(MASK_22).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 28).and(MASK_22);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 144);
+ oV = iV.and(262143).lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 18).and(MASK_22);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 152);
+ oV = iV.and(255).lanewise(VectorOperators.LSHL, 14).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 8).and(MASK_22).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 30).and(MASK_22);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 160);
+ oV = iV.and(1048575).lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 20).and(MASK_22);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 168);
+ oV = iV.and(1023).lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 10).and(MASK_22).intoArray(out, outpos);
+ }
+
+ static void fastpack23(final int[] in, int inpos, final int[] out, int outpos) {
+ var iV = IntVector.fromArray(SPECIES_256, in, inpos);
+ var oV = iV.and(MASK_23);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 8);
+ oV = iV.and(MASK_23).lanewise(VectorOperators.LSHL, 23).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_23).lanewise(VectorOperators.LSHR, 9);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 16);
+ oV = iV.and(MASK_23).lanewise(VectorOperators.LSHL, 14).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_23).lanewise(VectorOperators.LSHR, 18);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 24);
+ oV = iV.and(MASK_23).lanewise(VectorOperators.LSHL, 5).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 32);
+ oV = iV.and(MASK_23).lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_23).lanewise(VectorOperators.LSHR, 4);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 40);
+ oV = iV.and(MASK_23).lanewise(VectorOperators.LSHL, 19).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_23).lanewise(VectorOperators.LSHR, 13);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 48);
+ oV = iV.and(MASK_23).lanewise(VectorOperators.LSHL, 10).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_23).lanewise(VectorOperators.LSHR, 22);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 56);
+ oV = iV.and(MASK_23).lanewise(VectorOperators.LSHL, 1).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 64);
+ oV = iV.and(MASK_23).lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_23).lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 72);
+ oV = iV.and(MASK_23).lanewise(VectorOperators.LSHL, 15).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_23).lanewise(VectorOperators.LSHR, 17);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 80);
+ oV = iV.and(MASK_23).lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 88);
+ oV = iV.and(MASK_23).lanewise(VectorOperators.LSHL, 29).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_23).lanewise(VectorOperators.LSHR, 3);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 96);
+ oV = iV.and(MASK_23).lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_23).lanewise(VectorOperators.LSHR, 12);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 104);
+ oV = iV.and(MASK_23).lanewise(VectorOperators.LSHL, 11).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_23).lanewise(VectorOperators.LSHR, 21);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 112);
+ oV = iV.and(MASK_23).lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 120);
+ oV = iV.and(MASK_23).lanewise(VectorOperators.LSHL, 25).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_23).lanewise(VectorOperators.LSHR, 7);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 128);
+ oV = iV.and(MASK_23).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_23).lanewise(VectorOperators.LSHR, 16);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 136);
+ oV = iV.and(MASK_23).lanewise(VectorOperators.LSHL, 7).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 144);
+ oV = iV.and(MASK_23).lanewise(VectorOperators.LSHL, 30).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_23).lanewise(VectorOperators.LSHR, 2);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 152);
+ oV = iV.and(MASK_23).lanewise(VectorOperators.LSHL, 21).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_23).lanewise(VectorOperators.LSHR, 11);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 160);
+ oV = iV.and(MASK_23).lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_23).lanewise(VectorOperators.LSHR, 20);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 168);
+ oV = iV.and(MASK_23).lanewise(VectorOperators.LSHL, 3).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 176);
+ oV = iV.and(MASK_23).lanewise(VectorOperators.LSHL, 26).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_23).lanewise(VectorOperators.LSHR, 6);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 184);
+ oV = iV.and(MASK_23).lanewise(VectorOperators.LSHL, 17).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_23).lanewise(VectorOperators.LSHR, 15);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 192);
+ oV = iV.and(MASK_23).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 200);
+ oV = iV.and(MASK_23).lanewise(VectorOperators.LSHL, 31).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_23).lanewise(VectorOperators.LSHR, 1);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 208);
+ oV = iV.and(MASK_23).lanewise(VectorOperators.LSHL, 22).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_23).lanewise(VectorOperators.LSHR, 10);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 216);
+ oV = iV.and(MASK_23).lanewise(VectorOperators.LSHL, 13).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_23).lanewise(VectorOperators.LSHR, 19);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 224);
+ oV = iV.and(MASK_23).lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 232);
+ oV = iV.and(MASK_23).lanewise(VectorOperators.LSHL, 27).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_23).lanewise(VectorOperators.LSHR, 5);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 240);
+ oV = iV.and(MASK_23).lanewise(VectorOperators.LSHL, 18).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_23).lanewise(VectorOperators.LSHR, 14);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 248);
+ oV = iV.and(MASK_23).lanewise(VectorOperators.LSHL, 9).or(oV);
+
+ oV.intoArray(out, outpos);
+ }
+
+ static void fastpackNoMask23(final int[] in, int inpos, final int[] out, int outpos) {
+ var iV = IntVector.fromArray(SPECIES_256, in, inpos);
+ var oV = iV;
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 8);
+ oV = iV.lanewise(VectorOperators.LSHL, 23).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 9);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 16);
+ oV = iV.lanewise(VectorOperators.LSHL, 14).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 18);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 24);
+ oV = iV.lanewise(VectorOperators.LSHL, 5).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 32);
+ oV = iV.lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 4);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 40);
+ oV = iV.lanewise(VectorOperators.LSHL, 19).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 13);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 48);
+ oV = iV.lanewise(VectorOperators.LSHL, 10).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 22);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 56);
+ oV = iV.lanewise(VectorOperators.LSHL, 1).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 64);
+ oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 72);
+ oV = iV.lanewise(VectorOperators.LSHL, 15).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 17);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 80);
+ oV = iV.lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 88);
+ oV = iV.lanewise(VectorOperators.LSHL, 29).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 3);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 96);
+ oV = iV.lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 12);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 104);
+ oV = iV.lanewise(VectorOperators.LSHL, 11).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 21);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 112);
+ oV = iV.lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 120);
+ oV = iV.lanewise(VectorOperators.LSHL, 25).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 7);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 128);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 16);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 136);
+ oV = iV.lanewise(VectorOperators.LSHL, 7).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 144);
+ oV = iV.lanewise(VectorOperators.LSHL, 30).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 2);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 152);
+ oV = iV.lanewise(VectorOperators.LSHL, 21).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 11);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 160);
+ oV = iV.lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 20);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 168);
+ oV = iV.lanewise(VectorOperators.LSHL, 3).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 176);
+ oV = iV.lanewise(VectorOperators.LSHL, 26).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 6);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 184);
+ oV = iV.lanewise(VectorOperators.LSHL, 17).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 15);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 192);
+ oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 200);
+ oV = iV.lanewise(VectorOperators.LSHL, 31).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 1);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 208);
+ oV = iV.lanewise(VectorOperators.LSHL, 22).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 10);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 216);
+ oV = iV.lanewise(VectorOperators.LSHL, 13).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 19);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 224);
+ oV = iV.lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 232);
+ oV = iV.lanewise(VectorOperators.LSHL, 27).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 5);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 240);
+ oV = iV.lanewise(VectorOperators.LSHL, 18).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 14);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 248);
+ oV = iV.lanewise(VectorOperators.LSHL, 9).or(oV);
+
+ oV.intoArray(out, outpos);
+ }
+
+ static void fastunpack23(final int[] in, int inpos, final int[] out, int outpos) {
+ var iV = IntVector.fromArray(SPECIES_256, in, inpos);
+ iV.and(MASK_23).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ var oV = iV.lanewise(VectorOperators.LSHR, 23).and(MASK_23);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 8);
+ oV = iV.and(16383).lanewise(VectorOperators.LSHL, 9).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 14).and(MASK_23);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 16);
+ oV = iV.and(31).lanewise(VectorOperators.LSHL, 18).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 5).and(MASK_23).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 28).and(MASK_23);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 24);
+ oV = iV.and(524287).lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 19).and(MASK_23);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 32);
+ oV = iV.and(1023).lanewise(VectorOperators.LSHL, 13).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 10).and(MASK_23);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 40);
+ oV = iV.and(1).lanewise(VectorOperators.LSHL, 22).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 1).and(MASK_23).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 24).and(MASK_23);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 48);
+ oV = iV.and(32767).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 15).and(MASK_23);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 56);
+ oV = iV.and(63).lanewise(VectorOperators.LSHL, 17).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 6).and(MASK_23).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 29).and(MASK_23);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 64);
+ oV = iV.and(1048575).lanewise(VectorOperators.LSHL, 3).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 20).and(MASK_23);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 72);
+ oV = iV.and(2047).lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 11).and(MASK_23);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 80);
+ oV = iV.and(3).lanewise(VectorOperators.LSHL, 21).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 2).and(MASK_23).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 25).and(MASK_23);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 88);
+ oV = iV.and(65535).lanewise(VectorOperators.LSHL, 7).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 16).and(MASK_23);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 96);
+ oV = iV.and(127).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 7).and(MASK_23).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 30).and(MASK_23);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 104);
+ oV = iV.and(2097151).lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 21).and(MASK_23);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 112);
+ oV = iV.and(4095).lanewise(VectorOperators.LSHL, 11).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 12).and(MASK_23);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 120);
+ oV = iV.and(7).lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 3).and(MASK_23).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 26).and(MASK_23);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 128);
+ oV = iV.and(131071).lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 17).and(MASK_23);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 136);
+ oV = iV.and(255).lanewise(VectorOperators.LSHL, 15).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 8).and(MASK_23).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 31).and(MASK_23);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 144);
+ oV = iV.and(4194303).lanewise(VectorOperators.LSHL, 1).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 22).and(MASK_23);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 152);
+ oV = iV.and(8191).lanewise(VectorOperators.LSHL, 10).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 13).and(MASK_23);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 160);
+ oV = iV.and(15).lanewise(VectorOperators.LSHL, 19).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 4).and(MASK_23).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 27).and(MASK_23);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 168);
+ oV = iV.and(262143).lanewise(VectorOperators.LSHL, 5).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 18).and(MASK_23);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 176);
+ oV = iV.and(511).lanewise(VectorOperators.LSHL, 14).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 9).and(MASK_23).intoArray(out, outpos);
+ }
+
+ static void fastpack24(final int[] in, int inpos, final int[] out, int outpos) {
+ var iV = IntVector.fromArray(SPECIES_256, in, inpos);
+ var oV = iV.and(MASK_24);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 8);
+ oV = iV.and(MASK_24).lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_24).lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 16);
+ oV = iV.and(MASK_24).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_24).lanewise(VectorOperators.LSHR, 16);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 24);
+ oV = iV.and(MASK_24).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 32);
+ oV = iV.and(MASK_24);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 40);
+ oV = iV.and(MASK_24).lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_24).lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 48);
+ oV = iV.and(MASK_24).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_24).lanewise(VectorOperators.LSHR, 16);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 56);
+ oV = iV.and(MASK_24).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 64);
+ oV = iV.and(MASK_24);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 72);
+ oV = iV.and(MASK_24).lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_24).lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 80);
+ oV = iV.and(MASK_24).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_24).lanewise(VectorOperators.LSHR, 16);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 88);
+ oV = iV.and(MASK_24).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 96);
+ oV = iV.and(MASK_24);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 104);
+ oV = iV.and(MASK_24).lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_24).lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 112);
+ oV = iV.and(MASK_24).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_24).lanewise(VectorOperators.LSHR, 16);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 120);
+ oV = iV.and(MASK_24).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 128);
+ oV = iV.and(MASK_24);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 136);
+ oV = iV.and(MASK_24).lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_24).lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 144);
+ oV = iV.and(MASK_24).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_24).lanewise(VectorOperators.LSHR, 16);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 152);
+ oV = iV.and(MASK_24).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 160);
+ oV = iV.and(MASK_24);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 168);
+ oV = iV.and(MASK_24).lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_24).lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 176);
+ oV = iV.and(MASK_24).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_24).lanewise(VectorOperators.LSHR, 16);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 184);
+ oV = iV.and(MASK_24).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 192);
+ oV = iV.and(MASK_24);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 200);
+ oV = iV.and(MASK_24).lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_24).lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 208);
+ oV = iV.and(MASK_24).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_24).lanewise(VectorOperators.LSHR, 16);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 216);
+ oV = iV.and(MASK_24).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 224);
+ oV = iV.and(MASK_24);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 232);
+ oV = iV.and(MASK_24).lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_24).lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 240);
+ oV = iV.and(MASK_24).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_24).lanewise(VectorOperators.LSHR, 16);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 248);
+ oV = iV.and(MASK_24).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ oV.intoArray(out, outpos);
+ }
+
+ static void fastpackNoMask24(final int[] in, int inpos, final int[] out, int outpos) {
+ var iV = IntVector.fromArray(SPECIES_256, in, inpos);
+ var oV = iV;
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 8);
+ oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 16);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 16);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 24);
+ oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 32);
+ oV = iV;
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 40);
+ oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 48);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 16);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 56);
+ oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 64);
+ oV = iV;
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 72);
+ oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 80);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 16);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 88);
+ oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 96);
+ oV = iV;
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 104);
+ oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 112);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 16);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 120);
+ oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 128);
+ oV = iV;
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 136);
+ oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 144);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 16);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 152);
+ oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 160);
+ oV = iV;
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 168);
+ oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 176);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 16);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 184);
+ oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 192);
+ oV = iV;
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 200);
+ oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 208);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 16);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 216);
+ oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 224);
+ oV = iV;
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 232);
+ oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 240);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 16);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 248);
+ oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ oV.intoArray(out, outpos);
+ }
+
+ static void fastunpack24(final int[] in, int inpos, final int[] out, int outpos) { // Unpacks 24-bit fields: reads 24 vectors from in (offsets inpos, inpos + 8, ..., inpos + 184) and stores 32 vectors to out starting at outpos. Every operation is lanewise, so each lane is decoded independently.
+ var iV = IntVector.fromArray(SPECIES_256, in, inpos); // word 0 of the first three-word group
+ iV.and(MASK_24).intoArray(out, outpos); // value 0 = low 24 bits of word 0 (MASK_24 is presumably (1 << 24) - 1; it is declared outside this view -- confirm)
+ outpos += VLEN_256; // step to the next output vector (VLEN_256 is presumably 8, matching the +8 input stride -- confirm)
+ // Value 1 straddles words 0 and 1: the top 8 bits of word 0 are its low 8 bits.
+ var oV = iV.lanewise(VectorOperators.LSHR, 24).and(MASK_24);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 8); // word 1
+ oV = iV.and(65535).lanewise(VectorOperators.LSHL, 8).or(oV); // low 16 bits of word 1 (65535 = 2^16 - 1) become bits 8..23 of value 1
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Value 2 straddles words 1 and 2: the top 16 bits of word 1 are its low 16 bits.
+ oV = iV.lanewise(VectorOperators.LSHR, 16).and(MASK_24);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 16); // word 2
+ oV = iV.and(255).lanewise(VectorOperators.LSHL, 16).or(oV); // low 8 bits of word 2 (255 = 2^8 - 1) become bits 16..23 of value 2
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Value 3 is the remaining upper 24 bits of word 2; three words (96 bits) have now yielded four values.
+ iV.lanewise(VectorOperators.LSHR, 8).and(MASK_24).intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Group 2 of 8: words at inpos + 24, + 32, + 40 -> next four output vectors (same steps as the first group).
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 24);
+ iV.and(MASK_24).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 24).and(MASK_24);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 32);
+ oV = iV.and(65535).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 16).and(MASK_24);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 40);
+ oV = iV.and(255).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 8).and(MASK_24).intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Group 3 of 8: words at inpos + 48, + 56, + 64.
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 48);
+ iV.and(MASK_24).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 24).and(MASK_24);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 56);
+ oV = iV.and(65535).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 16).and(MASK_24);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 64);
+ oV = iV.and(255).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 8).and(MASK_24).intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Group 4 of 8: words at inpos + 72, + 80, + 88.
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 72);
+ iV.and(MASK_24).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 24).and(MASK_24);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 80);
+ oV = iV.and(65535).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 16).and(MASK_24);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 88);
+ oV = iV.and(255).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 8).and(MASK_24).intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Group 5 of 8: words at inpos + 96, + 104, + 112.
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 96);
+ iV.and(MASK_24).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 24).and(MASK_24);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 104);
+ oV = iV.and(65535).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 16).and(MASK_24);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 112);
+ oV = iV.and(255).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 8).and(MASK_24).intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Group 6 of 8: words at inpos + 120, + 128, + 136.
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 120);
+ iV.and(MASK_24).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 24).and(MASK_24);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 128);
+ oV = iV.and(65535).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 16).and(MASK_24);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 136);
+ oV = iV.and(255).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 8).and(MASK_24).intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Group 7 of 8: words at inpos + 144, + 152, + 160.
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 144);
+ iV.and(MASK_24).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 24).and(MASK_24);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 152);
+ oV = iV.and(65535).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 16).and(MASK_24);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 160);
+ oV = iV.and(255).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 8).and(MASK_24).intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Group 8 of 8: words at inpos + 168, + 176, + 184 (the last input vectors read).
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 168);
+ iV.and(MASK_24).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 24).and(MASK_24);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 176);
+ oV = iV.and(65535).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 16).and(MASK_24);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 184);
+ oV = iV.and(255).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 8).and(MASK_24).intoArray(out, outpos); // 32nd and final store; outpos is not advanced because nothing follows
+ }
+
+ static void fastpack25(final int[] in, int inpos, final int[] out, int outpos) { // Packs 32 vectors of values (in[inpos], in[inpos + 8], ..., in[inpos + 248]), each masked to 25 bits, into 25 vectors stored to out from outpos. Every operation is lanewise, so each lane forms its own bit stream.
+ var iV = IntVector.fromArray(SPECIES_256, in, inpos); // value 0
+ var oV = iV.and(MASK_25); // value 0 -> bits 0..24 of output word 0 (MASK_25 is presumably (1 << 25) - 1; it is declared outside this view -- confirm)
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 8); // value 1
+ oV = iV.and(MASK_25).lanewise(VectorOperators.LSHL, 25).or(oV); // value 1 starts at bit 25; only its low 7 bits fit in this word
+
+ oV.intoArray(out, outpos); // output word 0 is full
+ outpos += VLEN_256; // step to the next output vector (VLEN_256 is presumably 8, matching the +8 input stride -- confirm)
+ // Carry: the 18 bits of value 1 that did not fit (value LSHR 7) open output word 1.
+ oV = iV.and(MASK_25).lanewise(VectorOperators.LSHR, 7);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 16); // value 2
+ oV = iV.and(MASK_25).lanewise(VectorOperators.LSHL, 18).or(oV); // value 2 starts at bit 18
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_25).lanewise(VectorOperators.LSHR, 14); // carry the upper 11 bits of value 2
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 24); // value 3
+ oV = iV.and(MASK_25).lanewise(VectorOperators.LSHL, 11).or(oV); // value 3 starts at bit 11
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_25).lanewise(VectorOperators.LSHR, 21); // carry the upper 4 bits of value 3
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 32); // value 4
+ oV = iV.and(MASK_25).lanewise(VectorOperators.LSHL, 4).or(oV); // value 4 occupies bits 4..28 and fits entirely, so the word is not stored yet
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 40); // value 5
+ oV = iV.and(MASK_25).lanewise(VectorOperators.LSHL, 29).or(oV); // value 5 starts at bit 29 of the same word; its low 3 bits fit
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // The rest of the method repeats these steps: the bit offset advances by 25 modulo 32; a value that crosses a word boundary triggers a store and its high bits (LSHR by 32 - offset) seed the next word; a value that ends before bit 32 is followed by another OR into the same word.
+ oV = iV.and(MASK_25).lanewise(VectorOperators.LSHR, 3);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 48);
+ oV = iV.and(MASK_25).lanewise(VectorOperators.LSHL, 22).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_25).lanewise(VectorOperators.LSHR, 10);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 56);
+ oV = iV.and(MASK_25).lanewise(VectorOperators.LSHL, 15).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_25).lanewise(VectorOperators.LSHR, 17);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 64);
+ oV = iV.and(MASK_25).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_25).lanewise(VectorOperators.LSHR, 24);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 72);
+ oV = iV.and(MASK_25).lanewise(VectorOperators.LSHL, 1).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 80);
+ oV = iV.and(MASK_25).lanewise(VectorOperators.LSHL, 26).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_25).lanewise(VectorOperators.LSHR, 6);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 88);
+ oV = iV.and(MASK_25).lanewise(VectorOperators.LSHL, 19).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_25).lanewise(VectorOperators.LSHR, 13);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 96);
+ oV = iV.and(MASK_25).lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_25).lanewise(VectorOperators.LSHR, 20);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 104);
+ oV = iV.and(MASK_25).lanewise(VectorOperators.LSHL, 5).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 112);
+ oV = iV.and(MASK_25).lanewise(VectorOperators.LSHL, 30).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_25).lanewise(VectorOperators.LSHR, 2);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 120);
+ oV = iV.and(MASK_25).lanewise(VectorOperators.LSHL, 23).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_25).lanewise(VectorOperators.LSHR, 9);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 128);
+ oV = iV.and(MASK_25).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_25).lanewise(VectorOperators.LSHR, 16);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 136);
+ oV = iV.and(MASK_25).lanewise(VectorOperators.LSHL, 9).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_25).lanewise(VectorOperators.LSHR, 23);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 144);
+ oV = iV.and(MASK_25).lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 152);
+ oV = iV.and(MASK_25).lanewise(VectorOperators.LSHL, 27).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_25).lanewise(VectorOperators.LSHR, 5);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 160);
+ oV = iV.and(MASK_25).lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_25).lanewise(VectorOperators.LSHR, 12);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 168);
+ oV = iV.and(MASK_25).lanewise(VectorOperators.LSHL, 13).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_25).lanewise(VectorOperators.LSHR, 19);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 176);
+ oV = iV.and(MASK_25).lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 184);
+ oV = iV.and(MASK_25).lanewise(VectorOperators.LSHL, 31).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_25).lanewise(VectorOperators.LSHR, 1);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 192);
+ oV = iV.and(MASK_25).lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_25).lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 200);
+ oV = iV.and(MASK_25).lanewise(VectorOperators.LSHL, 17).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_25).lanewise(VectorOperators.LSHR, 15);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 208);
+ oV = iV.and(MASK_25).lanewise(VectorOperators.LSHL, 10).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_25).lanewise(VectorOperators.LSHR, 22);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 216);
+ oV = iV.and(MASK_25).lanewise(VectorOperators.LSHL, 3).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 224);
+ oV = iV.and(MASK_25).lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_25).lanewise(VectorOperators.LSHR, 4);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 232);
+ oV = iV.and(MASK_25).lanewise(VectorOperators.LSHL, 21).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_25).lanewise(VectorOperators.LSHR, 11);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 240);
+ oV = iV.and(MASK_25).lanewise(VectorOperators.LSHL, 14).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_25).lanewise(VectorOperators.LSHR, 18);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 248); // value 31, the last input vector
+ oV = iV.and(MASK_25).lanewise(VectorOperators.LSHL, 7).or(oV); // value 31 fills bits 7..31, ending exactly on the word boundary
+
+ oV.intoArray(out, outpos); // 25th and final store; outpos is not advanced because nothing follows
+ }
+
+ static void fastpackNoMask25(final int[] in, int inpos, final int[] out, int outpos) { // Packs 32 vectors (in[inpos], in[inpos + 8], ..., in[inpos + 248]) at 25 bits per value into 25 vectors stored to out from outpos. No mask is applied, so inputs are assumed to already fit in 25 bits; any higher bits would be ORed into neighbouring fields.
+ var iV = IntVector.fromArray(SPECIES_256, in, inpos); // value 0
+ var oV = iV; // value 0 is taken as-is for bits 0..24 of output word 0
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 8); // value 1
+ oV = iV.lanewise(VectorOperators.LSHL, 25).or(oV); // value 1 starts at bit 25; only its low 7 bits fit in this word
+
+ oV.intoArray(out, outpos); // output word 0 is full
+ outpos += VLEN_256; // step to the next output vector (VLEN_256 is presumably 8, matching the +8 input stride -- confirm)
+ // Carry: the part of value 1 that did not fit (value LSHR 7) opens output word 1.
+ oV = iV.lanewise(VectorOperators.LSHR, 7);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 16); // value 2
+ oV = iV.lanewise(VectorOperators.LSHL, 18).or(oV); // value 2 starts at bit 18
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 14); // carry the bits of value 2 above its low 14
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 24); // value 3
+ oV = iV.lanewise(VectorOperators.LSHL, 11).or(oV); // value 3 starts at bit 11
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 21); // carry the bits of value 3 above its low 21
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 32); // value 4
+ oV = iV.lanewise(VectorOperators.LSHL, 4).or(oV); // a 25-bit value 4 occupies bits 4..28 and fits entirely, so the word is not stored yet
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 40); // value 5
+ oV = iV.lanewise(VectorOperators.LSHL, 29).or(oV); // value 5 starts at bit 29 of the same word; its low 3 bits fit
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // The rest of the method repeats these steps: the bit offset advances by 25 modulo 32; a value that crosses a word boundary triggers a store and its high bits (LSHR by 32 - offset) seed the next word; a value that ends before bit 32 is followed by another OR into the same word.
+ oV = iV.lanewise(VectorOperators.LSHR, 3);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 48);
+ oV = iV.lanewise(VectorOperators.LSHL, 22).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 10);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 56);
+ oV = iV.lanewise(VectorOperators.LSHL, 15).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 17);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 64);
+ oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 24);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 72);
+ oV = iV.lanewise(VectorOperators.LSHL, 1).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 80);
+ oV = iV.lanewise(VectorOperators.LSHL, 26).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 6);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 88);
+ oV = iV.lanewise(VectorOperators.LSHL, 19).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 13);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 96);
+ oV = iV.lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 20);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 104);
+ oV = iV.lanewise(VectorOperators.LSHL, 5).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 112);
+ oV = iV.lanewise(VectorOperators.LSHL, 30).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 2);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 120);
+ oV = iV.lanewise(VectorOperators.LSHL, 23).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 9);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 128);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 16);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 136);
+ oV = iV.lanewise(VectorOperators.LSHL, 9).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 23);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 144);
+ oV = iV.lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 152);
+ oV = iV.lanewise(VectorOperators.LSHL, 27).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 5);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 160);
+ oV = iV.lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 12);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 168);
+ oV = iV.lanewise(VectorOperators.LSHL, 13).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 19);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 176);
+ oV = iV.lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 184);
+ oV = iV.lanewise(VectorOperators.LSHL, 31).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 1);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 192);
+ oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 200);
+ oV = iV.lanewise(VectorOperators.LSHL, 17).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 15);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 208);
+ oV = iV.lanewise(VectorOperators.LSHL, 10).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 22);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 216);
+ oV = iV.lanewise(VectorOperators.LSHL, 3).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 224);
+ oV = iV.lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 4);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 232);
+ oV = iV.lanewise(VectorOperators.LSHL, 21).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 11);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 240);
+ oV = iV.lanewise(VectorOperators.LSHL, 14).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 18);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 248); // value 31, the last input vector
+ oV = iV.lanewise(VectorOperators.LSHL, 7).or(oV); // value 31 is placed at bits 7..31, ending exactly on the word boundary
+
+ oV.intoArray(out, outpos); // 25th and final store; outpos is not advanced because nothing follows
+ }
+
+ static void fastunpack25(final int[] in, int inpos, final int[] out, int outpos) { // Unpacks 25-bit fields: reads 25 vectors from in (offsets inpos, inpos + 8, ..., inpos + 192) and stores 32 vectors to out starting at outpos. Every operation is lanewise, so each lane is decoded independently.
+ var iV = IntVector.fromArray(SPECIES_256, in, inpos); // word 0
+ iV.and(MASK_25).intoArray(out, outpos); // value 0 = low 25 bits of word 0 (MASK_25 is presumably (1 << 25) - 1; it is declared outside this view -- confirm)
+ outpos += VLEN_256; // step to the next output vector (VLEN_256 is presumably 8, matching the +8 input stride -- confirm)
+ // Value 1 straddles words 0 and 1: the top 7 bits of word 0 are its low 7 bits.
+ var oV = iV.lanewise(VectorOperators.LSHR, 25).and(MASK_25);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 8); // word 1
+ oV = iV.and(262143).lanewise(VectorOperators.LSHL, 7).or(oV); // low 18 bits of word 1 (262143 = 2^18 - 1) become bits 7..24 of value 1
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 18).and(MASK_25); // value 2 starts with the top 14 bits of word 1
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 16); // word 2
+ oV = iV.and(2047).lanewise(VectorOperators.LSHL, 14).or(oV); // low 11 bits of word 2 (2047 = 2^11 - 1) become bits 14..24 of value 2
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 11).and(MASK_25); // value 3 starts with the top 21 bits of word 2
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 24); // word 3
+ oV = iV.and(15).lanewise(VectorOperators.LSHL, 21).or(oV); // low 4 bits of word 3 (15 = 2^4 - 1) become bits 21..24 of value 3
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Value 4 lies entirely inside word 3 (bits 4..28), so one shift plus mask extracts it without loading a new word.
+ iV.lanewise(VectorOperators.LSHR, 4).and(MASK_25).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 29).and(MASK_25); // value 5 starts with the top 3 bits of word 3
+ // The rest of the method repeats these two cases: a split value is (previous word LSHR k) OR (next word AND literal mask, LSHL 32 - k), where the literal keeps exactly the low bits still missing from the 25-bit value; a value wholly inside one word is LSHR then AND MASK_25.
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 32);
+ oV = iV.and(4194303).lanewise(VectorOperators.LSHL, 3).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 22).and(MASK_25);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 40);
+ oV = iV.and(32767).lanewise(VectorOperators.LSHL, 10).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 15).and(MASK_25);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 48);
+ oV = iV.and(255).lanewise(VectorOperators.LSHL, 17).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 8).and(MASK_25);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 56);
+ oV = iV.and(1).lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 1).and(MASK_25).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 26).and(MASK_25);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 64);
+ oV = iV.and(524287).lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 19).and(MASK_25);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 72);
+ oV = iV.and(4095).lanewise(VectorOperators.LSHL, 13).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 12).and(MASK_25);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 80);
+ oV = iV.and(31).lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 5).and(MASK_25).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 30).and(MASK_25);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 88);
+ oV = iV.and(8388607).lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 23).and(MASK_25);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 96);
+ oV = iV.and(65535).lanewise(VectorOperators.LSHL, 9).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 16).and(MASK_25);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 104);
+ oV = iV.and(511).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 9).and(MASK_25);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 112);
+ oV = iV.and(3).lanewise(VectorOperators.LSHL, 23).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 2).and(MASK_25).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 27).and(MASK_25);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 120);
+ oV = iV.and(1048575).lanewise(VectorOperators.LSHL, 5).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 20).and(MASK_25);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 128);
+ oV = iV.and(8191).lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 13).and(MASK_25);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 136);
+ oV = iV.and(63).lanewise(VectorOperators.LSHL, 19).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 6).and(MASK_25).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 31).and(MASK_25);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 144);
+ oV = iV.and(16777215).lanewise(VectorOperators.LSHL, 1).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 24).and(MASK_25);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 152);
+ oV = iV.and(131071).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 17).and(MASK_25);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 160);
+ oV = iV.and(1023).lanewise(VectorOperators.LSHL, 15).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 10).and(MASK_25);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 168);
+ oV = iV.and(7).lanewise(VectorOperators.LSHL, 22).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 3).and(MASK_25).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 28).and(MASK_25);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 176);
+ oV = iV.and(2097151).lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 21).and(MASK_25);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 184);
+ oV = iV.and(16383).lanewise(VectorOperators.LSHL, 11).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 14).and(MASK_25);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 192); // word 24, the last input vector
+ oV = iV.and(127).lanewise(VectorOperators.LSHL, 18).or(oV); // low 7 bits of word 24 (127 = 2^7 - 1) complete value 30
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 7).and(MASK_25).intoArray(out, outpos); // value 31 = bits 7..31 of word 24; 32nd and final store, outpos is not advanced
+ }
+
+ static void fastpack26(final int[] in, int inpos, final int[] out, int outpos) { // Packs 32 vectors of values (in[inpos], in[inpos + 8], ..., in[inpos + 248]), each masked to 26 bits, into 26 vectors stored to out from outpos. Every operation is lanewise, so each lane forms its own bit stream.
+ var iV = IntVector.fromArray(SPECIES_256, in, inpos); // value 0
+ var oV = iV.and(MASK_26); // value 0 -> bits 0..25 of output word 0 (MASK_26 is presumably (1 << 26) - 1; it is declared outside this view -- confirm)
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 8); // value 1
+ oV = iV.and(MASK_26).lanewise(VectorOperators.LSHL, 26).or(oV); // value 1 starts at bit 26; only its low 6 bits fit in this word
+
+ oV.intoArray(out, outpos); // output word 0 is full
+ outpos += VLEN_256; // step to the next output vector (VLEN_256 is presumably 8, matching the +8 input stride -- confirm)
+ // Carry: the 20 bits of value 1 that did not fit (value LSHR 6) open output word 1.
+ oV = iV.and(MASK_26).lanewise(VectorOperators.LSHR, 6);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 16); // value 2
+ oV = iV.and(MASK_26).lanewise(VectorOperators.LSHL, 20).or(oV); // value 2 starts at bit 20
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_26).lanewise(VectorOperators.LSHR, 12); // carry the upper 14 bits of value 2
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 24); // value 3
+ oV = iV.and(MASK_26).lanewise(VectorOperators.LSHL, 14).or(oV); // value 3 starts at bit 14
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_26).lanewise(VectorOperators.LSHR, 18); // carry the upper 8 bits of value 3
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 32); // value 4
+ oV = iV.and(MASK_26).lanewise(VectorOperators.LSHL, 8).or(oV); // value 4 starts at bit 8
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_26).lanewise(VectorOperators.LSHR, 24); // carry the upper 2 bits of value 4
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 40); // value 5
+ oV = iV.and(MASK_26).lanewise(VectorOperators.LSHL, 2).or(oV); // value 5 occupies bits 2..27 and fits entirely, so the word is not stored yet
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 48); // value 6
+ oV = iV.and(MASK_26).lanewise(VectorOperators.LSHL, 28).or(oV); // value 6 starts at bit 28 of the same word; its low 4 bits fit
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // The following steps repeat the same pattern: the bit offset advances by 26 modulo 32; a value that crosses a word boundary triggers a store and its high bits (LSHR by 32 - offset) seed the next word; a value that ends before bit 32 is followed by another OR into the same word.
+ oV = iV.and(MASK_26).lanewise(VectorOperators.LSHR, 4);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 56);
+ oV = iV.and(MASK_26).lanewise(VectorOperators.LSHL, 22).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_26).lanewise(VectorOperators.LSHR, 10);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 64);
+ oV = iV.and(MASK_26).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_26).lanewise(VectorOperators.LSHR, 16);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 72);
+ oV = iV.and(MASK_26).lanewise(VectorOperators.LSHL, 10).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_26).lanewise(VectorOperators.LSHR, 22);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 80);
+ oV = iV.and(MASK_26).lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 88);
+ oV = iV.and(MASK_26).lanewise(VectorOperators.LSHL, 30).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_26).lanewise(VectorOperators.LSHR, 2);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 96);
+ oV = iV.and(MASK_26).lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_26).lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 104);
+ oV = iV.and(MASK_26).lanewise(VectorOperators.LSHL, 18).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_26).lanewise(VectorOperators.LSHR, 14);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 112);
+ oV = iV.and(MASK_26).lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_26).lanewise(VectorOperators.LSHR, 20);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 120); // value 15
+ oV = iV.and(MASK_26).lanewise(VectorOperators.LSHL, 6).or(oV); // value 15 fills bits 6..31, ending exactly on the word boundary
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Halfway point: 16 values x 26 bits = 416 bits = 13 full words, so value 16 starts a fresh word at bit 0 and the shift sequence of the first half repeats for values 16..31.
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 128);
+ oV = iV.and(MASK_26);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 136);
+ oV = iV.and(MASK_26).lanewise(VectorOperators.LSHL, 26).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_26).lanewise(VectorOperators.LSHR, 6);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 144);
+ oV = iV.and(MASK_26).lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_26).lanewise(VectorOperators.LSHR, 12);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 152);
+ oV = iV.and(MASK_26).lanewise(VectorOperators.LSHL, 14).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_26).lanewise(VectorOperators.LSHR, 18);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 160);
+ oV = iV.and(MASK_26).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_26).lanewise(VectorOperators.LSHR, 24);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 168);
+ oV = iV.and(MASK_26).lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 176);
+ oV = iV.and(MASK_26).lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_26).lanewise(VectorOperators.LSHR, 4);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 184);
+ oV = iV.and(MASK_26).lanewise(VectorOperators.LSHL, 22).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_26).lanewise(VectorOperators.LSHR, 10);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 192);
+ oV = iV.and(MASK_26).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_26).lanewise(VectorOperators.LSHR, 16);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 200);
+ oV = iV.and(MASK_26).lanewise(VectorOperators.LSHL, 10).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_26).lanewise(VectorOperators.LSHR, 22);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 208);
+ oV = iV.and(MASK_26).lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 216);
+ oV = iV.and(MASK_26).lanewise(VectorOperators.LSHL, 30).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_26).lanewise(VectorOperators.LSHR, 2);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 224);
+ oV = iV.and(MASK_26).lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_26).lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 232);
+ oV = iV.and(MASK_26).lanewise(VectorOperators.LSHL, 18).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_26).lanewise(VectorOperators.LSHR, 14);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 240);
+ oV = iV.and(MASK_26).lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_26).lanewise(VectorOperators.LSHR, 20);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 248); // value 31, the last input vector
+ oV = iV.and(MASK_26).lanewise(VectorOperators.LSHL, 6).or(oV); // value 31 fills bits 6..31, ending exactly on the word boundary
+
+ oV.intoArray(out, outpos); // 26th and final store; outpos is not advanced because nothing follows
+ }
+
+ static void fastpackNoMask26(final int[] in, int inpos, final int[] out, int outpos) { // Packs 32 input vectors (loads at inpos + 0, 8, ..., 248) into 26 output vectors at 26 bits per value, without masking the inputs.
+ var iV = IntVector.fromArray(SPECIES_256, in, inpos); // value 0 of every lane; each lane is packed independently of the others
+ var oV = iV; // no mask is applied: any input bit above bit 25 would be OR-ed into the neighbouring packed value
+ // Pattern used throughout: OR the next value in at its bit offset (LSHL), store the full word, then seed the next word with the spilled high bits (LSHR by 32 - offset).
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 8); // stride of 8 ints per load; assumes SPECIES_256 has 8 int lanes and VLEN_256 == 8 (both defined outside this view)
+ oV = iV.lanewise(VectorOperators.LSHL, 26).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 6);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 16);
+ oV = iV.lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 12);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 24);
+ oV = iV.lanewise(VectorOperators.LSHL, 14).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 18);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 32);
+ oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 24);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 40);
+ oV = iV.lanewise(VectorOperators.LSHL, 2).or(oV);
+ // Value 5 occupies bits 2-27 of this word, so the low 4 bits of value 6 go into the same word before it is stored.
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 48);
+ oV = iV.lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 4);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 56);
+ oV = iV.lanewise(VectorOperators.LSHL, 22).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 10);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 64);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 16);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 72);
+ oV = iV.lanewise(VectorOperators.LSHL, 10).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 22);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 80);
+ oV = iV.lanewise(VectorOperators.LSHL, 4).or(oV);
+ // Value 10 occupies bits 4-29 of this word, so the low 2 bits of value 11 share it.
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 88);
+ oV = iV.lanewise(VectorOperators.LSHL, 30).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 2);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 96);
+ oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 104);
+ oV = iV.lanewise(VectorOperators.LSHL, 18).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 14);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 112);
+ oV = iV.lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 20);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 120);
+ oV = iV.lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // 16 values * 26 bits = 13 full words, so the bit offset is back to 0: the second half repeats the same shift pattern on inputs 128..248.
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 128);
+ oV = iV;
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 136);
+ oV = iV.lanewise(VectorOperators.LSHL, 26).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 6);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 144);
+ oV = iV.lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 12);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 152);
+ oV = iV.lanewise(VectorOperators.LSHL, 14).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 18);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 160);
+ oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 24);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 168);
+ oV = iV.lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 176);
+ oV = iV.lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 4);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 184);
+ oV = iV.lanewise(VectorOperators.LSHL, 22).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 10);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 192);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 16);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 200);
+ oV = iV.lanewise(VectorOperators.LSHL, 10).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 22);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 208);
+ oV = iV.lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 216);
+ oV = iV.lanewise(VectorOperators.LSHL, 30).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 2);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 224);
+ oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 232);
+ oV = iV.lanewise(VectorOperators.LSHL, 18).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 14);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 240);
+ oV = iV.lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 20);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 248);
+ oV = iV.lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ oV.intoArray(out, outpos); // 26th and last output vector; outpos is a local copy, so it is not advanced again
+ }
+
+ static void fastunpack26(final int[] in, int inpos, final int[] out, int outpos) { // Expands 26 packed vectors (loads at inpos + 0, 8, ..., 200) into 32 vectors of 26-bit values written from outpos; reads the layout written by fastpackNoMask26.
+ var iV = IntVector.fromArray(SPECIES_256, in, inpos);
+ iV.and(MASK_26).intoArray(out, outpos); // value 0 = low 26 bits of word 0; MASK_26 is defined outside this view, presumably (1 << 26) - 1 in every lane
+ outpos += VLEN_256;
+
+ var oV = iV.lanewise(VectorOperators.LSHR, 26).and(MASK_26); // top 6 bits of word 0 = low 6 bits of value 1 (the unsigned shift already leaves only those 6 bits)
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 8);
+ oV = iV.and(1048575).lanewise(VectorOperators.LSHL, 6).or(oV); // 1048575 = 2^20 - 1: the remaining 20 bits of value 1, placed above its low 6 bits
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Same two-step pattern for every value that straddles two words: high bits of the old word (LSHR), then the low bits of the new word masked and shifted up (LSHL).
+ oV = iV.lanewise(VectorOperators.LSHR, 20).and(MASK_26);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 16);
+ oV = iV.and(16383).lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 14).and(MASK_26);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 24);
+ oV = iV.and(255).lanewise(VectorOperators.LSHL, 18).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 8).and(MASK_26);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 32);
+ oV = iV.and(3).lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 2).and(MASK_26).intoArray(out, outpos); // value 5 lies wholly inside this word (bits 2-27), so no second load is needed
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 28).and(MASK_26);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 40);
+ oV = iV.and(4194303).lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 22).and(MASK_26);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 48);
+ oV = iV.and(65535).lanewise(VectorOperators.LSHL, 10).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 16).and(MASK_26);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 56);
+ oV = iV.and(1023).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 10).and(MASK_26);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 64);
+ oV = iV.and(15).lanewise(VectorOperators.LSHL, 22).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 4).and(MASK_26).intoArray(out, outpos); // value 10 lies wholly inside this word (bits 4-29)
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 30).and(MASK_26);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 72);
+ oV = iV.and(16777215).lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 24).and(MASK_26);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 80);
+ oV = iV.and(262143).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 18).and(MASK_26);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 88);
+ oV = iV.and(4095).lanewise(VectorOperators.LSHL, 14).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 12).and(MASK_26);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 96);
+ oV = iV.and(63).lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 6).and(MASK_26).intoArray(out, outpos); // value 15 = bits 6-31 of word 12, which ends exactly on a word boundary
+ outpos += VLEN_256;
+ // 13 words hold exactly 16 values, so the bit offset is 0 again: the second half repeats the pattern above on words 13..25 (inputs 104..200).
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 104);
+ iV.and(MASK_26).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 26).and(MASK_26);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 112);
+ oV = iV.and(1048575).lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 20).and(MASK_26);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 120);
+ oV = iV.and(16383).lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 14).and(MASK_26);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 128);
+ oV = iV.and(255).lanewise(VectorOperators.LSHL, 18).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 8).and(MASK_26);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 136);
+ oV = iV.and(3).lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 2).and(MASK_26).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 28).and(MASK_26);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 144);
+ oV = iV.and(4194303).lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 22).and(MASK_26);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 152);
+ oV = iV.and(65535).lanewise(VectorOperators.LSHL, 10).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 16).and(MASK_26);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 160);
+ oV = iV.and(1023).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 10).and(MASK_26);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 168);
+ oV = iV.and(15).lanewise(VectorOperators.LSHL, 22).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 4).and(MASK_26).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 30).and(MASK_26);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 176);
+ oV = iV.and(16777215).lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 24).and(MASK_26);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 184);
+ oV = iV.and(262143).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 18).and(MASK_26);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 192);
+ oV = iV.and(4095).lanewise(VectorOperators.LSHL, 14).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 12).and(MASK_26);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 200);
+ oV = iV.and(63).lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 6).and(MASK_26).intoArray(out, outpos); // 32nd and last output vector: value 31 = bits 6-31 of the last packed word
+ }
+
+ static void fastpack27(final int[] in, int inpos, final int[] out, int outpos) { // Packs 32 input vectors (loads at inpos + 0, 8, ..., 248) into 27 output vectors at 27 bits per value, masking every input with MASK_27 first.
+ var iV = IntVector.fromArray(SPECIES_256, in, inpos); // value 0 of every lane; each lane is packed independently of the others
+ var oV = iV.and(MASK_27); // MASK_27 is defined outside this view, presumably (1 << 27) - 1 in every lane, so wider inputs are truncated rather than corrupting neighbours
+ // Pattern used throughout: OR the next masked value in at its bit offset (LSHL), store the full word, then seed the next word with the spilled high bits (LSHR by 32 - offset).
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 8); // stride of 8 ints per load; assumes SPECIES_256 has 8 int lanes and VLEN_256 == 8 (both defined outside this view)
+ oV = iV.and(MASK_27).lanewise(VectorOperators.LSHL, 27).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_27).lanewise(VectorOperators.LSHR, 5);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 16);
+ oV = iV.and(MASK_27).lanewise(VectorOperators.LSHL, 22).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_27).lanewise(VectorOperators.LSHR, 10);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 24);
+ oV = iV.and(MASK_27).lanewise(VectorOperators.LSHL, 17).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_27).lanewise(VectorOperators.LSHR, 15);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 32);
+ oV = iV.and(MASK_27).lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_27).lanewise(VectorOperators.LSHR, 20);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 40);
+ oV = iV.and(MASK_27).lanewise(VectorOperators.LSHL, 7).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_27).lanewise(VectorOperators.LSHR, 25);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 48);
+ oV = iV.and(MASK_27).lanewise(VectorOperators.LSHL, 2).or(oV);
+ // Value 6 occupies bits 2-28 of this word, so the low 3 bits of value 7 go into the same word before it is stored.
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 56);
+ oV = iV.and(MASK_27).lanewise(VectorOperators.LSHL, 29).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_27).lanewise(VectorOperators.LSHR, 3);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 64);
+ oV = iV.and(MASK_27).lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_27).lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 72);
+ oV = iV.and(MASK_27).lanewise(VectorOperators.LSHL, 19).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_27).lanewise(VectorOperators.LSHR, 13);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 80);
+ oV = iV.and(MASK_27).lanewise(VectorOperators.LSHL, 14).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_27).lanewise(VectorOperators.LSHR, 18);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 88);
+ oV = iV.and(MASK_27).lanewise(VectorOperators.LSHL, 9).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_27).lanewise(VectorOperators.LSHR, 23);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 96);
+ oV = iV.and(MASK_27).lanewise(VectorOperators.LSHL, 4).or(oV);
+ // Value 12 occupies bits 4-30 of this word, so only the lowest bit of value 13 fits at bit 31.
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 104);
+ oV = iV.and(MASK_27).lanewise(VectorOperators.LSHL, 31).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_27).lanewise(VectorOperators.LSHR, 1);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 112);
+ oV = iV.and(MASK_27).lanewise(VectorOperators.LSHL, 26).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_27).lanewise(VectorOperators.LSHR, 6);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 120);
+ oV = iV.and(MASK_27).lanewise(VectorOperators.LSHL, 21).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_27).lanewise(VectorOperators.LSHR, 11);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 128);
+ oV = iV.and(MASK_27).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_27).lanewise(VectorOperators.LSHR, 16);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 136);
+ oV = iV.and(MASK_27).lanewise(VectorOperators.LSHL, 11).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_27).lanewise(VectorOperators.LSHR, 21);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 144);
+ oV = iV.and(MASK_27).lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_27).lanewise(VectorOperators.LSHR, 26);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 152);
+ oV = iV.and(MASK_27).lanewise(VectorOperators.LSHL, 1).or(oV);
+ // Value 19 occupies bits 1-27 of this word, so the low 4 bits of value 20 share it.
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 160);
+ oV = iV.and(MASK_27).lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_27).lanewise(VectorOperators.LSHR, 4);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 168);
+ oV = iV.and(MASK_27).lanewise(VectorOperators.LSHL, 23).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_27).lanewise(VectorOperators.LSHR, 9);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 176);
+ oV = iV.and(MASK_27).lanewise(VectorOperators.LSHL, 18).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_27).lanewise(VectorOperators.LSHR, 14);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 184);
+ oV = iV.and(MASK_27).lanewise(VectorOperators.LSHL, 13).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_27).lanewise(VectorOperators.LSHR, 19);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 192);
+ oV = iV.and(MASK_27).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_27).lanewise(VectorOperators.LSHR, 24);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 200);
+ oV = iV.and(MASK_27).lanewise(VectorOperators.LSHL, 3).or(oV);
+ // Value 25 occupies bits 3-29 of this word, so the low 2 bits of value 26 share it.
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 208);
+ oV = iV.and(MASK_27).lanewise(VectorOperators.LSHL, 30).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_27).lanewise(VectorOperators.LSHR, 2);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 216);
+ oV = iV.and(MASK_27).lanewise(VectorOperators.LSHL, 25).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_27).lanewise(VectorOperators.LSHR, 7);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 224);
+ oV = iV.and(MASK_27).lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_27).lanewise(VectorOperators.LSHR, 12);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 232);
+ oV = iV.and(MASK_27).lanewise(VectorOperators.LSHL, 15).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_27).lanewise(VectorOperators.LSHR, 17);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 240);
+ oV = iV.and(MASK_27).lanewise(VectorOperators.LSHL, 10).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_27).lanewise(VectorOperators.LSHR, 22);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 248);
+ oV = iV.and(MASK_27).lanewise(VectorOperators.LSHL, 5).or(oV); // value 31 fills bits 5-31, ending exactly on the word boundary
+
+ oV.intoArray(out, outpos); // 27th and last output vector; outpos is a local copy, so it is not advanced again
+ }
+
+ static void fastpackNoMask27(final int[] in, int inpos, final int[] out, int outpos) { // Packs 32 input vectors (loads at inpos + 0, 8, ..., 248) into 27 output vectors at 27 bits per value, without masking the inputs.
+ var iV = IntVector.fromArray(SPECIES_256, in, inpos); // value 0 of every lane; each lane is packed independently of the others
+ var oV = iV; // no mask is applied: any input bit above bit 26 would be OR-ed into the neighbouring packed value
+ // Pattern used throughout: OR the next value in at its bit offset (LSHL), store the full word, then seed the next word with the spilled high bits (LSHR by 32 - offset).
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 8); // stride of 8 ints per load; assumes SPECIES_256 has 8 int lanes and VLEN_256 == 8 (both defined outside this view)
+ oV = iV.lanewise(VectorOperators.LSHL, 27).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 5);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 16);
+ oV = iV.lanewise(VectorOperators.LSHL, 22).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 10);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 24);
+ oV = iV.lanewise(VectorOperators.LSHL, 17).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 15);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 32);
+ oV = iV.lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 20);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 40);
+ oV = iV.lanewise(VectorOperators.LSHL, 7).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 25);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 48);
+ oV = iV.lanewise(VectorOperators.LSHL, 2).or(oV);
+ // Value 6 occupies bits 2-28 of this word, so the low 3 bits of value 7 go into the same word before it is stored.
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 56);
+ oV = iV.lanewise(VectorOperators.LSHL, 29).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 3);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 64);
+ oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 72);
+ oV = iV.lanewise(VectorOperators.LSHL, 19).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 13);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 80);
+ oV = iV.lanewise(VectorOperators.LSHL, 14).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 18);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 88);
+ oV = iV.lanewise(VectorOperators.LSHL, 9).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 23);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 96);
+ oV = iV.lanewise(VectorOperators.LSHL, 4).or(oV);
+ // Value 12 occupies bits 4-30 of this word, so only the lowest bit of value 13 fits at bit 31.
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 104);
+ oV = iV.lanewise(VectorOperators.LSHL, 31).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 1);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 112);
+ oV = iV.lanewise(VectorOperators.LSHL, 26).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 6);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 120);
+ oV = iV.lanewise(VectorOperators.LSHL, 21).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 11);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 128);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 16);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 136);
+ oV = iV.lanewise(VectorOperators.LSHL, 11).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 21);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 144);
+ oV = iV.lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 26);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 152);
+ oV = iV.lanewise(VectorOperators.LSHL, 1).or(oV);
+ // Value 19 occupies bits 1-27 of this word, so the low 4 bits of value 20 share it.
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 160);
+ oV = iV.lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 4);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 168);
+ oV = iV.lanewise(VectorOperators.LSHL, 23).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 9);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 176);
+ oV = iV.lanewise(VectorOperators.LSHL, 18).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 14);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 184);
+ oV = iV.lanewise(VectorOperators.LSHL, 13).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 19);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 192);
+ oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 24);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 200);
+ oV = iV.lanewise(VectorOperators.LSHL, 3).or(oV);
+ // Value 25 occupies bits 3-29 of this word, so the low 2 bits of value 26 share it.
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 208);
+ oV = iV.lanewise(VectorOperators.LSHL, 30).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 2);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 216);
+ oV = iV.lanewise(VectorOperators.LSHL, 25).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 7);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 224);
+ oV = iV.lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 12);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 232);
+ oV = iV.lanewise(VectorOperators.LSHL, 15).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 17);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 240);
+ oV = iV.lanewise(VectorOperators.LSHL, 10).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 22);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 248);
+ oV = iV.lanewise(VectorOperators.LSHL, 5).or(oV); // value 31 fills bits 5-31, ending exactly on the word boundary
+
+ oV.intoArray(out, outpos); // 27th and last output vector; outpos is a local copy, so it is not advanced again
+ }
+
+ static void fastunpack27(final int[] in, int inpos, final int[] out, int outpos) { // Expands 27 packed vectors (loads at inpos + 0, 8, ..., 208) into 32 vectors of 27-bit values written from outpos; reads the layout written by fastpack27 / fastpackNoMask27.
+ var iV = IntVector.fromArray(SPECIES_256, in, inpos);
+ iV.and(MASK_27).intoArray(out, outpos); // value 0 = low 27 bits of word 0; MASK_27 is defined outside this view, presumably (1 << 27) - 1 in every lane
+ outpos += VLEN_256;
+
+ var oV = iV.lanewise(VectorOperators.LSHR, 27).and(MASK_27); // top 5 bits of word 0 = low 5 bits of value 1 (the unsigned shift already leaves only those 5 bits)
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 8);
+ oV = iV.and(4194303).lanewise(VectorOperators.LSHL, 5).or(oV); // 4194303 = 2^22 - 1: the remaining 22 bits of value 1, placed above its low 5 bits
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Same two-step pattern for every value that straddles two words: high bits of the old word (LSHR), then the low bits of the new word masked and shifted up (LSHL).
+ oV = iV.lanewise(VectorOperators.LSHR, 22).and(MASK_27);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 16);
+ oV = iV.and(131071).lanewise(VectorOperators.LSHL, 10).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 17).and(MASK_27);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 24);
+ oV = iV.and(4095).lanewise(VectorOperators.LSHL, 15).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 12).and(MASK_27);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 32);
+ oV = iV.and(127).lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 7).and(MASK_27);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 40);
+ oV = iV.and(3).lanewise(VectorOperators.LSHL, 25).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 2).and(MASK_27).intoArray(out, outpos); // value 6 lies wholly inside this word (bits 2-28), so no second load is needed
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 29).and(MASK_27);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 48);
+ oV = iV.and(16777215).lanewise(VectorOperators.LSHL, 3).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 24).and(MASK_27);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 56);
+ oV = iV.and(524287).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 19).and(MASK_27);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 64);
+ oV = iV.and(16383).lanewise(VectorOperators.LSHL, 13).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 14).and(MASK_27);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 72);
+ oV = iV.and(511).lanewise(VectorOperators.LSHL, 18).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 9).and(MASK_27);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 80);
+ oV = iV.and(15).lanewise(VectorOperators.LSHL, 23).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 4).and(MASK_27).intoArray(out, outpos); // value 12 lies wholly inside this word (bits 4-30)
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 31).and(MASK_27);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 88);
+ oV = iV.and(67108863).lanewise(VectorOperators.LSHL, 1).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 26).and(MASK_27);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 96);
+ oV = iV.and(2097151).lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 21).and(MASK_27);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 104);
+ oV = iV.and(65535).lanewise(VectorOperators.LSHL, 11).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 16).and(MASK_27);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 112);
+ oV = iV.and(2047).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 11).and(MASK_27);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 120);
+ oV = iV.and(63).lanewise(VectorOperators.LSHL, 21).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 6).and(MASK_27);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 128);
+ oV = iV.and(1).lanewise(VectorOperators.LSHL, 26).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 1).and(MASK_27).intoArray(out, outpos); // value 19 lies wholly inside this word (bits 1-27)
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 28).and(MASK_27);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 136);
+ oV = iV.and(8388607).lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 23).and(MASK_27);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 144);
+ oV = iV.and(262143).lanewise(VectorOperators.LSHL, 9).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 18).and(MASK_27);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 152);
+ oV = iV.and(8191).lanewise(VectorOperators.LSHL, 14).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 13).and(MASK_27);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 160);
+ oV = iV.and(255).lanewise(VectorOperators.LSHL, 19).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 8).and(MASK_27);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 168);
+ oV = iV.and(7).lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 3).and(MASK_27).intoArray(out, outpos); // value 25 lies wholly inside this word (bits 3-29)
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 30).and(MASK_27);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 176);
+ oV = iV.and(33554431).lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 25).and(MASK_27);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 184);
+ oV = iV.and(1048575).lanewise(VectorOperators.LSHL, 7).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 20).and(MASK_27);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 192);
+ oV = iV.and(32767).lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 15).and(MASK_27);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 200);
+ oV = iV.and(1023).lanewise(VectorOperators.LSHL, 17).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 10).and(MASK_27);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 208);
+ oV = iV.and(31).lanewise(VectorOperators.LSHL, 22).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 5).and(MASK_27).intoArray(out, outpos); // 32nd and last output vector: value 31 = bits 5-31 of the last packed word
+ }
+
+ /**
+  * Bit-packs the low 28 bits of each input value into consecutive output vectors.
+  *
+  * <p>Reads 32 {@code SPECIES_256} vectors at {@code inpos}, {@code inpos + 8}, ...,
+  * {@code inpos + 248} and performs 28 vector stores beginning at {@code outpos},
+  * stepping the output position by {@code VLEN_256} after each store. Every input
+  * vector is ANDed with {@code MASK_28} before it is merged.
+  *
+  * @param in source values
+  * @param inpos index in {@code in} of the first value to pack
+  * @param out destination for the packed words
+  * @param outpos index in {@code out} of the first packed word
+  */
+ static void fastpack28(final int[] in, int inpos, final int[] out, int outpos) {
+     // 8 values * 28 bits fill exactly 7 words per lane, so the shift pattern
+     // repeats every 8 input vectors; run that 8-in / 7-out round four times.
+     for (int round = 0; round < 4; round++) {
+         final int src = inpos + 64 * round;
+
+         var carry = IntVector.fromArray(SPECIES_256, in, src).and(MASK_28);
+         var value = IntVector.fromArray(SPECIES_256, in, src + 8).and(MASK_28);
+         carry.or(value.lanewise(VectorOperators.LSHL, 28)).intoArray(out, outpos);
+         outpos += VLEN_256;
+
+         // Bits of 'value' that did not fit become the low bits of the next word.
+         carry = value.lanewise(VectorOperators.LSHR, 4);
+         value = IntVector.fromArray(SPECIES_256, in, src + 16).and(MASK_28);
+         carry.or(value.lanewise(VectorOperators.LSHL, 24)).intoArray(out, outpos);
+         outpos += VLEN_256;
+
+         carry = value.lanewise(VectorOperators.LSHR, 8);
+         value = IntVector.fromArray(SPECIES_256, in, src + 24).and(MASK_28);
+         carry.or(value.lanewise(VectorOperators.LSHL, 20)).intoArray(out, outpos);
+         outpos += VLEN_256;
+
+         carry = value.lanewise(VectorOperators.LSHR, 12);
+         value = IntVector.fromArray(SPECIES_256, in, src + 32).and(MASK_28);
+         carry.or(value.lanewise(VectorOperators.LSHL, 16)).intoArray(out, outpos);
+         outpos += VLEN_256;
+
+         carry = value.lanewise(VectorOperators.LSHR, 16);
+         value = IntVector.fromArray(SPECIES_256, in, src + 40).and(MASK_28);
+         carry.or(value.lanewise(VectorOperators.LSHL, 12)).intoArray(out, outpos);
+         outpos += VLEN_256;
+
+         carry = value.lanewise(VectorOperators.LSHR, 20);
+         value = IntVector.fromArray(SPECIES_256, in, src + 48).and(MASK_28);
+         carry.or(value.lanewise(VectorOperators.LSHL, 8)).intoArray(out, outpos);
+         outpos += VLEN_256;
+
+         // The eighth value lands in the top 28 bits, leaving nothing to carry.
+         carry = value.lanewise(VectorOperators.LSHR, 24);
+         value = IntVector.fromArray(SPECIES_256, in, src + 56).and(MASK_28);
+         carry.or(value.lanewise(VectorOperators.LSHL, 4)).intoArray(out, outpos);
+         outpos += VLEN_256;
+     }
+ }
+
+ /**
+  * Bit-packs input values at 28 bits each without masking them first.
+  *
+  * <p>Reads 32 {@code SPECIES_256} vectors at {@code inpos}, {@code inpos + 8}, ...,
+  * {@code inpos + 248} and performs 28 vector stores beginning at {@code outpos},
+  * stepping the output position by {@code VLEN_256} after each store. No AND is
+  * applied to the inputs, so any bits above bit 27 of a value are ORed into the
+  * neighbouring packed fields.
+  *
+  * @param in source values
+  * @param inpos index in {@code in} of the first value to pack
+  * @param out destination for the packed words
+  * @param outpos index in {@code out} of the first packed word
+  */
+ static void fastpackNoMask28(final int[] in, int inpos, final int[] out, int outpos) {
+     // 8 values * 28 bits fill exactly 7 words per lane, so the shift pattern
+     // repeats every 8 input vectors; run that 8-in / 7-out round four times.
+     for (int round = 0; round < 4; round++) {
+         final int src = inpos + 64 * round;
+
+         // Each round starts from a fresh first vector; nothing is carried over
+         // from the previous round.
+         var carry = IntVector.fromArray(SPECIES_256, in, src);
+         var value = IntVector.fromArray(SPECIES_256, in, src + 8);
+         carry.or(value.lanewise(VectorOperators.LSHL, 28)).intoArray(out, outpos);
+         outpos += VLEN_256;
+
+         carry = value.lanewise(VectorOperators.LSHR, 4);
+         value = IntVector.fromArray(SPECIES_256, in, src + 16);
+         carry.or(value.lanewise(VectorOperators.LSHL, 24)).intoArray(out, outpos);
+         outpos += VLEN_256;
+
+         carry = value.lanewise(VectorOperators.LSHR, 8);
+         value = IntVector.fromArray(SPECIES_256, in, src + 24);
+         carry.or(value.lanewise(VectorOperators.LSHL, 20)).intoArray(out, outpos);
+         outpos += VLEN_256;
+
+         carry = value.lanewise(VectorOperators.LSHR, 12);
+         value = IntVector.fromArray(SPECIES_256, in, src + 32);
+         carry.or(value.lanewise(VectorOperators.LSHL, 16)).intoArray(out, outpos);
+         outpos += VLEN_256;
+
+         carry = value.lanewise(VectorOperators.LSHR, 16);
+         value = IntVector.fromArray(SPECIES_256, in, src + 40);
+         carry.or(value.lanewise(VectorOperators.LSHL, 12)).intoArray(out, outpos);
+         outpos += VLEN_256;
+
+         carry = value.lanewise(VectorOperators.LSHR, 20);
+         value = IntVector.fromArray(SPECIES_256, in, src + 48);
+         carry.or(value.lanewise(VectorOperators.LSHL, 8)).intoArray(out, outpos);
+         outpos += VLEN_256;
+
+         carry = value.lanewise(VectorOperators.LSHR, 24);
+         value = IntVector.fromArray(SPECIES_256, in, src + 56);
+         carry.or(value.lanewise(VectorOperators.LSHL, 4)).intoArray(out, outpos);
+         outpos += VLEN_256;
+     }
+ }
+
+ /**
+  * Expands 28-bit packed fields back into one value per output element.
+  *
+  * <p>Reads 28 {@code SPECIES_256} vectors at {@code inpos}, {@code inpos + 8}, ...,
+  * {@code inpos + 216} and performs 32 vector stores beginning at {@code outpos},
+  * stepping the output position by {@code VLEN_256} after each store.
+  *
+  * @param in packed words
+  * @param inpos index in {@code in} of the first packed word
+  * @param out destination for the unpacked values
+  * @param outpos index in {@code out} of the first unpacked value
+  */
+ static void fastunpack28(final int[] in, int inpos, final int[] out, int outpos) {
+     // 7 packed words hold 8 fields per lane, so the pattern repeats every
+     // 7 input vectors; run that 7-in / 8-out round four times.
+     for (int round = 0; round < 4; round++) {
+         final int src = inpos + 56 * round;
+
+         // Field 0 is simply the low 28 bits of the first word.
+         var word = IntVector.fromArray(SPECIES_256, in, src);
+         word.and(MASK_28).intoArray(out, outpos);
+         outpos += VLEN_256;
+
+         // Fields 1..6 straddle two words: the low part is the top of the
+         // previous word, the high part is the bottom of the next one.
+         var low = word.lanewise(VectorOperators.LSHR, 28).and(MASK_28);
+         word = IntVector.fromArray(SPECIES_256, in, src + 8);
+         low.or(word.and(0xFFFFFF).lanewise(VectorOperators.LSHL, 4)).intoArray(out, outpos);
+         outpos += VLEN_256;
+
+         low = word.lanewise(VectorOperators.LSHR, 24).and(MASK_28);
+         word = IntVector.fromArray(SPECIES_256, in, src + 16);
+         low.or(word.and(0xFFFFF).lanewise(VectorOperators.LSHL, 8)).intoArray(out, outpos);
+         outpos += VLEN_256;
+
+         low = word.lanewise(VectorOperators.LSHR, 20).and(MASK_28);
+         word = IntVector.fromArray(SPECIES_256, in, src + 24);
+         low.or(word.and(0xFFFF).lanewise(VectorOperators.LSHL, 12)).intoArray(out, outpos);
+         outpos += VLEN_256;
+
+         low = word.lanewise(VectorOperators.LSHR, 16).and(MASK_28);
+         word = IntVector.fromArray(SPECIES_256, in, src + 32);
+         low.or(word.and(0xFFF).lanewise(VectorOperators.LSHL, 16)).intoArray(out, outpos);
+         outpos += VLEN_256;
+
+         low = word.lanewise(VectorOperators.LSHR, 12).and(MASK_28);
+         word = IntVector.fromArray(SPECIES_256, in, src + 40);
+         low.or(word.and(0xFF).lanewise(VectorOperators.LSHL, 20)).intoArray(out, outpos);
+         outpos += VLEN_256;
+
+         low = word.lanewise(VectorOperators.LSHR, 8).and(MASK_28);
+         word = IntVector.fromArray(SPECIES_256, in, src + 48);
+         low.or(word.and(0xF).lanewise(VectorOperators.LSHL, 24)).intoArray(out, outpos);
+         outpos += VLEN_256;
+
+         // Field 7 is the top 28 bits of the seventh word.
+         word.lanewise(VectorOperators.LSHR, 4).and(MASK_28).intoArray(out, outpos);
+         outpos += VLEN_256;
+     }
+ }
+
+ /**
+  * Bit-packs the low 29 bits of each input value into consecutive output vectors.
+  *
+  * <p>Reads 32 {@code SPECIES_256} vectors at {@code inpos}, {@code inpos + 8}, ...,
+  * {@code inpos + 248} and performs 29 vector stores beginning at {@code outpos},
+  * stepping the output position by {@code VLEN_256} after each store. Every input
+  * vector is ANDed with {@code MASK_29} before it is merged.
+  *
+  * @param in source values
+  * @param inpos index in {@code in} of the first value to pack
+  * @param out destination for the packed words
+  * @param outpos index in {@code out} of the first packed word
+  */
+ static void fastpack29(final int[] in, int inpos, final int[] out, int outpos) {
+     // 'word' accumulates the output word being built; 'value' is the masked
+     // input vector currently being merged. Value k starts at bit (29 * k) % 32.
+     var word = IntVector.fromArray(SPECIES_256, in, inpos).and(MASK_29);
+
+     var value = IntVector.fromArray(SPECIES_256, in, inpos + 8).and(MASK_29);
+     word.or(value.lanewise(VectorOperators.LSHL, 29)).intoArray(out, outpos);
+     outpos += VLEN_256;
+     word = value.lanewise(VectorOperators.LSHR, 3);
+
+     value = IntVector.fromArray(SPECIES_256, in, inpos + 16).and(MASK_29);
+     word.or(value.lanewise(VectorOperators.LSHL, 26)).intoArray(out, outpos);
+     outpos += VLEN_256;
+     word = value.lanewise(VectorOperators.LSHR, 6);
+
+     value = IntVector.fromArray(SPECIES_256, in, inpos + 24).and(MASK_29);
+     word.or(value.lanewise(VectorOperators.LSHL, 23)).intoArray(out, outpos);
+     outpos += VLEN_256;
+     word = value.lanewise(VectorOperators.LSHR, 9);
+
+     value = IntVector.fromArray(SPECIES_256, in, inpos + 32).and(MASK_29);
+     word.or(value.lanewise(VectorOperators.LSHL, 20)).intoArray(out, outpos);
+     outpos += VLEN_256;
+     word = value.lanewise(VectorOperators.LSHR, 12);
+
+     value = IntVector.fromArray(SPECIES_256, in, inpos + 40).and(MASK_29);
+     word.or(value.lanewise(VectorOperators.LSHL, 17)).intoArray(out, outpos);
+     outpos += VLEN_256;
+     word = value.lanewise(VectorOperators.LSHR, 15);
+
+     value = IntVector.fromArray(SPECIES_256, in, inpos + 48).and(MASK_29);
+     word.or(value.lanewise(VectorOperators.LSHL, 14)).intoArray(out, outpos);
+     outpos += VLEN_256;
+     word = value.lanewise(VectorOperators.LSHR, 18);
+
+     value = IntVector.fromArray(SPECIES_256, in, inpos + 56).and(MASK_29);
+     word.or(value.lanewise(VectorOperators.LSHL, 11)).intoArray(out, outpos);
+     outpos += VLEN_256;
+     word = value.lanewise(VectorOperators.LSHR, 21);
+
+     value = IntVector.fromArray(SPECIES_256, in, inpos + 64).and(MASK_29);
+     word.or(value.lanewise(VectorOperators.LSHL, 8)).intoArray(out, outpos);
+     outpos += VLEN_256;
+     word = value.lanewise(VectorOperators.LSHR, 24);
+
+     value = IntVector.fromArray(SPECIES_256, in, inpos + 72).and(MASK_29);
+     word.or(value.lanewise(VectorOperators.LSHL, 5)).intoArray(out, outpos);
+     outpos += VLEN_256;
+     word = value.lanewise(VectorOperators.LSHR, 27);
+
+     // Bits 2..30: this value fits entirely, so the word is not stored yet.
+     value = IntVector.fromArray(SPECIES_256, in, inpos + 80).and(MASK_29);
+     word = word.or(value.lanewise(VectorOperators.LSHL, 2));
+
+     value = IntVector.fromArray(SPECIES_256, in, inpos + 88).and(MASK_29);
+     word.or(value.lanewise(VectorOperators.LSHL, 31)).intoArray(out, outpos);
+     outpos += VLEN_256;
+     word = value.lanewise(VectorOperators.LSHR, 1);
+
+     value = IntVector.fromArray(SPECIES_256, in, inpos + 96).and(MASK_29);
+     word.or(value.lanewise(VectorOperators.LSHL, 28)).intoArray(out, outpos);
+     outpos += VLEN_256;
+     word = value.lanewise(VectorOperators.LSHR, 4);
+
+     value = IntVector.fromArray(SPECIES_256, in, inpos + 104).and(MASK_29);
+     word.or(value.lanewise(VectorOperators.LSHL, 25)).intoArray(out, outpos);
+     outpos += VLEN_256;
+     word = value.lanewise(VectorOperators.LSHR, 7);
+
+     value = IntVector.fromArray(SPECIES_256, in, inpos + 112).and(MASK_29);
+     word.or(value.lanewise(VectorOperators.LSHL, 22)).intoArray(out, outpos);
+     outpos += VLEN_256;
+     word = value.lanewise(VectorOperators.LSHR, 10);
+
+     value = IntVector.fromArray(SPECIES_256, in, inpos + 120).and(MASK_29);
+     word.or(value.lanewise(VectorOperators.LSHL, 19)).intoArray(out, outpos);
+     outpos += VLEN_256;
+     word = value.lanewise(VectorOperators.LSHR, 13);
+
+     value = IntVector.fromArray(SPECIES_256, in, inpos + 128).and(MASK_29);
+     word.or(value.lanewise(VectorOperators.LSHL, 16)).intoArray(out, outpos);
+     outpos += VLEN_256;
+     word = value.lanewise(VectorOperators.LSHR, 16);
+
+     value = IntVector.fromArray(SPECIES_256, in, inpos + 136).and(MASK_29);
+     word.or(value.lanewise(VectorOperators.LSHL, 13)).intoArray(out, outpos);
+     outpos += VLEN_256;
+     word = value.lanewise(VectorOperators.LSHR, 19);
+
+     value = IntVector.fromArray(SPECIES_256, in, inpos + 144).and(MASK_29);
+     word.or(value.lanewise(VectorOperators.LSHL, 10)).intoArray(out, outpos);
+     outpos += VLEN_256;
+     word = value.lanewise(VectorOperators.LSHR, 22);
+
+     value = IntVector.fromArray(SPECIES_256, in, inpos + 152).and(MASK_29);
+     word.or(value.lanewise(VectorOperators.LSHL, 7)).intoArray(out, outpos);
+     outpos += VLEN_256;
+     word = value.lanewise(VectorOperators.LSHR, 25);
+
+     value = IntVector.fromArray(SPECIES_256, in, inpos + 160).and(MASK_29);
+     word.or(value.lanewise(VectorOperators.LSHL, 4)).intoArray(out, outpos);
+     outpos += VLEN_256;
+     word = value.lanewise(VectorOperators.LSHR, 28);
+
+     // Bits 1..29: this value fits entirely, so the word is not stored yet.
+     value = IntVector.fromArray(SPECIES_256, in, inpos + 168).and(MASK_29);
+     word = word.or(value.lanewise(VectorOperators.LSHL, 1));
+
+     value = IntVector.fromArray(SPECIES_256, in, inpos + 176).and(MASK_29);
+     word.or(value.lanewise(VectorOperators.LSHL, 30)).intoArray(out, outpos);
+     outpos += VLEN_256;
+     word = value.lanewise(VectorOperators.LSHR, 2);
+
+     value = IntVector.fromArray(SPECIES_256, in, inpos + 184).and(MASK_29);
+     word.or(value.lanewise(VectorOperators.LSHL, 27)).intoArray(out, outpos);
+     outpos += VLEN_256;
+     word = value.lanewise(VectorOperators.LSHR, 5);
+
+     value = IntVector.fromArray(SPECIES_256, in, inpos + 192).and(MASK_29);
+     word.or(value.lanewise(VectorOperators.LSHL, 24)).intoArray(out, outpos);
+     outpos += VLEN_256;
+     word = value.lanewise(VectorOperators.LSHR, 8);
+
+     value = IntVector.fromArray(SPECIES_256, in, inpos + 200).and(MASK_29);
+     word.or(value.lanewise(VectorOperators.LSHL, 21)).intoArray(out, outpos);
+     outpos += VLEN_256;
+     word = value.lanewise(VectorOperators.LSHR, 11);
+
+     value = IntVector.fromArray(SPECIES_256, in, inpos + 208).and(MASK_29);
+     word.or(value.lanewise(VectorOperators.LSHL, 18)).intoArray(out, outpos);
+     outpos += VLEN_256;
+     word = value.lanewise(VectorOperators.LSHR, 14);
+
+     value = IntVector.fromArray(SPECIES_256, in, inpos + 216).and(MASK_29);
+     word.or(value.lanewise(VectorOperators.LSHL, 15)).intoArray(out, outpos);
+     outpos += VLEN_256;
+     word = value.lanewise(VectorOperators.LSHR, 17);
+
+     value = IntVector.fromArray(SPECIES_256, in, inpos + 224).and(MASK_29);
+     word.or(value.lanewise(VectorOperators.LSHL, 12)).intoArray(out, outpos);
+     outpos += VLEN_256;
+     word = value.lanewise(VectorOperators.LSHR, 20);
+
+     value = IntVector.fromArray(SPECIES_256, in, inpos + 232).and(MASK_29);
+     word.or(value.lanewise(VectorOperators.LSHL, 9)).intoArray(out, outpos);
+     outpos += VLEN_256;
+     word = value.lanewise(VectorOperators.LSHR, 23);
+
+     value = IntVector.fromArray(SPECIES_256, in, inpos + 240).and(MASK_29);
+     word.or(value.lanewise(VectorOperators.LSHL, 6)).intoArray(out, outpos);
+     outpos += VLEN_256;
+     word = value.lanewise(VectorOperators.LSHR, 26);
+
+     // The last value fills bits 3..31 exactly, completing the final word.
+     value = IntVector.fromArray(SPECIES_256, in, inpos + 248).and(MASK_29);
+     word.or(value.lanewise(VectorOperators.LSHL, 3)).intoArray(out, outpos);
+ }
+
+ static void fastpackNoMask29(final int[] in, int inpos, final int[] out, int outpos) {
+ var iV = IntVector.fromArray(SPECIES_256, in, inpos);
+ var oV = iV;
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 8);
+ oV = iV.lanewise(VectorOperators.LSHL, 29).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 3);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 16);
+ oV = iV.lanewise(VectorOperators.LSHL, 26).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 6);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 24);
+ oV = iV.lanewise(VectorOperators.LSHL, 23).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 9);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 32);
+ oV = iV.lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 12);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 40);
+ oV = iV.lanewise(VectorOperators.LSHL, 17).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 15);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 48);
+ oV = iV.lanewise(VectorOperators.LSHL, 14).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 18);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 56);
+ oV = iV.lanewise(VectorOperators.LSHL, 11).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 21);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 64);
+ oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 24);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 72);
+ oV = iV.lanewise(VectorOperators.LSHL, 5).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 27);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 80);
+ oV = iV.lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 88);
+ oV = iV.lanewise(VectorOperators.LSHL, 31).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 1);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 96);
+ oV = iV.lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 4);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 104);
+ oV = iV.lanewise(VectorOperators.LSHL, 25).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 7);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 112);
+ oV = iV.lanewise(VectorOperators.LSHL, 22).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 10);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 120);
+ oV = iV.lanewise(VectorOperators.LSHL, 19).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 13);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 128);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 16);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 136);
+ oV = iV.lanewise(VectorOperators.LSHL, 13).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 19);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 144);
+ oV = iV.lanewise(VectorOperators.LSHL, 10).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 22);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 152);
+ oV = iV.lanewise(VectorOperators.LSHL, 7).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 25);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 160);
+ oV = iV.lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 28);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 168);
+ oV = iV.lanewise(VectorOperators.LSHL, 1).or(oV);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 176);
+ oV = iV.lanewise(VectorOperators.LSHL, 30).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 2);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 184);
+ oV = iV.lanewise(VectorOperators.LSHL, 27).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 5);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 192);
+ oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 200);
+ oV = iV.lanewise(VectorOperators.LSHL, 21).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 11);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 208);
+ oV = iV.lanewise(VectorOperators.LSHL, 18).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 14);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 216);
+ oV = iV.lanewise(VectorOperators.LSHL, 15).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 17);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 224);
+ oV = iV.lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 20);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 232);
+ oV = iV.lanewise(VectorOperators.LSHL, 9).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 23);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 240);
+ oV = iV.lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 26);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 248);
+ oV = iV.lanewise(VectorOperators.LSHL, 3).or(oV);
+
+ oV.intoArray(out, outpos);
+ }
+
+ static void fastunpack29(final int[] in, int inpos, final int[] out, int outpos) {
+ var iV = IntVector.fromArray(SPECIES_256, in, inpos);
+ iV.and(MASK_29).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ var oV = iV.lanewise(VectorOperators.LSHR, 29).and(MASK_29);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 8);
+ oV = iV.and(67108863).lanewise(VectorOperators.LSHL, 3).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 26).and(MASK_29);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 16);
+ oV = iV.and(8388607).lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 23).and(MASK_29);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 24);
+ oV = iV.and(1048575).lanewise(VectorOperators.LSHL, 9).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 20).and(MASK_29);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 32);
+ oV = iV.and(131071).lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 17).and(MASK_29);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 40);
+ oV = iV.and(16383).lanewise(VectorOperators.LSHL, 15).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 14).and(MASK_29);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 48);
+ oV = iV.and(2047).lanewise(VectorOperators.LSHL, 18).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 11).and(MASK_29);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 56);
+ oV = iV.and(255).lanewise(VectorOperators.LSHL, 21).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 8).and(MASK_29);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 64);
+ oV = iV.and(31).lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 5).and(MASK_29);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 72);
+ oV = iV.and(3).lanewise(VectorOperators.LSHL, 27).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 2).and(MASK_29).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 31).and(MASK_29);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 80);
+ oV = iV.and(268435455).lanewise(VectorOperators.LSHL, 1).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 28).and(MASK_29);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 88);
+ oV = iV.and(33554431).lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 25).and(MASK_29);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 96);
+ oV = iV.and(4194303).lanewise(VectorOperators.LSHL, 7).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 22).and(MASK_29);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 104);
+ oV = iV.and(524287).lanewise(VectorOperators.LSHL, 10).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 19).and(MASK_29);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 112);
+ oV = iV.and(65535).lanewise(VectorOperators.LSHL, 13).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 16).and(MASK_29);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 120);
+ oV = iV.and(8191).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 13).and(MASK_29);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 128);
+ oV = iV.and(1023).lanewise(VectorOperators.LSHL, 19).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 10).and(MASK_29);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 136);
+ oV = iV.and(127).lanewise(VectorOperators.LSHL, 22).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 7).and(MASK_29);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 144);
+ oV = iV.and(15).lanewise(VectorOperators.LSHL, 25).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 4).and(MASK_29);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 152);
+ oV = iV.and(1).lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 1).and(MASK_29).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 30).and(MASK_29);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 160);
+ oV = iV.and(134217727).lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 27).and(MASK_29);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 168);
+ oV = iV.and(16777215).lanewise(VectorOperators.LSHL, 5).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 24).and(MASK_29);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 176);
+ oV = iV.and(2097151).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 21).and(MASK_29);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 184);
+ oV = iV.and(262143).lanewise(VectorOperators.LSHL, 11).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 18).and(MASK_29);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 192);
+ oV = iV.and(32767).lanewise(VectorOperators.LSHL, 14).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 15).and(MASK_29);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 200);
+ oV = iV.and(4095).lanewise(VectorOperators.LSHL, 17).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 12).and(MASK_29);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 208);
+ oV = iV.and(511).lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 9).and(MASK_29);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 216);
+ oV = iV.and(63).lanewise(VectorOperators.LSHL, 23).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 6).and(MASK_29);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 224);
+ oV = iV.and(7).lanewise(VectorOperators.LSHL, 26).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 3).and(MASK_29).intoArray(out, outpos);
+ }
+
+ static void fastpack30(final int[] in, int inpos, final int[] out, int outpos) {
+ var iV = IntVector.fromArray(SPECIES_256, in, inpos);
+ var oV = iV.and(MASK_30);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 8);
+ oV = iV.and(MASK_30).lanewise(VectorOperators.LSHL, 30).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_30).lanewise(VectorOperators.LSHR, 2);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 16);
+ oV = iV.and(MASK_30).lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_30).lanewise(VectorOperators.LSHR, 4);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 24);
+ oV = iV.and(MASK_30).lanewise(VectorOperators.LSHL, 26).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_30).lanewise(VectorOperators.LSHR, 6);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 32);
+ oV = iV.and(MASK_30).lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_30).lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 40);
+ oV = iV.and(MASK_30).lanewise(VectorOperators.LSHL, 22).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_30).lanewise(VectorOperators.LSHR, 10);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 48);
+ oV = iV.and(MASK_30).lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_30).lanewise(VectorOperators.LSHR, 12);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 56);
+ oV = iV.and(MASK_30).lanewise(VectorOperators.LSHL, 18).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_30).lanewise(VectorOperators.LSHR, 14);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 64);
+ oV = iV.and(MASK_30).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_30).lanewise(VectorOperators.LSHR, 16);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 72);
+ oV = iV.and(MASK_30).lanewise(VectorOperators.LSHL, 14).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_30).lanewise(VectorOperators.LSHR, 18);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 80);
+ oV = iV.and(MASK_30).lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_30).lanewise(VectorOperators.LSHR, 20);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 88);
+ oV = iV.and(MASK_30).lanewise(VectorOperators.LSHL, 10).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_30).lanewise(VectorOperators.LSHR, 22);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 96);
+ oV = iV.and(MASK_30).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_30).lanewise(VectorOperators.LSHR, 24);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 104);
+ oV = iV.and(MASK_30).lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_30).lanewise(VectorOperators.LSHR, 26);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 112);
+ oV = iV.and(MASK_30).lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_30).lanewise(VectorOperators.LSHR, 28);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 120);
+ oV = iV.and(MASK_30).lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 128);
+ oV = iV.and(MASK_30);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 136);
+ oV = iV.and(MASK_30).lanewise(VectorOperators.LSHL, 30).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_30).lanewise(VectorOperators.LSHR, 2);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 144);
+ oV = iV.and(MASK_30).lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_30).lanewise(VectorOperators.LSHR, 4);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 152);
+ oV = iV.and(MASK_30).lanewise(VectorOperators.LSHL, 26).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_30).lanewise(VectorOperators.LSHR, 6);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 160);
+ oV = iV.and(MASK_30).lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_30).lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 168);
+ oV = iV.and(MASK_30).lanewise(VectorOperators.LSHL, 22).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_30).lanewise(VectorOperators.LSHR, 10);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 176);
+ oV = iV.and(MASK_30).lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_30).lanewise(VectorOperators.LSHR, 12);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 184);
+ oV = iV.and(MASK_30).lanewise(VectorOperators.LSHL, 18).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_30).lanewise(VectorOperators.LSHR, 14);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 192);
+ oV = iV.and(MASK_30).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_30).lanewise(VectorOperators.LSHR, 16);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 200);
+ oV = iV.and(MASK_30).lanewise(VectorOperators.LSHL, 14).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_30).lanewise(VectorOperators.LSHR, 18);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 208);
+ oV = iV.and(MASK_30).lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_30).lanewise(VectorOperators.LSHR, 20);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 216);
+ oV = iV.and(MASK_30).lanewise(VectorOperators.LSHL, 10).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_30).lanewise(VectorOperators.LSHR, 22);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 224);
+ oV = iV.and(MASK_30).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_30).lanewise(VectorOperators.LSHR, 24);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 232);
+ oV = iV.and(MASK_30).lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_30).lanewise(VectorOperators.LSHR, 26);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 240);
+ oV = iV.and(MASK_30).lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_30).lanewise(VectorOperators.LSHR, 28);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 248);
+ oV = iV.and(MASK_30).lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ oV.intoArray(out, outpos);
+ }
+
+ static void fastpackNoMask30(final int[] in, int inpos, final int[] out, int outpos) {
+ var iV = IntVector.fromArray(SPECIES_256, in, inpos);
+ var oV = iV;
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 8);
+ oV = iV.lanewise(VectorOperators.LSHL, 30).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 2);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 16);
+ oV = iV.lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 4);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 24);
+ oV = iV.lanewise(VectorOperators.LSHL, 26).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 6);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 32);
+ oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 40);
+ oV = iV.lanewise(VectorOperators.LSHL, 22).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 10);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 48);
+ oV = iV.lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 12);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 56);
+ oV = iV.lanewise(VectorOperators.LSHL, 18).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 14);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 64);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 16);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 72);
+ oV = iV.lanewise(VectorOperators.LSHL, 14).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 18);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 80);
+ oV = iV.lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 20);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 88);
+ oV = iV.lanewise(VectorOperators.LSHL, 10).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 22);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 96);
+ oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 24);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 104);
+ oV = iV.lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 26);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 112);
+ oV = iV.lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 28);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 120);
+ oV = iV.lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 128);
+ oV = iV;
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 136);
+ oV = iV.lanewise(VectorOperators.LSHL, 30).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 2);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 144);
+ oV = iV.lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 4);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 152);
+ oV = iV.lanewise(VectorOperators.LSHL, 26).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 6);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 160);
+ oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 168);
+ oV = iV.lanewise(VectorOperators.LSHL, 22).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 10);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 176);
+ oV = iV.lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 12);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 184);
+ oV = iV.lanewise(VectorOperators.LSHL, 18).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 14);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 192);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 16);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 200);
+ oV = iV.lanewise(VectorOperators.LSHL, 14).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 18);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 208);
+ oV = iV.lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 20);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 216);
+ oV = iV.lanewise(VectorOperators.LSHL, 10).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 22);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 224);
+ oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 24);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 232);
+ oV = iV.lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 26);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 240);
+ oV = iV.lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 28);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 248);
+ oV = iV.lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ oV.intoArray(out, outpos);
+ }
+
+ static void fastunpack30(final int[] in, int inpos, final int[] out, int outpos) {
+ var iV = IntVector.fromArray(SPECIES_256, in, inpos);
+ iV.and(MASK_30).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ var oV = iV.lanewise(VectorOperators.LSHR, 30).and(MASK_30);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 8);
+ oV = iV.and(268435455).lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 28).and(MASK_30);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 16);
+ oV = iV.and(67108863).lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 26).and(MASK_30);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 24);
+ oV = iV.and(16777215).lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 24).and(MASK_30);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 32);
+ oV = iV.and(4194303).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 22).and(MASK_30);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 40);
+ oV = iV.and(1048575).lanewise(VectorOperators.LSHL, 10).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 20).and(MASK_30);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 48);
+ oV = iV.and(262143).lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 18).and(MASK_30);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 56);
+ oV = iV.and(65535).lanewise(VectorOperators.LSHL, 14).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 16).and(MASK_30);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 64);
+ oV = iV.and(16383).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 14).and(MASK_30);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 72);
+ oV = iV.and(4095).lanewise(VectorOperators.LSHL, 18).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 12).and(MASK_30);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 80);
+ oV = iV.and(1023).lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 10).and(MASK_30);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 88);
+ oV = iV.and(255).lanewise(VectorOperators.LSHL, 22).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 8).and(MASK_30);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 96);
+ oV = iV.and(63).lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 6).and(MASK_30);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 104);
+ oV = iV.and(15).lanewise(VectorOperators.LSHL, 26).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 4).and(MASK_30);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 112);
+ oV = iV.and(3).lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 2).and(MASK_30).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 120);
+ iV.and(MASK_30).intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 30).and(MASK_30);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 128);
+ oV = iV.and(268435455).lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 28).and(MASK_30);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 136);
+ oV = iV.and(67108863).lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 26).and(MASK_30);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 144);
+ oV = iV.and(16777215).lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 24).and(MASK_30);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 152);
+ oV = iV.and(4194303).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 22).and(MASK_30);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 160);
+ oV = iV.and(1048575).lanewise(VectorOperators.LSHL, 10).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 20).and(MASK_30);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 168);
+ oV = iV.and(262143).lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 18).and(MASK_30);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 176);
+ oV = iV.and(65535).lanewise(VectorOperators.LSHL, 14).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 16).and(MASK_30);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 184);
+ oV = iV.and(16383).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 14).and(MASK_30);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 192);
+ oV = iV.and(4095).lanewise(VectorOperators.LSHL, 18).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 12).and(MASK_30);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 200);
+ oV = iV.and(1023).lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 10).and(MASK_30);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 208);
+ oV = iV.and(255).lanewise(VectorOperators.LSHL, 22).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 8).and(MASK_30);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 216);
+ oV = iV.and(63).lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 6).and(MASK_30);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 224);
+ oV = iV.and(15).lanewise(VectorOperators.LSHL, 26).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 4).and(MASK_30);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 232);
+ oV = iV.and(3).lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ iV.lanewise(VectorOperators.LSHR, 2).and(MASK_30).intoArray(out, outpos);
+ }
+ /** Packs 32 input vectors (256 ints if SPECIES_256 has 8 lanes) at 31 bits per value into 31 output vectors, masking each value with MASK_31. */
+ static void fastpack31(final int[] in, int inpos, final int[] out, int outpos) {
+ var iV = IntVector.fromArray(SPECIES_256, in, inpos);
+ var oV = iV.and(MASK_31);
+ // Layout: input vector k (read at inpos + 8k) starts at bit (31k mod 32) of output vector floor(31k / 32); bits that overflow are carried into the next output vector.
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 8);
+ oV = iV.and(MASK_31).lanewise(VectorOperators.LSHL, 31).or(oV);
+ // NOTE(review): the +8 input stride assumes SPECIES_256 holds 8 ints and VLEN_256 == 8; MASK_31 presumably keeps the low 31 bits - confirm against the class constants.
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Carry: only the lowest bit of vector 1 fit above, so its remaining bits (value >>> 1) open the next output vector.
+ oV = iV.and(MASK_31).lanewise(VectorOperators.LSHR, 1);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 16);
+ oV = iV.and(MASK_31).lanewise(VectorOperators.LSHL, 30).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_31).lanewise(VectorOperators.LSHR, 2);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 24);
+ oV = iV.and(MASK_31).lanewise(VectorOperators.LSHL, 29).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_31).lanewise(VectorOperators.LSHR, 3);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 32);
+ oV = iV.and(MASK_31).lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_31).lanewise(VectorOperators.LSHR, 4);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 40);
+ oV = iV.and(MASK_31).lanewise(VectorOperators.LSHL, 27).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_31).lanewise(VectorOperators.LSHR, 5);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 48);
+ oV = iV.and(MASK_31).lanewise(VectorOperators.LSHL, 26).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_31).lanewise(VectorOperators.LSHR, 6);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 56);
+ oV = iV.and(MASK_31).lanewise(VectorOperators.LSHL, 25).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_31).lanewise(VectorOperators.LSHR, 7);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 64);
+ oV = iV.and(MASK_31).lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_31).lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 72);
+ oV = iV.and(MASK_31).lanewise(VectorOperators.LSHL, 23).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_31).lanewise(VectorOperators.LSHR, 9);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 80);
+ oV = iV.and(MASK_31).lanewise(VectorOperators.LSHL, 22).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_31).lanewise(VectorOperators.LSHR, 10);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 88);
+ oV = iV.and(MASK_31).lanewise(VectorOperators.LSHL, 21).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_31).lanewise(VectorOperators.LSHR, 11);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 96);
+ oV = iV.and(MASK_31).lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_31).lanewise(VectorOperators.LSHR, 12);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 104);
+ oV = iV.and(MASK_31).lanewise(VectorOperators.LSHL, 19).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_31).lanewise(VectorOperators.LSHR, 13);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 112);
+ oV = iV.and(MASK_31).lanewise(VectorOperators.LSHL, 18).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_31).lanewise(VectorOperators.LSHR, 14);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 120);
+ oV = iV.and(MASK_31).lanewise(VectorOperators.LSHL, 17).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_31).lanewise(VectorOperators.LSHR, 15);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 128);
+ oV = iV.and(MASK_31).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_31).lanewise(VectorOperators.LSHR, 16);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 136);
+ oV = iV.and(MASK_31).lanewise(VectorOperators.LSHL, 15).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_31).lanewise(VectorOperators.LSHR, 17);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 144);
+ oV = iV.and(MASK_31).lanewise(VectorOperators.LSHL, 14).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_31).lanewise(VectorOperators.LSHR, 18);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 152);
+ oV = iV.and(MASK_31).lanewise(VectorOperators.LSHL, 13).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_31).lanewise(VectorOperators.LSHR, 19);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 160);
+ oV = iV.and(MASK_31).lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_31).lanewise(VectorOperators.LSHR, 20);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 168);
+ oV = iV.and(MASK_31).lanewise(VectorOperators.LSHL, 11).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_31).lanewise(VectorOperators.LSHR, 21);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 176);
+ oV = iV.and(MASK_31).lanewise(VectorOperators.LSHL, 10).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_31).lanewise(VectorOperators.LSHR, 22);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 184);
+ oV = iV.and(MASK_31).lanewise(VectorOperators.LSHL, 9).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_31).lanewise(VectorOperators.LSHR, 23);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 192);
+ oV = iV.and(MASK_31).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_31).lanewise(VectorOperators.LSHR, 24);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 200);
+ oV = iV.and(MASK_31).lanewise(VectorOperators.LSHL, 7).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_31).lanewise(VectorOperators.LSHR, 25);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 208);
+ oV = iV.and(MASK_31).lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_31).lanewise(VectorOperators.LSHR, 26);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 216);
+ oV = iV.and(MASK_31).lanewise(VectorOperators.LSHL, 5).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_31).lanewise(VectorOperators.LSHR, 27);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 224);
+ oV = iV.and(MASK_31).lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_31).lanewise(VectorOperators.LSHR, 28);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 232);
+ oV = iV.and(MASK_31).lanewise(VectorOperators.LSHL, 3).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_31).lanewise(VectorOperators.LSHR, 29);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 240);
+ oV = iV.and(MASK_31).lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.and(MASK_31).lanewise(VectorOperators.LSHR, 30);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 248);
+ oV = iV.and(MASK_31).lanewise(VectorOperators.LSHL, 1).or(oV);
+ // Final store: vector 31 fills bits 1..31 next to the single carried bit, so nothing is left to carry and outpos is not advanced.
+ oV.intoArray(out, outpos);
+ }
+ /** Same 31-bit layout as fastpack31 but with no masking: any input bit 31 that is set is OR-ed into the neighbouring packed field. */
+ static void fastpackNoMask31(final int[] in, int inpos, final int[] out, int outpos) {
+ var iV = IntVector.fromArray(SPECIES_256, in, inpos);
+ var oV = iV;
+ // Layout: input vector k (read at inpos + 8k) starts at bit (31k mod 32) of output vector floor(31k / 32); bits that overflow are carried into the next output vector.
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 8);
+ oV = iV.lanewise(VectorOperators.LSHL, 31).or(oV);
+ // NOTE(review): the +8 input stride assumes SPECIES_256 holds 8 ints and VLEN_256 == 8 - confirm against the class constants.
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Carry: only the lowest bit of vector 1 fit above, so its remaining bits (value >>> 1) open the next output vector.
+ oV = iV.lanewise(VectorOperators.LSHR, 1);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 16);
+ oV = iV.lanewise(VectorOperators.LSHL, 30).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 2);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 24);
+ oV = iV.lanewise(VectorOperators.LSHL, 29).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 3);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 32);
+ oV = iV.lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 4);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 40);
+ oV = iV.lanewise(VectorOperators.LSHL, 27).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 5);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 48);
+ oV = iV.lanewise(VectorOperators.LSHL, 26).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 6);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 56);
+ oV = iV.lanewise(VectorOperators.LSHL, 25).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 7);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 64);
+ oV = iV.lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 8);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 72);
+ oV = iV.lanewise(VectorOperators.LSHL, 23).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 9);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 80);
+ oV = iV.lanewise(VectorOperators.LSHL, 22).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 10);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 88);
+ oV = iV.lanewise(VectorOperators.LSHL, 21).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 11);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 96);
+ oV = iV.lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 12);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 104);
+ oV = iV.lanewise(VectorOperators.LSHL, 19).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 13);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 112);
+ oV = iV.lanewise(VectorOperators.LSHL, 18).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 14);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 120);
+ oV = iV.lanewise(VectorOperators.LSHL, 17).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 15);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 128);
+ oV = iV.lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 16);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 136);
+ oV = iV.lanewise(VectorOperators.LSHL, 15).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 17);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 144);
+ oV = iV.lanewise(VectorOperators.LSHL, 14).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 18);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 152);
+ oV = iV.lanewise(VectorOperators.LSHL, 13).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 19);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 160);
+ oV = iV.lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 20);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 168);
+ oV = iV.lanewise(VectorOperators.LSHL, 11).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 21);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 176);
+ oV = iV.lanewise(VectorOperators.LSHL, 10).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 22);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 184);
+ oV = iV.lanewise(VectorOperators.LSHL, 9).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 23);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 192);
+ oV = iV.lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 24);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 200);
+ oV = iV.lanewise(VectorOperators.LSHL, 7).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 25);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 208);
+ oV = iV.lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 26);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 216);
+ oV = iV.lanewise(VectorOperators.LSHL, 5).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 27);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 224);
+ oV = iV.lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 28);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 232);
+ oV = iV.lanewise(VectorOperators.LSHL, 3).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 29);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 240);
+ oV = iV.lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 30);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 248);
+ oV = iV.lanewise(VectorOperators.LSHL, 1).or(oV);
+ // Final store: vector 31 fills bits 1..31 next to the single carried bit, so nothing is left to carry and outpos is not advanced.
+ oV.intoArray(out, outpos);
+ }
+ /** Unpacks 31-bit values from 31 input vectors at in[inpos..] into 32 output vectors at out[outpos..]; reverses the fastpack31 layout. */
+ static void fastunpack31(final int[] in, int inpos, final int[] out, int outpos) {
+ var iV = IntVector.fromArray(SPECIES_256, in, inpos);
+ iV.and(MASK_31).intoArray(out, outpos);
+ outpos += VLEN_256;
+ // Value 1 straddles packed words 0 and 1: its lowest bit is word 0's top bit, its upper 30 bits are word 1's low 30 bits.
+ var oV = iV.lanewise(VectorOperators.LSHR, 31).and(MASK_31);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 8);
+ oV = iV.and(1073741823).lanewise(VectorOperators.LSHL, 1).or(oV);
+ // The decimal masks below are 2^(31-k) - 1 for the k-th load: they keep the low bits of word k that complete value k before shifting them above the k carried bits.
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 30).and(MASK_31);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 16);
+ oV = iV.and(536870911).lanewise(VectorOperators.LSHL, 2).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 29).and(MASK_31);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 24);
+ oV = iV.and(268435455).lanewise(VectorOperators.LSHL, 3).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 28).and(MASK_31);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 32);
+ oV = iV.and(134217727).lanewise(VectorOperators.LSHL, 4).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 27).and(MASK_31);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 40);
+ oV = iV.and(67108863).lanewise(VectorOperators.LSHL, 5).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 26).and(MASK_31);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 48);
+ oV = iV.and(33554431).lanewise(VectorOperators.LSHL, 6).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 25).and(MASK_31);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 56);
+ oV = iV.and(16777215).lanewise(VectorOperators.LSHL, 7).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 24).and(MASK_31);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 64);
+ oV = iV.and(8388607).lanewise(VectorOperators.LSHL, 8).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 23).and(MASK_31);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 72);
+ oV = iV.and(4194303).lanewise(VectorOperators.LSHL, 9).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 22).and(MASK_31);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 80);
+ oV = iV.and(2097151).lanewise(VectorOperators.LSHL, 10).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 21).and(MASK_31);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 88);
+ oV = iV.and(1048575).lanewise(VectorOperators.LSHL, 11).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 20).and(MASK_31);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 96);
+ oV = iV.and(524287).lanewise(VectorOperators.LSHL, 12).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 19).and(MASK_31);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 104);
+ oV = iV.and(262143).lanewise(VectorOperators.LSHL, 13).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 18).and(MASK_31);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 112);
+ oV = iV.and(131071).lanewise(VectorOperators.LSHL, 14).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 17).and(MASK_31);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 120);
+ oV = iV.and(65535).lanewise(VectorOperators.LSHL, 15).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 16).and(MASK_31);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 128);
+ oV = iV.and(32767).lanewise(VectorOperators.LSHL, 16).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 15).and(MASK_31);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 136);
+ oV = iV.and(16383).lanewise(VectorOperators.LSHL, 17).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 14).and(MASK_31);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 144);
+ oV = iV.and(8191).lanewise(VectorOperators.LSHL, 18).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 13).and(MASK_31);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 152);
+ oV = iV.and(4095).lanewise(VectorOperators.LSHL, 19).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 12).and(MASK_31);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 160);
+ oV = iV.and(2047).lanewise(VectorOperators.LSHL, 20).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 11).and(MASK_31);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 168);
+ oV = iV.and(1023).lanewise(VectorOperators.LSHL, 21).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 10).and(MASK_31);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 176);
+ oV = iV.and(511).lanewise(VectorOperators.LSHL, 22).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 9).and(MASK_31);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 184);
+ oV = iV.and(255).lanewise(VectorOperators.LSHL, 23).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 8).and(MASK_31);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 192);
+ oV = iV.and(127).lanewise(VectorOperators.LSHL, 24).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 7).and(MASK_31);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 200);
+ oV = iV.and(63).lanewise(VectorOperators.LSHL, 25).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 6).and(MASK_31);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 208);
+ oV = iV.and(31).lanewise(VectorOperators.LSHL, 26).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 5).and(MASK_31);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 216);
+ oV = iV.and(15).lanewise(VectorOperators.LSHL, 27).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 4).and(MASK_31);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 224);
+ oV = iV.and(7).lanewise(VectorOperators.LSHL, 28).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 3).and(MASK_31);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 232);
+ oV = iV.and(3).lanewise(VectorOperators.LSHL, 29).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+
+ oV = iV.lanewise(VectorOperators.LSHR, 2).and(MASK_31);
+
+ iV = IntVector.fromArray(SPECIES_256, in, inpos + 240);
+ oV = iV.and(1).lanewise(VectorOperators.LSHL, 30).or(oV);
+
+ oV.intoArray(out, outpos);
+ outpos += VLEN_256;
+ // The remaining upper 31 bits of the last packed word are the final value; no further input is read.
+ iV.lanewise(VectorOperators.LSHR, 1).and(MASK_31).intoArray(out, outpos);
+ }
+
+}
diff --git a/src/main/java/me/lemire/integercompression/vector/VectorBitPackerKernels.java b/src/main/java/me/lemire/integercompression/vector/VectorBitPackerKernels.java
new file mode 100644
index 0000000..9179827
--- /dev/null
+++ b/src/main/java/me/lemire/integercompression/vector/VectorBitPackerKernels.java
@@ -0,0 +1,78 @@
+/**
+ * This code is released under the
+ * Apache License Version 2.0 http://www.apache.org/licenses/.
+ *
+ * (c) Daniel Lemire, http://lemire.me/en/
+ */
+package me.lemire.integercompression.vector;
+
+import jdk.incubator.vector.IntVector;
+
+/**
+ * Width-specific vectorized bit-packing kernels for a 256-integer block.
+ * Implemented by VectorBitPacker (512-bit lanes), VectorBitPacker256 (256-bit
+ * lanes) and VectorBitPacker128 (128-bit lanes). The packed layout differs per
+ * width, so a stream is decoded by the same kernel that packed it.
+ */
+public interface VectorBitPackerKernels {
+ /** Packs one 256-integer block read from {@code in[inpos..]} into {@code out[outpos..]} at {@code b} bits per value. */
+ void fastpack(int[] in, int inpos, int[] out, int outpos, int b);
+ /** Variant of {@link #fastpack} for values the caller guarantees already fit in {@code b} bits (presumably skips masking). */
+ void fastpackNoMask(int[] in, int inpos, int[] out, int outpos, int b);
+ /** Unpacks one block of {@code b}-bit values from {@code in[inpos..]} into {@code out[outpos..]}. */
+ void fastunpack(int[] in, int inpos, int[] out, int outpos, int b);
+
+ /**
+ * Vector lane width that a packed stream targets. Layouts produced at different
+ * widths are not interchangeable, so every stream records its width through
+ * {@link #code} and has to be unpacked with the matching {@link #kernel}.
+ */
+ enum LaneWidth {
+ BITS_128(0, 128, new VectorBitPacker128()),
+ BITS_256(1, 256, new VectorBitPacker256()),
+ BITS_512(2, 512, new VectorBitPacker());
+
+ /** Small wire tag written into the stream; the values in use fit in 2 bits. */
+ public final int code;
+ /** Vector width, in bits, that this layout is packed for. */
+ public final int bits;
+ /** Packing and unpacking implementation for this width. */
+ public final VectorBitPackerKernels kernel;
+
+ LaneWidth(int wireCode, int widthBits, VectorBitPackerKernels implementation) {
+ code = wireCode;
+ bits = widthBits;
+ kernel = implementation;
+ }
+
+ /** Default encode width: the widest kernel that fits this machine's preferred vector size. */
+ public static final LaneWidth PREFERRED = forHost(IntVector.SPECIES_PREFERRED.vectorBitSize());
+
+ /** Returns the widest width not exceeding {@code hostBits}; throws if every kernel is wider. */
+ public static LaneWidth forHost(int hostBits) {
+ LaneWidth widest = null;
+ for (LaneWidth candidate : values()) {
+ if (candidate.bits <= hostBits && (widest == null || widest.bits < candidate.bits)) {
+ widest = candidate;
+ }
+ }
+ if (widest != null) {
+ return widest;
+ }
+ throw new IllegalStateException(
+ "no vector bit-packing kernel fits this machine's preferred vector width of "
+ + hostBits + " bits");
+ }
+
+ /** Looks up the width whose wire tag equals {@code code}; throws on an unrecognised tag. */
+ public static LaneWidth fromCode(int code) {
+ final LaneWidth[] known = values();
+ for (int i = 0; i < known.length; i++) {
+ if (known[i].code == code) {
+ return known[i];
+ }
+ }
+ throw new IllegalArgumentException("unknown vector lane-width tag " + code);
+ }
+ }
+}
diff --git a/src/main/java/me/lemire/integercompression/vector/VectorFastPFOR.java b/src/main/java/me/lemire/integercompression/vector/VectorFastPFOR.java
index 7374fa5..cc755c8 100644
--- a/src/main/java/me/lemire/integercompression/vector/VectorFastPFOR.java
+++ b/src/main/java/me/lemire/integercompression/vector/VectorFastPFOR.java
@@ -12,6 +12,7 @@
import me.lemire.integercompression.IntegerCODEC;
import me.lemire.integercompression.SkippableIntegerCODEC;
import me.lemire.integercompression.IntWrapper;
+import me.lemire.integercompression.vector.VectorBitPackerKernels.LaneWidth;
/**
* This is a patching scheme designed for speed.
@@ -41,16 +42,33 @@
* For multi-threaded applications, each thread should use its own FastPFOR
* object.
*
+ * Blocks are packed in a vectorized layout that differs by hardware vector
+ * lane width, so each stream is tagged with the width it was packed for and is
+ * decoded by the matching kernel. Decoding requires a machine whose preferred
+ * vector width is at least the stream's; a narrower machine fails fast rather
+ * than emulating. The default constructor packs at this machine's preferred
+ * width; the {@code (int, LaneWidth)} constructor pins a width so a
+ * heterogeneous cluster can decode on its narrowest node.
+ *
* @author Daniel Lemire
*/
public class VectorFastPFOR implements IntegerCODEC, SkippableIntegerCODEC {
private final static int OVERHEAD_OF_EACH_EXCEPT = 8;
+ private static final int OVERHEAD_OF_EACH_PAGE_IN_INTS = 36;
+ private static final int OVERHEAD_OF_EACH_BLOCK_IN_INTS = 1;
public final static int DEFAULT_PAGE_SIZE = 64 << 10;
public final static int BLOCK_SIZE = 256;
private final static int INTS_PER_BLOCK = BLOCK_SIZE >>> 5;
+ // The page header word holds the metadata offset in its low 30 bits and the
+ // packing lane-width tag in its top 2 bits.
+ private final static int WIDTH_SHIFT = 30;
+ private final static int WHEREMETA_MASK = (1 << WIDTH_SHIFT) - 1;
+
private final int pageSize;
+ private final LaneWidth encodeWidth;
+ private final VectorBitPackerKernels encoder;
private final int[][] dataTobePacked = new int[33][];
private int[] exceptData = null;
@@ -64,9 +82,18 @@ public class VectorFastPFOR implements IntegerCODEC, SkippableIntegerCODEC {
* @param pagesize
* the desired page size (recommended value is
* FastPFOR.DEFAULT_PAGE_SIZE)
+ * @param encodeWidth
+ * the vector lane width to pack with. Use
+ * {@link LaneWidth#PREFERRED} for this machine's fastest layout, or pin a
+ * cluster to its narrowest node's width so every node can decode the stream.
*/
- private VectorFastPFOR(int pagesize) {
+ public VectorFastPFOR(int pagesize, LaneWidth encodeWidth) {
+ if (pagesize >= (1 << WIDTH_SHIFT))
+ throw new IllegalArgumentException("page size must be smaller than "
+ + (1 << WIDTH_SHIFT));
pageSize = pagesize;
+ this.encodeWidth = encodeWidth;
+ this.encoder = encodeWidth.kernel;
// Initiate arrrays.
bem = new byte[3 * pageSize / BLOCK_SIZE + pagesize];
for (int k = 1; k < dataTobePacked.length; ++k)
@@ -75,9 +102,10 @@ private VectorFastPFOR(int pagesize) {
}
/**
- * Construct the fastPFOR CODEC with default parameters.
+ * Construct the fastPFOR CODEC with default parameters, packing with this
+ * machine's preferred vector lane width.
*/
- public VectorFastPFOR() { this(DEFAULT_PAGE_SIZE); }
+ public VectorFastPFOR() { this(DEFAULT_PAGE_SIZE, LaneWidth.PREFERRED); }
/**
* Compress data in blocks of BLOCK_SIZE integers (if fewer than BLOCK_SIZE
@@ -165,11 +193,11 @@ private void encodePage(int[] in, IntWrapper inpos, int thissize, int[] out,
} else {
bindex += 2;
}
- VectorBitPacker.fastpack(in, tmpinpos, out, tmpoutpos, tmpbestb);
+ encoder.fastpack(in, tmpinpos, out, tmpoutpos, tmpbestb);
tmpoutpos += INTS_PER_BLOCK * tmpbestb;
}
inpos.set(tmpinpos);
- out[headerpos] = tmpoutpos - headerpos;
+ out[headerpos] = (tmpoutpos - headerpos) | (encodeWidth.code << WIDTH_SHIFT);
int bytesize = bindex;
out[tmpoutpos++] = bytesize;
@@ -196,13 +224,13 @@ private void encodePage(int[] in, IntWrapper inpos, int thissize, int[] out,
int j = 0;
int n = (dataPointers[k] / BLOCK_SIZE) * BLOCK_SIZE;
for (; j < n; j += BLOCK_SIZE) {
- VectorBitPacker.fastpackNoMask(dataTobePacked[k], j, out, tmpoutpos,
+ encoder.fastpackNoMask(dataTobePacked[k], j, out, tmpoutpos,
k);
tmpoutpos += INTS_PER_BLOCK * k;
}
int r = dataPointers[k] % BLOCK_SIZE;
if (r != 0) {
- tmpoutpos = VectorBitPacker.slowpack(dataTobePacked[k], j, r, out,
+ tmpoutpos = slowpack(dataTobePacked[k], j, r, out,
tmpoutpos, k);
tmpoutpos++;
}
@@ -231,7 +259,11 @@ public void headlessUncompress(int[] in, IntWrapper inpos, int inlength,
@Override
public int maxHeadlessCompressedLength(IntWrapper compressedPositions, int inlength) {
- throw new UnsupportedOperationException("Calculating the max compressed length is not supported yet.");
+ inlength = inlength - inlength % BLOCK_SIZE; // round down: the bound covers whole blocks only
+ compressedPositions.add(inlength); // report how many input ints are covered so a composing codec can size the remainder
+ int pageCount = (inlength + pageSize - 1) / pageSize; // ceiling division; each page adds a fixed overhead
+ int blockSizeInInts = OVERHEAD_OF_EACH_BLOCK_IN_INTS + BLOCK_SIZE; // per-block bound: payload plus per-block overhead
+ return OVERHEAD_OF_EACH_PAGE_IN_INTS * pageCount + blockSizeInInts * (inlength / BLOCK_SIZE) + 24;
}
private void loadMetaData(int[] in, int inexcept, int bytesize) {
@@ -246,10 +278,29 @@ private void loadMetaData(int[] in, int inexcept, int bytesize) {
}
}
+ /**
+ * Fails fast when a stream's packing width exceeds the width this host runs
+ * natively. Per the design note, decoding anyway would quietly run as scalar
+ * emulation; throwing lets the caller re-encode the cluster at a narrower width.
+ */
+ static void checkDecodable(LaneWidth streamWidth, LaneWidth hostWidth) {
+ final int hostBits = hostWidth.bits;
+ if (streamWidth.bits <= hostBits) {
+ return; // the host is at least as wide as the stream
+ }
+ throw new IllegalStateException("VectorFastPFOR stream was packed for " + streamWidth.bits
+ + "-bit vector lanes, but this machine runs at most " + hostBits
+ + "-bit lanes natively. Re-encode with lanes <= " + hostBits + " bits.");
+ }
+
private void decodePage(int[] in, IntWrapper inpos, int[] out,
IntWrapper outpos, int thissize) {
final int initpos = inpos.get();
- final int wheremeta = in[inpos.get()];
+ final int header = in[inpos.get()];
+ final int wheremeta = header & WHEREMETA_MASK;
+ final LaneWidth streamWidth = LaneWidth.fromCode(header >>> WIDTH_SHIFT);
+ checkDecodable(streamWidth, LaneWidth.PREFERRED);
+ final VectorBitPackerKernels decoder = streamWidth.kernel;
inpos.increment();
int inexcept = initpos + wheremeta;
@@ -268,11 +319,11 @@ private void decodePage(int[] in, IntWrapper inpos, int[] out,
int j = 0;
int len = (size / BLOCK_SIZE) * BLOCK_SIZE;
for (; j < len; j += BLOCK_SIZE) {
- VectorBitPacker.fastunpack(in, inexcept, dataTobePacked[k], j, k);
+ decoder.fastunpack(in, inexcept, dataTobePacked[k], j, k);
inexcept += INTS_PER_BLOCK * k;
}
int r = size % BLOCK_SIZE;
- inexcept = VectorBitPacker.slowunpack(in, inexcept, dataTobePacked[k],
+ inexcept = slowunpack(in, inexcept, dataTobePacked[k],
j, r, k);
} else {
int j = 0;
@@ -282,12 +333,12 @@ private void decodePage(int[] in, IntWrapper inpos, int[] out,
System.arraycopy(in, inexcept, buf, 0, in.length - inexcept);
int l = (size / BLOCK_SIZE) * BLOCK_SIZE;
for (; j < l; j += BLOCK_SIZE) {
- VectorBitPacker.fastunpack(buf, inexcept - initinexcept,
+ decoder.fastunpack(buf, inexcept - initinexcept,
dataTobePacked[k], j, k);
inexcept += INTS_PER_BLOCK * k;
}
int r = size % BLOCK_SIZE;
- inexcept = VectorBitPacker.slowunpack(in, inexcept, dataTobePacked[k],
+ inexcept = slowunpack(in, inexcept, dataTobePacked[k],
j, r, k);
}
}
@@ -300,7 +351,7 @@ private void decodePage(int[] in, IntWrapper inpos, int[] out,
++run, tmpoutpos += BLOCK_SIZE) {
final int b = bem[idx]; // byteContainer.get();
final int cexcept = bem[idx + 1] & 0xFF; // byteContainer.get() & 0xFF;
- VectorBitPacker.fastunpack(in, tmpinpos, out, tmpoutpos, b);
+ decoder.fastunpack(in, tmpinpos, out, tmpoutpos, b);
tmpinpos += INTS_PER_BLOCK * b;
if (cexcept > 0) {
final int maxbits = bem[idx + 2]; // byteContainer.get();
@@ -363,4 +414,73 @@ public String toString() {
protected ByteBuffer makeBuffer(int sizeInBytes) {
return ByteBuffer.allocateDirect(sizeInBytes);
}
+
+ /**
+ * Packs {@code inlen} leftover exception values (less than one vector block) at
+ * {@code b} bits each into the sequential scalar layout read by {@link #slowunpack}.
+ * Target words are zeroed first because bits are OR-accumulated, so a reused buffer
+ * leaks no stale bits. Returns the index of the last word written (the caller adds 1).
+ */
+ private static int slowpack(final int[] in, int inpos, int inlen,
+ final int[] out, int outpos, int b) {
+ if (inlen == 0)
+ return outpos;
+ if (b == 32) {
+ System.arraycopy(in, inpos, out, outpos, inlen);
+ return outpos + inlen - 1; // last word written, like the packed path, so the caller's +1 lands where slowunpack resumes
+ }
+ int mask = (1 << b) - 1;
+ Arrays.fill(out, outpos, outpos + (inlen * b + 31) / 32, 0);
+ int c = 0; // bits appended to the current word by values handled since the last word switch
+ int l = 0; // bits of a straddling value that still fit in the current word
+ int r = 0; // low bits of the current word already taken by a value begun in the previous word
+ int val = 0;
+ for (int i = 0; i < inlen; i++) {
+ val = in[inpos + i] & mask;
+ out[outpos] |= val << (c + r);
+ c += b;
+ l = (32 - r) % b;
+ if (c + r >= 32) { // current word is full
+ if (i < inlen - 1 || l != 0) // advance only if more data follows or this value spills over
+ outpos++;
+ r = l == 0 ? 0 : b - l;
+ if (l != 0)
+ out[outpos] = val >> (b - r); // high bits of the straddling value start the next word
+ c = 0;
+ }
+ }
+ return outpos;
+ }
+
+ /** Reverses {@link #slowpack}: reads {@code outlen} values of {@code b} bits starting at {@code in[inpos]} into {@code out[outpos..]} and returns the index just past the last input word read. */
+ private static int slowunpack(final int[] in, int inpos, final int[] out,
+ int outpos, int outlen, int b) {
+ if (outlen == 0) {
+ return inpos;
+ }
+ if (b == 32) { // full-width values are stored verbatim, one per word
+ System.arraycopy(in, inpos, out, outpos, outlen);
+ return inpos + outlen;
+ }
+ int mask = (1 << b) - 1;
+ int limit = outpos + outlen;
+ int r = 0; // low bits of the current word that belong to a value begun in the previous word
+ int val = 0; // holds the previous word while stitching, then the current word
+ int i = 0; // input words consumed so far
+ for (; outpos < limit; i++) {
+ if (r > 0) // finish the value split across the previous word and this one
+ out[outpos++] =
+ (val >>> (32 - (b - r))) | ((in[inpos + i] << (b - r)) & mask);
+ val = in[inpos + i];
+ int j = 0;
+ int l = 32 - r; // bits of this word not claimed by the straddling value
+ int ll = l % b == 0 ? l : l - b; // bound that stops before a trailing partial value
+ while (j < ll && outpos < limit) {
+ out[outpos++] = (val >> (j + r)) & mask;
+ j += b;
+ }
+ r = l % b == 0 ? 0 : b - (l % b); // bits of the trailing partial value that live in the next word
+ }
+ return inpos + i;
+ }
}
diff --git a/src/main/java/me/lemire/integercompression/vector/VectorIntBitPackBenchmark.java b/src/main/java/me/lemire/integercompression/vector/VectorIntBitPackBenchmark.java
new file mode 100644
index 0000000..ee69c74
--- /dev/null
+++ b/src/main/java/me/lemire/integercompression/vector/VectorIntBitPackBenchmark.java
@@ -0,0 +1,103 @@
+/**
+ * This code is released under the
+ * Apache License Version 2.0 http://www.apache.org/licenses/.
+ *
+ * (c) Daniel Lemire, http://lemire.me/en/
+ */
+package me.lemire.integercompression.vector;
+
+import java.text.DecimalFormat;
+import java.util.Random;
+
+import me.lemire.integercompression.BitPacking;
+
+/**
+ * Benchmarks the vectorized int bit-packing kernels against the scalar
+ * unrolled {@link BitPacking}, packing 256-integer blocks of each width.
+ * Speeds are millions of integers per second. (For expert use; requires
+ * --add-modules jdk.incubator.vector.)
+ */
+public class VectorIntBitPackBenchmark {
+
+ private static final int BLOCK = 256; // integers per benchmarked block
+ private static final int SUBBLOCKS = BLOCK / 32; // number of 32-integer groups in one block
+
+ private static void scalarPack(int[] in, int[] out, int bit) { // packs one BLOCK with the scalar kernel
+ for (int blk = 0; blk < SUBBLOCKS; blk++) {
+ BitPacking.fastpackwithoutmask(in, blk * 32, out, blk * bit, bit); // 32 ints x bit bits = bit output words per group
+ }
+ }
+
+ private static void scalarUnpack(int[] in, int[] out, int bit) { // inverse of scalarPack, same offsets
+ for (int blk = 0; blk < SUBBLOCKS; blk++) {
+ BitPacking.fastunpack(in, blk * bit, out, blk * 32, bit);
+ }
+ }
+
+ private static void test(boolean verbose) { // runs every width 1..32; prints only when verbose
+ DecimalFormat df = new DecimalFormat("0");
+ final int times = 100000; // blocks timed per bit width
+ Random r = new Random(0); // fixed seed: identical inputs on every run
+ int[] data = new int[BLOCK];
+ int[] compressed = new int[8 * 32]; // widest case: 256 ints x 32 bits = 256 words
+ int[] uncompressed = new int[BLOCK];
+ VectorBitPacker256 vec256 = new VectorBitPacker256();
+ VectorBitPacker vec = new VectorBitPacker();
+
+ for (int bit = 1; bit <= 32; ++bit) {
+ int mask = bit == 32 ? -1 : (1 << bit) - 1; // 1 << 32 wraps to 1 for int, so 32 needs the explicit -1
+ long scalarComp = 0; // accumulated nanoseconds per kernel and direction
+ long scalarDecomp = 0;
+ long vec256Comp = 0;
+ long vec256Decomp = 0;
+ long vecComp = 0;
+ long vecDecomp = 0;
+ for (int t = 0; t < times; ++t) {
+ for (int k = 0; k < BLOCK; ++k) {
+ data[k] = r.nextInt() & mask; // pre-masked, so the no-mask pack variants are valid
+ }
+ long time1 = System.nanoTime(); // input generation above is outside the timed region
+ scalarPack(data, compressed, bit);
+ long time2 = System.nanoTime(); // timestamps are shared: one call's end is the next call's start
+ scalarUnpack(compressed, uncompressed, bit);
+ long time3 = System.nanoTime();
+ vec256.fastpackNoMask(data, 0, compressed, 0, bit);
+ long time4 = System.nanoTime();
+ vec256.fastunpack(compressed, 0, uncompressed, 0, bit);
+ long time5 = System.nanoTime();
+ vec.fastpackNoMask(data, 0, compressed, 0, bit);
+ long time6 = System.nanoTime();
+ vec.fastunpack(compressed, 0, uncompressed, 0, bit);
+ long time7 = System.nanoTime();
+ scalarComp += time2 - time1;
+ scalarDecomp += time3 - time2;
+ vec256Comp += time4 - time3;
+ vec256Decomp += time5 - time4;
+ vecComp += time6 - time5;
+ vecDecomp += time7 - time6;
+ }
+ if (verbose) {
+ double sc = BLOCK * times * 1000.0; // ints processed x 1000; divided by elapsed ns this is millions of ints/s
+ System.out.println("bit = " + bit
+ + " | scalar comp = " + df.format(sc / scalarComp)
+ + " vec256 comp = " + df.format(sc / vec256Comp)
+ + " vec comp = " + df.format(sc / vecComp)
+ + " | scalar decomp = " + df.format(sc / scalarDecomp)
+ + " vec256 decomp = " + df.format(sc / vec256Decomp)
+ + " vec decomp = " + df.format(sc / vecDecomp));
+ }
+ }
+ }
+
+ /**
+ * Main method: runs the benchmark twice and reports only the second pass.
+ *
+ * @param args command-line arguments (not read)
+ */
+ public static void main(String[] args) {
+ System.out.println("Testing int packing (scalar vs VectorBitPacker256 vs "
+ + "VectorBitPacker), 256-int blocks, speeds in millions of ints/s");
+ test(false); // silent first pass; presumably JIT warm-up
+ test(true); // reported pass
+ }
+}
diff --git a/src/main/java/module-info.java b/src/main/java/module-info.java
index f134601..4d652dd 100644
--- a/src/main/java/module-info.java
+++ b/src/main/java/module-info.java
@@ -2,11 +2,13 @@
// SPDX-License-Identifier: Apache-2.0
 module me.lemire.integercompression {
- // This is currently only for advanced users:
- // requires jdk.incubator.vector;
+ // Optional at runtime: only consumers of the vector package (VectorFastPFOR)
+ // need jdk.incubator.vector resolved (e.g. --add-modules jdk.incubator.vector).
+ // Scalar consumers resolve without it.
+ requires static jdk.incubator.vector; // "static": required at compile time, optional at run time
 exports me.lemire.integercompression;
 exports me.lemire.longcompression;
 exports me.lemire.longcompression.differential;
 exports me.lemire.integercompression.differential;
- // exports me.lemire.integercompression.vector;
+ exports me.lemire.integercompression.vector;
 }
diff --git a/src/test/java/me/lemire/integercompression/BasicTest.java b/src/test/java/me/lemire/integercompression/BasicTest.java
index b29ae0d..6743017 100644
--- a/src/test/java/me/lemire/integercompression/BasicTest.java
+++ b/src/test/java/me/lemire/integercompression/BasicTest.java
@@ -17,6 +17,7 @@
import me.lemire.integercompression.differential.IntegratedVariableByte;
import me.lemire.integercompression.differential.XorBinaryPacking;
import me.lemire.integercompression.synth.ClusteredDataGenerator;
+import me.lemire.integercompression.vector.VectorFastPFOR;
import org.junit.Test;
@@ -43,6 +44,7 @@ public class BasicTest {
new Composition(new OptPFDS16(), new VariableByte()),
new Composition(new FastPFOR128(), new VariableByte()),
new Composition(new FastPFOR(), new VariableByte()),
+ new Composition(new VectorFastPFOR(), new VariableByte()),
new Simple9(),
new Simple16(),
new GroupSimple9(),
diff --git a/src/test/java/me/lemire/integercompression/SkippableBasicTest.java b/src/test/java/me/lemire/integercompression/SkippableBasicTest.java
index 881dada..ca919d4 100644
--- a/src/test/java/me/lemire/integercompression/SkippableBasicTest.java
+++ b/src/test/java/me/lemire/integercompression/SkippableBasicTest.java
@@ -13,6 +13,7 @@
import me.lemire.integercompression.differential.IntegratedVariableByte;
import me.lemire.integercompression.differential.SkippableIntegratedComposition;
import me.lemire.integercompression.differential.SkippableIntegratedIntegerCODEC;
+import me.lemire.integercompression.vector.VectorFastPFOR;
import org.junit.Test;
import static org.junit.Assert.assertArrayEquals;
@@ -37,6 +38,7 @@ public class SkippableBasicTest {
new SkippableComposition(new OptPFDS16(), new VariableByte()),
new SkippableComposition(new FastPFOR128(), new VariableByte()),
new SkippableComposition(new FastPFOR(), new VariableByte()),
+ new SkippableComposition(new VectorFastPFOR(), new VariableByte()),
new Simple9(),
new Simple16() };
diff --git a/src/test/java/me/lemire/integercompression/vector/VectorBitPackerTest.java b/src/test/java/me/lemire/integercompression/vector/VectorBitPackerTest.java
new file mode 100644
index 0000000..6640890
--- /dev/null
+++ b/src/test/java/me/lemire/integercompression/vector/VectorBitPackerTest.java
@@ -0,0 +1,73 @@
+/**
+ * This code is released under the
+ * Apache License Version 2.0 http://www.apache.org/licenses/.
+ *
+ * (c) Daniel Lemire, http://lemire.me/en/
+ */
+package me.lemire.integercompression.vector;
+
+import static org.junit.Assert.assertArrayEquals;
+
+import java.util.Random;
+
+import org.junit.Test;
+
+/**
+ * Tests for the width-specific bit-packing kernels.
+ *
+ * VectorBitPacker (256/512-bit lanes), VectorBitPacker256 (256-bit lanes) and
+ * VectorBitPacker128 (128-bit lanes) use different lane strides, so their packed
+ * layouts differ and are not wire-compatible. Each must satisfy the same
+ * roundtrip contract: packing a 256-integer block whose values fit in b bits and
+ * unpacking it recovers the input for every width b.
+ */
+public class VectorBitPackerTest {
+
+ private static final int BLOCK_SIZE = 256; // integers per packed block
+
+ private static int[] randomBlock(Random random, int b) { // BLOCK_SIZE random values that fit in b bits
+ int mask = b == 32 ? -1 : (1 << b) - 1; // 1 << 32 wraps to 1 for int, so 32 needs the explicit -1
+ int[] in = new int[BLOCK_SIZE];
+ for (int i = 0; i < BLOCK_SIZE; i++) {
+ in[i] = random.nextInt() & mask;
+ }
+ return in;
+ }
+
+ private static void roundTrip(VectorBitPackerKernels packer) { // shared contract check for every kernel
+ Random random = new Random(42); // fixed seed keeps failures reproducible
+ for (int b = 1; b <= 32; b++) {
+ int[] in = randomBlock(random, b);
+
+ int[] packed = new int[8 * b]; // 256 values x b bits / 32 bits per word
+ packer.fastpack(in, 0, packed, 0, b);
+ int[] recovered = new int[BLOCK_SIZE];
+ packer.fastunpack(packed, 0, recovered, 0, b);
+ assertArrayEquals("fastpack b=" + b, in, recovered);
+
+ int[] packedNoMask = new int[8 * b]; // fresh buffer so the two packings stay independent
+ packer.fastpackNoMask(in, 0, packedNoMask, 0, b);
+ int[] recoveredNoMask = new int[BLOCK_SIZE];
+ packer.fastunpack(packedNoMask, 0, recoveredNoMask, 0, b);
+ assertArrayEquals("fastpackNoMask b=" + b, in, recoveredNoMask);
+
+ assertArrayEquals("fastpack vs fastpackNoMask b=" + b, packed, // inputs already fit in b bits, so both must emit the same words
+ packedNoMask);
+ }
+ }
+
+ @Test
+ public void vectorBitPacker128RoundTrip() {
+ roundTrip(new VectorBitPacker128());
+ }
+
+ @Test
+ public void vectorBitPacker256RoundTrip() {
+ roundTrip(new VectorBitPacker256());
+ }
+
+ @Test
+ public void vectorBitPackerRoundTrip() {
+ roundTrip(new VectorBitPacker());
+ }
+}
diff --git a/src/test/java/me/lemire/integercompression/vector/VectorFastPFORTest.java b/src/test/java/me/lemire/integercompression/vector/VectorFastPFORTest.java
new file mode 100644
index 0000000..e222c09
--- /dev/null
+++ b/src/test/java/me/lemire/integercompression/vector/VectorFastPFORTest.java
@@ -0,0 +1,88 @@
+/**
+ * This code is released under the
+ * Apache License Version 2.0 http://www.apache.org/licenses/.
+ *
+ * (c) Daniel Lemire, http://lemire.me/en/
+ */
+package me.lemire.integercompression.vector;
+
+import static org.junit.Assert.assertArrayEquals;
+import static org.junit.Assert.assertThrows;
+
+import java.util.Arrays;
+
+import org.junit.Test;
+
+import me.lemire.integercompression.IntWrapper;
+import me.lemire.integercompression.vector.VectorBitPackerKernels.LaneWidth;
+
+/**
+ * Tests for the vectorized FastPFOR codec.
+ */
+public class VectorFastPFORTest {
+
+ /**
+ * A few exceptions in a single block leave a sub-block remainder that is
+ * packed with slowpack, which OR-accumulates into the output. Compressing
+ * into a non-zero buffer must still produce a clean roundtrip.
+ */
+ @Test
+ public void dirtyOutputBufferRoundTrip() {
+ int[] data = new int[VectorFastPFOR.BLOCK_SIZE]; // exactly one codec block
+ for (int i = 0; i < data.length; i++) {
+ data[i] = i % 8; // base values fit in 3 bits
+ }
+ data[5] = 1 << 20; // exceptions sharing one width, count not a multiple
+ data[200] = 1 << 20; // of BLOCK_SIZE, so the remainder goes through slowpack
+
+ VectorFastPFOR codec = new VectorFastPFOR();
+ int[] compressed = new int[2 * data.length]; // twice the input length as output headroom
+ Arrays.fill(compressed, -1); // stale bits the slowpack remainder must overwrite
+ IntWrapper inpos = new IntWrapper(0);
+ IntWrapper outpos = new IntWrapper(0);
+ codec.headlessCompress(data, inpos, data.length, compressed, outpos);
+
+ int[] recovered = new int[data.length];
+ codec.headlessUncompress(compressed, new IntWrapper(0), outpos.get(), // outpos.get(): presumably the compressed length in ints
+ recovered, new IntWrapper(0), data.length);
+
+ assertArrayEquals(data, recovered);
+ }
+
+ /** A stream packed for wider lanes than the host runs natively is refused. */
+ @Test
+ public void checkDecodableRejectsWiderStream() { // arguments read as (stream width, host width), judging by the cases below
+ assertThrows(IllegalStateException.class,
+ () -> VectorFastPFOR.checkDecodable(LaneWidth.BITS_256, LaneWidth.BITS_128));
+ assertThrows(IllegalStateException.class,
+ () -> VectorFastPFOR.checkDecodable(LaneWidth.BITS_512, LaneWidth.BITS_256));
+ // equal or narrower stream decodes natively
+ VectorFastPFOR.checkDecodable(LaneWidth.BITS_128, LaneWidth.BITS_128); // these three pass simply by not throwing
+ VectorFastPFOR.checkDecodable(LaneWidth.BITS_128, LaneWidth.BITS_512);
+ VectorFastPFOR.checkDecodable(LaneWidth.BITS_256, LaneWidth.BITS_512);
+ }
+
+ /** 128-bit lanes are the universal floor, so such a stream decodes on any host. */
+ @Test
+ public void lowestCommonWidthRoundTripsOnAnyHost() {
+ int[] data = new int[3 * VectorFastPFOR.BLOCK_SIZE]; // three codec blocks
+ for (int i = 0; i < data.length; i++) {
+ data[i] = i % 8; // base values fit in 3 bits
+ }
+ data[5] = 1 << 20; // two outliers of different magnitudes
+ data[600] = 1 << 25;
+
+ VectorFastPFOR codec =
+ new VectorFastPFOR(VectorFastPFOR.DEFAULT_PAGE_SIZE, LaneWidth.BITS_128);
+ int[] compressed = new int[2 * data.length]; // twice the input length as output headroom
+ IntWrapper inpos = new IntWrapper(0);
+ IntWrapper outpos = new IntWrapper(0);
+ codec.headlessCompress(data, inpos, data.length, compressed, outpos);
+
+ int[] recovered = new int[data.length];
+ codec.headlessUncompress(compressed, new IntWrapper(0), outpos.get(), // outpos.get(): presumably the compressed length in ints
+ recovered, new IntWrapper(0), data.length);
+
+ assertArrayEquals(data, recovered);
+ }
+}