; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
; RUN: llc -o - %s -mtriple=aarch64-none-linux-gnu | FileCheck %s

; test1: broadcast the byte loaded from %a and the byte loaded from %b into two
; <8 x i8> vectors, then concatenate them in order (mask 0..15) into <16 x i8>.
; Result: lanes 0-7 hold the %a byte, lanes 8-15 hold the %b byte.
; Expected assembly: one ld1r per pointer, joined by a single 64-bit lane move.
define <16 x i8> @test1(ptr nocapture noundef readonly %a, ptr nocapture noundef readonly %b) {
; CHECK-LABEL: test1:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ld1r { v1.8b }, [x1]
; CHECK-NEXT:    ld1r { v0.8b }, [x0]
; CHECK-NEXT:    mov v0.d[1], v1.d[0]
; CHECK-NEXT:    ret
entry:
  %0 = load i8, ptr %a, align 1
  %1 = insertelement <8 x i8> poison, i8 %0, i64 0
  %lane = shufflevector <8 x i8> %1, <8 x i8> poison, <8 x i32> zeroinitializer
  %2 = load i8, ptr %b, align 1
  %3 = insertelement <8 x i8> poison, i8 %2, i64 0
  %lane2 = shufflevector <8 x i8> %3, <8 x i8> poison, <8 x i32> zeroinitializer
  ; Identity mask over both inputs: a plain concatenation of %lane and %lane2.
  %shuffle.i = shufflevector <8 x i8> %lane, <8 x i8> %lane2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  ret <16 x i8> %shuffle.i
}

; test2: same two <8 x i8> splats as test1, but the final mask is 0..14 followed
; by 0, so it is not a pure concatenation.
; Result: lanes 0-7 hold the %a byte, lanes 8-14 hold the %b byte, and lane 15
; holds the %a byte again.
; Expected assembly: ld1r for %b, a scalar ldrb + dup for %a, and a byte-lane
; insert of the %a value into the %b half before the 64-bit lane move.
define <16 x i8> @test2(ptr nocapture noundef readonly %a, ptr nocapture noundef readonly %b) {
; CHECK-LABEL: test2:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ld1r { v1.8b }, [x1]
; CHECK-NEXT:    ldrb w8, [x0]
; CHECK-NEXT:    dup v0.8b, w8
; CHECK-NEXT:    mov v1.b[7], w8
; CHECK-NEXT:    mov v0.d[1], v1.d[0]
; CHECK-NEXT:    ret
entry:
  %0 = load i8, ptr %a, align 1
  %1 = insertelement <8 x i8> poison, i8 %0, i64 0
  %lane = shufflevector <8 x i8> %1, <8 x i8> poison, <8 x i32> zeroinitializer
  %2 = load i8, ptr %b, align 1
  %3 = insertelement <8 x i8> poison, i8 %2, i64 0
  %lane2 = shufflevector <8 x i8> %3, <8 x i8> poison, <8 x i32> zeroinitializer
  ; The last mask element is 0 (not 15), selecting lane 0 of %lane.
  %shuffle.i = shufflevector <8 x i8> %lane, <8 x i8> %lane2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 0>
  ret <16 x i8> %shuffle.i
}

; test3: interleave the two <8 x i8> splats instead of concatenating them
; (mask 0,8,1,9,...,7,15).
; Result: even lanes hold the %a byte, odd lanes hold the %b byte.
; Expected assembly: one ld1r per pointer followed by a zip1.
define <16 x i8> @test3(ptr nocapture noundef readonly %a, ptr nocapture noundef readonly %b) {
; CHECK-LABEL: test3:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ld1r { v0.8b }, [x0]
; CHECK-NEXT:    ld1r { v1.8b }, [x1]
; CHECK-NEXT:    zip1 v0.16b, v0.16b, v1.16b
; CHECK-NEXT:    ret
entry:
  %0 = load i8, ptr %a, align 1
  %1 = insertelement <8 x i8> poison, i8 %0, i64 0
  %lane = shufflevector <8 x i8> %1, <8 x i8> poison, <8 x i32> zeroinitializer
  %2 = load i8, ptr %b, align 1
  %3 = insertelement <8 x i8> poison, i8 %2, i64 0
  %lane2 = shufflevector <8 x i8> %3, <8 x i8> poison, <8 x i32> zeroinitializer
  ; Mask alternates between element i of %lane and element i of %lane2.
  %shuffle.i = shufflevector <8 x i8> %lane, <8 x i8> %lane2, <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
  ret <16 x i8> %shuffle.i
}

; test4: concatenate the two <8 x i8> splats in swapped order
; (mask 8..15 followed by 0..7).
; Result: lanes 0-7 hold the %b byte, lanes 8-15 hold the %a byte.
; Expected assembly: as in test1, but the ld1r from %b feeds the low half and
; the ld1r from %a feeds the high half.
define <16 x i8> @test4(ptr nocapture noundef readonly %a, ptr nocapture noundef readonly %b) {
; CHECK-LABEL: test4:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ld1r { v1.8b }, [x0]
; CHECK-NEXT:    ld1r { v0.8b }, [x1]
; CHECK-NEXT:    mov v0.d[1], v1.d[0]
; CHECK-NEXT:    ret
entry:
  %0 = load i8, ptr %a, align 1
  %1 = insertelement <8 x i8> poison, i8 %0, i64 0
  %lane = shufflevector <8 x i8> %1, <8 x i8> poison, <8 x i32> zeroinitializer
  %2 = load i8, ptr %b, align 1
  %3 = insertelement <8 x i8> poison, i8 %2, i64 0
  %lane2 = shufflevector <8 x i8> %3, <8 x i8> poison, <8 x i32> zeroinitializer
  ; Mask takes all of %lane2 first, then all of %lane.
  %shuffle.i = shufflevector <8 x i8> %lane, <8 x i8> %lane2, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <16 x i8> %shuffle.i
}

; test5: the final mask selects element 0 for the first fifteen lanes and
; element 15 for the last lane, so it is neither a concatenation nor a zip.
; Result: lanes 0-14 hold the %a byte, lane 15 holds the %b byte.
; Expected assembly: a two-register tbl whose index vector is loaded from a
; constant-pool entry (.LCPI4_0).
define <16 x i8> @test5(ptr nocapture noundef readonly %a, ptr nocapture noundef readonly %b) {
; CHECK-LABEL: test5:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    adrp x8, .LCPI4_0
; CHECK-NEXT:    ld1r { v1.16b }, [x1]
; CHECK-NEXT:    ldr b0, [x0]
; CHECK-NEXT:    ldr q2, [x8, :lo12:.LCPI4_0]
; CHECK-NEXT:    tbl v0.16b, { v0.16b, v1.16b }, v2.16b
; CHECK-NEXT:    ret
entry:
  %0 = load i8, ptr %a, align 1
  %1 = insertelement <8 x i8> poison, i8 %0, i64 0
  %lane = shufflevector <8 x i8> %1, <8 x i8> poison, <8 x i32> zeroinitializer
  %2 = load i8, ptr %b, align 1
  %3 = insertelement <8 x i8> poison, i8 %2, i64 0
  %lane2 = shufflevector <8 x i8> %3, <8 x i8> poison, <8 x i32> zeroinitializer
  ; Index 15 refers to element 7 of %lane2; every other index is element 0 of %lane.
  %shuffle.i = shufflevector <8 x i8> %lane, <8 x i8> %lane2, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 15>
  ret <16 x i8> %shuffle.i
}

; test6: 64-bit result variant. Each loaded byte is splatted into a <4 x i8>,
; and the two splats are concatenated in order (mask 0..7) into <8 x i8>.
; Result: lanes 0-3 hold the %a byte, lanes 4-7 hold the %b byte.
; Expected assembly: one ld1r (8b arrangement) per pointer, joined by a 32-bit
; lane move.
define <8 x i8> @test6(ptr nocapture noundef readonly %a, ptr nocapture noundef readonly %b) {
; CHECK-LABEL: test6:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ld1r { v1.8b }, [x1]
; CHECK-NEXT:    ld1r { v0.8b }, [x0]
; CHECK-NEXT:    mov v0.s[1], v1.s[1]
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT:    ret
entry:
  %0 = load i8, ptr %a, align 1
  %1 = insertelement <4 x i8> poison, i8 %0, i64 0
  %lane = shufflevector <4 x i8> %1, <4 x i8> poison, <4 x i32> zeroinitializer
  %2 = load i8, ptr %b, align 1
  %3 = insertelement <4 x i8> poison, i8 %2, i64 0
  %lane2 = shufflevector <4 x i8> %3, <4 x i8> poison, <4 x i32> zeroinitializer
  ; Identity mask over both <4 x i8> inputs: a plain concatenation.
  %shuffle.i = shufflevector <4 x i8> %lane, <4 x i8> %lane2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i8> %shuffle.i
}

; test7: like test6, but the two <4 x i8> splats are concatenated in swapped
; order (mask 4..7 followed by 0..3).
; Result: lanes 0-3 hold the %b byte, lanes 4-7 hold the %a byte.
; Expected assembly: as in test6, but the ld1r from %b feeds the low 32 bits
; and the ld1r from %a feeds the high 32 bits.
define <8 x i8> @test7(ptr nocapture noundef readonly %a, ptr nocapture noundef readonly %b) {
; CHECK-LABEL: test7:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ld1r { v1.8b }, [x0]
; CHECK-NEXT:    ld1r { v0.8b }, [x1]
; CHECK-NEXT:    mov v0.s[1], v1.s[1]
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT:    ret
entry:
  %0 = load i8, ptr %a, align 1
  %1 = insertelement <4 x i8> poison, i8 %0, i64 0
  %lane = shufflevector <4 x i8> %1, <4 x i8> poison, <4 x i32> zeroinitializer
  %2 = load i8, ptr %b, align 1
  %3 = insertelement <4 x i8> poison, i8 %2, i64 0
  %lane2 = shufflevector <4 x i8> %3, <4 x i8> poison, <4 x i32> zeroinitializer
  ; Mask takes all of %lane2 first, then all of %lane.
  %shuffle.i = shufflevector <4 x i8> %lane, <4 x i8> %lane2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>
  ret <8 x i8> %shuffle.i
}

; test8: i16 version of test1. Each loaded halfword is splatted into a
; <4 x i16>, and the two splats are concatenated in order into <8 x i16>.
; Result: lanes 0-3 hold the %a halfword, lanes 4-7 hold the %b halfword.
; Expected assembly: one ld1r (4h arrangement) per pointer, joined by a 64-bit
; lane move.
define <8 x i16> @test8(ptr nocapture noundef readonly %a, ptr nocapture noundef readonly %b) {
; CHECK-LABEL: test8:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ld1r { v1.4h }, [x1]
; CHECK-NEXT:    ld1r { v0.4h }, [x0]
; CHECK-NEXT:    mov v0.d[1], v1.d[0]
; CHECK-NEXT:    ret
entry:
  %0 = load i16, ptr %a, align 1
  %1 = insertelement <4 x i16> poison, i16 %0, i64 0
  %lane = shufflevector <4 x i16> %1, <4 x i16> poison, <4 x i32> zeroinitializer
  %2 = load i16, ptr %b, align 1
  %3 = insertelement <4 x i16> poison, i16 %2, i64 0
  %lane2 = shufflevector <4 x i16> %3, <4 x i16> poison, <4 x i32> zeroinitializer
  ; Identity mask over both inputs: a plain concatenation.
  %shuffle.i = shufflevector <4 x i16> %lane, <4 x i16> %lane2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i16> %shuffle.i
}

; test9: i32 version of test1. Each loaded word is splatted into a <2 x i32>,
; and the two splats are concatenated in order into <4 x i32>.
; Result: lanes 0-1 hold the %a word, lanes 2-3 hold the %b word.
; Expected assembly: one ld1r (2s arrangement) per pointer, joined by a 64-bit
; lane move.
define <4 x i32> @test9(ptr nocapture noundef readonly %a, ptr nocapture noundef readonly %b) {
; CHECK-LABEL: test9:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ld1r { v1.2s }, [x1]
; CHECK-NEXT:    ld1r { v0.2s }, [x0]
; CHECK-NEXT:    mov v0.d[1], v1.d[0]
; CHECK-NEXT:    ret
entry:
  %0 = load i32, ptr %a, align 1
  %1 = insertelement <2 x i32> poison, i32 %0, i64 0
  %lane = shufflevector <2 x i32> %1, <2 x i32> poison, <2 x i32> zeroinitializer
  %2 = load i32, ptr %b, align 1
  %3 = insertelement <2 x i32> poison, i32 %2, i64 0
  %lane2 = shufflevector <2 x i32> %3, <2 x i32> poison, <2 x i32> zeroinitializer
  ; Identity mask over both inputs: a plain concatenation.
  %shuffle.i = shufflevector <2 x i32> %lane, <2 x i32> %lane2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x i32> %shuffle.i
}

; test10: i64 version. Each loaded doubleword is bitcast to <1 x i64> (there is
; no splat step for a single-element vector), and the two are concatenated in
; order into <2 x i64>.
; Result: lane 0 holds the %a value, lane 1 holds the %b value.
; Expected assembly: two plain ldr d loads joined by a 64-bit lane move.
define <2 x i64> @test10(ptr nocapture noundef readonly %a, ptr nocapture noundef readonly %b) {
; CHECK-LABEL: test10:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ldr d0, [x0]
; CHECK-NEXT:    ldr d1, [x1]
; CHECK-NEXT:    mov v0.d[1], v1.d[0]
; CHECK-NEXT:    ret
entry:
  %0 = load i64, ptr %a, align 1
  %lane = bitcast i64 %0 to <1 x i64>
  %1 = load i64, ptr %b, align 1
  %lane2 = bitcast i64 %1 to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %lane, <1 x i64> %lane2, <2 x i32> <i32 0, i32 1>
  ret <2 x i64> %shuffle.i
}

; test11: two <4 x i8> splats combined with the mask 0,1,2,3,4,5,0,1, which is
; not a pure concatenation.
; Result: lanes 0-3 hold the %a byte, lanes 4-5 hold the %b byte, and lanes 6-7
; hold the %a byte again.
; Expected assembly: one ld1r per pointer, a register copy, and two 16-bit lane
; moves rather than a single wider lane move.
define <8 x i8> @test11(ptr nocapture noundef readonly %a, ptr nocapture noundef readonly %b) {
; CHECK-LABEL: test11:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ld1r { v1.8b }, [x0]
; CHECK-NEXT:    ld1r { v2.8b }, [x1]
; CHECK-NEXT:    mov v0.16b, v1.16b
; CHECK-NEXT:    mov v0.h[2], v2.h[0]
; CHECK-NEXT:    mov v0.h[3], v1.h[0]
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT:    ret
entry:
  %0 = load i8, ptr %a, align 1
  %1 = insertelement <4 x i8> poison, i8 %0, i64 0
  %lane = shufflevector <4 x i8> %1, <4 x i8> poison, <4 x i32> zeroinitializer
  %2 = load i8, ptr %b, align 1
  %3 = insertelement <4 x i8> poison, i8 %2, i64 0
  %lane2 = shufflevector <4 x i8> %3, <4 x i8> poison, <4 x i32> zeroinitializer
  ; The last two mask elements (0, 1) select from %lane again instead of %lane2.
  %shuffle.i = shufflevector <4 x i8> %lane, <4 x i8> %lane2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 0, i32 1>
  ret <8 x i8> %shuffle.i
}

; test12: two <2 x i32> splats combined with the mask 0,3,2,1, which mixes
; elements of both inputs in each half of the result.
; Result: lane 0 = %a word, lane 1 = %b word, lane 2 = %b word, lane 3 = %a word.
; Expected assembly: ld1r for %a, a scalar ldr for %b, per-lane inserts of the
; %b word into two copies of the %a splat, then a 64-bit lane move.
define <4 x i32> @test12(ptr nocapture noundef readonly %a, ptr nocapture noundef readonly %b) {
; CHECK-LABEL: test12:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ld1r { v0.2s }, [x0]
; CHECK-NEXT:    ldr w8, [x1]
; CHECK-NEXT:    mov v1.16b, v0.16b
; CHECK-NEXT:    mov v0.s[1], w8
; CHECK-NEXT:    mov v1.s[0], w8
; CHECK-NEXT:    mov v0.d[1], v1.d[0]
; CHECK-NEXT:    ret
entry:
  %0 = load i32, ptr %a, align 1
  %1 = insertelement <2 x i32> poison, i32 %0, i64 0
  %lane = shufflevector <2 x i32> %1, <2 x i32> poison, <2 x i32> zeroinitializer
  %2 = load i32, ptr %b, align 1
  %3 = insertelement <2 x i32> poison, i32 %2, i64 0
  %lane2 = shufflevector <2 x i32> %3, <2 x i32> poison, <2 x i32> zeroinitializer
  ; Indices 0 and 1 refer to %lane; indices 2 and 3 refer to %lane2.
  %shuffle.i = shufflevector <2 x i32> %lane, <2 x i32> %lane2, <4 x i32> <i32 0, i32 3, i32 2, i32 1>
  ret <4 x i32> %shuffle.i
}

; test13: like test10, but the two <1 x i64> values are concatenated in swapped
; order (mask 1, 0).
; Result: lane 0 holds the %b value, lane 1 holds the %a value.
; Expected assembly: two plain ldr d loads, with the %b load feeding the low
; half and the %a load feeding the high half via a 64-bit lane move.
define <2 x i64> @test13(ptr nocapture noundef readonly %a, ptr nocapture noundef readonly %b) {
; CHECK-LABEL: test13:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ldr d1, [x0]
; CHECK-NEXT:    ldr d0, [x1]
; CHECK-NEXT:    mov v0.d[1], v1.d[0]
; CHECK-NEXT:    ret
entry:
  %0 = load i64, ptr %a, align 1
  %lane = bitcast i64 %0 to <1 x i64>
  %1 = load i64, ptr %b, align 1
  %lane2 = bitcast i64 %1 to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %lane, <1 x i64> %lane2, <2 x i32> <i32 1, i32 0>
  ret <2 x i64> %shuffle.i
}

; test14: two <2 x i32> splats shuffled into a three-element result
; (mask 0,1,2), so the output is not a full concatenation of both inputs.
; Result: lanes 0-1 hold the %a word, lane 2 holds the %b word.
; Expected assembly: a scalar ldr of %a placed into lanes 0 and 1 (fmov plus a
; lane insert), then a single-lane ld1 from %b into lane 2; no ld1r is used.
define <3 x i32> @test14(ptr nocapture noundef readonly %a, ptr nocapture noundef readonly %b) {
; CHECK-LABEL: test14:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ldr w8, [x0]
; CHECK-NEXT:    fmov s0, w8
; CHECK-NEXT:    mov v0.s[1], w8
; CHECK-NEXT:    ld1 { v0.s }[2], [x1]
; CHECK-NEXT:    ret
entry:
  %0 = load i32, ptr %a, align 1
  %1 = insertelement <2 x i32> poison, i32 %0, i64 0
  %lane = shufflevector <2 x i32> %1, <2 x i32> poison, <2 x i32> zeroinitializer
  %2 = load i32, ptr %b, align 1
  %3 = insertelement <2 x i32> poison, i32 %2, i64 0
  %lane2 = shufflevector <2 x i32> %3, <2 x i32> poison, <2 x i32> zeroinitializer
  ; Only three of the four available input elements are selected.
  %shuffle.i = shufflevector <2 x i32> %lane, <2 x i32> %lane2, <3 x i32> <i32 0, i32 1, i32 2>
  ret <3 x i32> %shuffle.i
}

