# Each problem below is a colon-separated list of shapes, one per --stag entry.
--reset

# Destination tags and source/destination data types used by the cases below.
--dtag=undef,abx,aBx16b
--sdt=f32,bf16
--ddt=f32,bf16
# Two abx sources, one case per concat axis (0..3); in every pair the shapes
# differ only along the selected axis.
--stag=abx:abx
--axis=3 3x4x5x13:3x4x5x17
# The first source has size 0 along the concat axis.
--axis=2 5x2x0x8:5x2x8x8
--axis=0 7x8x4x9:9x8x4x9
--axis=1 4x16x2x1:4x8x2x1

# Three abx sources with 5D shapes, concatenated along axis 1
# (sizes 48, 32 and 16 on that axis).
--stag=abx:abx:abx
--axis=1
6x48x3x4x5:6x32x3x4x5:6x16x3x4x5

# Sizes smaller than the block size (plus a zero-sized dim).
# Four sources with mixed tags; every axis-1 size is below the 16 / 8 named in
# the aBx16b / aBx8b tags, and each case has exactly one source of size 0 on
# that axis.
--stag=aBx16b:abx:aBx8b:axb
--axis=1
6x8x3x4:6x2x3x4:6x3x3x4:6x0x3x4
6x8x3x4:6x1x3x4:6x0x3x4:6x3x3x4
6x0x3x4:6x3x3x4:6x5x3x4:6x5x3x4

# f16: the same tags, axes and shapes as the f32/bf16 cases earlier in this
# file, repeated with f16 sources and destination.
# NOTE(review): --dtag is not set again here, so the earlier
# undef,abx,aBx16b list presumably still applies — confirm.
--sdt=f16
--ddt=f16
# Two abx sources, one case per concat axis (0..3).
--stag=abx:abx
--axis=3 3x4x5x13:3x4x5x17
# The first source has size 0 along the concat axis.
--axis=2 5x2x0x8:5x2x8x8
--axis=0 7x8x4x9:9x8x4x9
--axis=1 4x16x2x1:4x8x2x1

# Three abx sources with 5D shapes, concatenated along axis 1.
--stag=abx:abx:abx
--axis=1
6x48x3x4x5:6x32x3x4x5:6x16x3x4x5

# Sizes smaller than the block size (plus a zero-sized dim).
# Each case has exactly one source of size 0 on axis 1.
--stag=aBx16b:abx:aBx8b:axb
--axis=1
6x8x3x4:6x2x3x4:6x3x3x4:6x0x3x4
6x8x3x4:6x1x3x4:6x0x3x4:6x3x3x4
6x0x3x4:6x3x3x4:6x5x3x4:6x5x3x4

# Source tags differ from the destination tag: abx sources, axb destination.
# NOTE(review): no --reset or --sdt/--ddt precedes this case, so it presumably
# still runs with the f16 types set above — confirm that is intended.
--stag=abx:abx
--dtag=axb
--axis=1 2x16x3x4:2x16x3x4

# Layers taken from some key GPU deep-learning frameworks.
# The cases themselves are not listed here; they come from the named batch
# (presumably a separate input file — verify its location).
--reset
--batch=option_set_fwks_key_gpu

# Additional tests for the gen9 concat kernel, likewise taken from a named
# batch.
--reset
--batch=option_set_gen9_gpu

# Tests with many source arguments, all concatenated along axis 1.
--reset
--axis=1
# A long list of identical 512x128 sources in ab layout, ab destination.
--stag=ab:ab:ab:ab:ab:ab:ab:ab:ab:ab:ab:ab:ab:ab:ab:ab:ab:ab:ab:ab:ab:ab:ab:ab:ab:ab:ab:ab:ab:ab
--dtag=ab
512x128:512x128:512x128:512x128:512x128:512x128:512x128:512x128:512x128:512x128:512x128:512x128:512x128:512x128:512x128:512x128:512x128:512x128:512x128:512x128:512x128:512x128:512x128:512x128:512x128:512x128:512x128:512x128:512x128:512x128

# aBx16b sources and destination with varied axis-1 sizes
# (1, 7, 9, 11, 32, 64, 128), including sizes that are not multiples of 16.
--stag=aBx16b:aBx16b:aBx16b:aBx16b:aBx16b:aBx16b:aBx16b:aBx16b:aBx16b:aBx16b:aBx16b:aBx16b
--dtag=aBx16b
10x32x16:10x1x16:10x64x16:10x9x16:10x11x16:10x128x16:10x7x16:10x32x16:10x32x16:10x32x16:10x32x16:10x32x16

# Regression tests. Each case follows its own --reset, presumably so that it
# does not inherit options from earlier lines.
--reset
# f32, 4D abcd sources and destination, concat along the last axis, whose size
# is 1 in both sources.
--sdt=f32 --ddt=f32 --stag=abcd:abcd --dtag=abcd --axis=3 32x8x8x1:32x8x8x1
--reset
# f16, aBx16b sources and destination; no --axis is given on this line.
--sdt=f16 --ddt=f16 --stag=aBx16b:aBx16b --dtag=aBx16b 1x32x6x6:1x32x6x6
--reset
# Two identical 6D abx sources; only --stag is specified.
--stag=abx:abx 256x1x8x8x3x3:256x1x8x8x3x3
