//
// Generated by NVIDIA NVVM Compiler
// Compiler built on Fri Jul 25 04:36:16 2014 (1406288176)
// Cuda compilation tools, release 6.5, V6.5.13
//

.version 4.1
.target sm_30
.address_size 64


// Device function (signature per the mangled name):
//   float4 __d_bilinear_interp_pixel_float4(float4*, int, DevicePixelFormat, int, int, float, float)
//
// Bilinearly samples a 4-channel image at a fractional position and returns
// the blended texel through func_retval0 (16 bytes, four f32).
//
//   param_0  image base pointer (accessed with generic-address loads)
//   param_1  row pitch in texels (multiplied by the clamped row index)
//   param_2  pixel format selector: 0 -> texel is 4 x f16 (8 bytes),
//            non-zero -> texel is 4 x f32 (16 bytes)
//   param_3  column bound; column indices are clamped to [0, param_3 - 1]
//   param_4  row bound; row indices are clamped to [0, param_4 - 1]
//   param_5  sample x coordinate
//   param_6  sample y coordinate
//
// With ix = trunc(x), iy = trunc(y), dx = clamp(x - ix, 0, 1) and
// dy = clamp(y - iy, 0, 1) the result is
//   sum over m,n in {0,1} of |1 - m - dx| * |1 - n - dy| * texel(ix + m, iy + n)
// accumulated in the tap order (m,n) = (0,0), (0,1), (1,0), (1,1).
.visible .func  (.param .align 16 .b8 func_retval0[16]) _Z32__d_bilinear_interp_pixel_float4P6float4i17DevicePixelFormatiiff(
	.param .b64 _Z32__d_bilinear_interp_pixel_float4P6float4i17DevicePixelFormatiiff_param_0,
	.param .b32 _Z32__d_bilinear_interp_pixel_float4P6float4i17DevicePixelFormatiiff_param_1,
	.param .b32 _Z32__d_bilinear_interp_pixel_float4P6float4i17DevicePixelFormatiiff_param_2,
	.param .b32 _Z32__d_bilinear_interp_pixel_float4P6float4i17DevicePixelFormatiiff_param_3,
	.param .b32 _Z32__d_bilinear_interp_pixel_float4P6float4i17DevicePixelFormatiiff_param_4,
	.param .b32 _Z32__d_bilinear_interp_pixel_float4P6float4i17DevicePixelFormatiiff_param_5,
	.param .b32 _Z32__d_bilinear_interp_pixel_float4P6float4i17DevicePixelFormatiiff_param_6
)
{
	.reg .pred 	%p<7>;
	.reg .s16 	%rs<33>;
	.reg .s32 	%r<34>;
	.reg .f32 	%f<122>;
	.reg .s64 	%rd<22>;


	ld.param.u64 	%rd5, [_Z32__d_bilinear_interp_pixel_float4P6float4i17DevicePixelFormatiiff_param_0];
	ld.param.u32 	%r8, [_Z32__d_bilinear_interp_pixel_float4P6float4i17DevicePixelFormatiiff_param_1];
	ld.param.u32 	%r9, [_Z32__d_bilinear_interp_pixel_float4P6float4i17DevicePixelFormatiiff_param_2];
	ld.param.u32 	%r10, [_Z32__d_bilinear_interp_pixel_float4P6float4i17DevicePixelFormatiiff_param_3];
	ld.param.u32 	%r11, [_Z32__d_bilinear_interp_pixel_float4P6float4i17DevicePixelFormatiiff_param_4];
	ld.param.f32 	%f71, [_Z32__d_bilinear_interp_pixel_float4P6float4i17DevicePixelFormatiiff_param_5];
	ld.param.f32 	%f72, [_Z32__d_bilinear_interp_pixel_float4P6float4i17DevicePixelFormatiiff_param_6];
	// ix = trunc(x); dx = clamp(x - ix, 0, 1).
	// The clamp is performed in f32: clamping after a float->int conversion
	// would discard the fraction and make every blend weight 0 or 1.
	cvt.rzi.ftz.s32.f32	%r1, %f71;
	cvt.rn.f32.s32	%f73, %r1;
	sub.ftz.f32 	%f74, %f71, %f73;
	min.ftz.f32 	%f75, %f74, 0f3F800000;
	mov.u32 	%r15, 0;
	max.ftz.f32 	%f1, %f75, 0f00000000;
	// iy = trunc(y); dy = clamp(y - iy, 0, 1).
	cvt.rzi.ftz.s32.f32	%r2, %f72;
	cvt.rn.f32.s32	%f76, %r2;
	sub.ftz.f32 	%f77, %f72, %f76;
	min.ftz.f32 	%f78, %f77, 0f3F800000;
	max.ftz.f32 	%f2, %f78, 0f00000000;
	// x0 = max(0, min(param_3 - 1, ix)); wx0 = |1 - dx|.
	add.s32 	%r18, %r10, -1;
	min.s32 	%r19, %r18, %r1;
	max.s32 	%r3, %r15, %r19;
	mov.f32 	%f79, 0f3F800000;
	sub.ftz.f32 	%f80, %f79, %f1;
	abs.ftz.f32 	%f3, %f80;
	// y0 = max(0, min(param_4 - 1, iy)); wy0 = |1 - dy|.
	add.s32 	%r4, %r11, -1;
	min.s32 	%r20, %r4, %r2;
	max.s32 	%r21, %r15, %r20;
	sub.ftz.f32 	%f81, %f79, %f2;
	abs.ftz.f32 	%f4, %f81;
	// Tap (0,0): weight wx0 * wy0, texel index y0 * pitch + x0.
	mul.ftz.f32 	%f5, %f3, %f4;
	mul.lo.s32 	%r5, %r21, %r8;
	add.s32 	%r22, %r5, %r3;
	cvt.s64.s32	%rd1, %r22;
	// %p3 is true when the format selector is 0 (f16 texels); it is reused
	// by all four taps.
	setp.eq.s32	%p3, %r9, 0;
	@%p3 bra 	BB0_2;

	// f32 texel: 16-byte stride.
	shl.b64 	%rd6, %rd1, 4;
	add.s64 	%rd7, %rd5, %rd6;
	ld.v4.f32 	{%f82, %f83, %f84, %f85}, [%rd7];
	mov.f32 	%f109, %f85;
	mov.f32 	%f108, %f84;
	mov.f32 	%f107, %f83;
	mov.f32 	%f106, %f82;
	bra.uni 	BB0_3;

BB0_2:
	// f16 texel: 8-byte stride, each channel widened to f32.
	shl.b64 	%rd8, %rd1, 3;
	add.s64 	%rd9, %rd5, %rd8;
	ld.v4.u16 	{%rs1, %rs2, %rs3, %rs4}, [%rd9];
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs1;
	cvt.f32.f16 	%f106, %temp;
	}
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs2;
	cvt.f32.f16 	%f107, %temp;
	}
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs3;
	cvt.f32.f16 	%f108, %temp;
	}
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs4;
	cvt.f32.f16 	%f109, %temp;
	}

BB0_3:
	// acc = texel(0,0) * w00.
	fma.rn.ftz.f32 	%f18, %f106, %f5, 0f00000000;
	mov.f32 	%f86, 0f00000000;
	fma.rn.ftz.f32 	%f19, %f107, %f5, 0f00000000;
	fma.rn.ftz.f32 	%f20, %f108, %f5, 0f00000000;
	fma.rn.ftz.f32 	%f21, %f109, %f5, 0f00000000;
	// Tap (0,1): y1 = max(0, min(param_4 - 1, iy + 1)); wy1 = |0 - dy|.
	add.s32 	%r23, %r2, 1;
	min.s32 	%r24, %r4, %r23;
	max.s32 	%r26, %r15, %r24;
	sub.ftz.f32 	%f87, %f86, %f2;
	abs.ftz.f32 	%f22, %f87;
	mul.ftz.f32 	%f23, %f3, %f22;
	mul.lo.s32 	%r6, %r26, %r8;
	add.s32 	%r27, %r6, %r3;
	cvt.s64.s32	%rd2, %r27;
	@%p3 bra 	BB0_5;

	shl.b64 	%rd10, %rd2, 4;
	add.s64 	%rd11, %rd5, %rd10;
	ld.v4.f32 	{%f88, %f89, %f90, %f91}, [%rd11];
	mov.f32 	%f113, %f91;
	mov.f32 	%f112, %f90;
	mov.f32 	%f111, %f89;
	mov.f32 	%f110, %f88;
	bra.uni 	BB0_6;

BB0_5:
	shl.b64 	%rd12, %rd2, 3;
	add.s64 	%rd13, %rd5, %rd12;
	ld.v4.u16 	{%rs9, %rs10, %rs11, %rs12}, [%rd13];
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs9;
	cvt.f32.f16 	%f110, %temp;
	}
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs10;
	cvt.f32.f16 	%f111, %temp;
	}
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs11;
	cvt.f32.f16 	%f112, %temp;
	}
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs12;
	cvt.f32.f16 	%f113, %temp;
	}

BB0_6:
	// acc += texel(0,1) * w01.
	fma.rn.ftz.f32 	%f36, %f110, %f23, %f18;
	fma.rn.ftz.f32 	%f37, %f111, %f23, %f19;
	fma.rn.ftz.f32 	%f38, %f112, %f23, %f20;
	fma.rn.ftz.f32 	%f39, %f113, %f23, %f21;
	// Tap (1,0): x1 = max(0, min(param_3 - 1, ix + 1)); wx1 = |0 - dx|.
	add.s32 	%r29, %r1, 1;
	min.s32 	%r30, %r18, %r29;
	max.s32 	%r7, %r15, %r30;
	sub.ftz.f32 	%f93, %f86, %f1;
	abs.ftz.f32 	%f40, %f93;
	mul.ftz.f32 	%f41, %f40, %f4;
	add.s32 	%r32, %r5, %r7;
	cvt.s64.s32	%rd3, %r32;
	@%p3 bra 	BB0_8;

	shl.b64 	%rd14, %rd3, 4;
	add.s64 	%rd15, %rd5, %rd14;
	ld.v4.f32 	{%f94, %f95, %f96, %f97}, [%rd15];
	mov.f32 	%f117, %f97;
	mov.f32 	%f116, %f96;
	mov.f32 	%f115, %f95;
	mov.f32 	%f114, %f94;
	bra.uni 	BB0_9;

BB0_8:
	shl.b64 	%rd16, %rd3, 3;
	add.s64 	%rd17, %rd5, %rd16;
	ld.v4.u16 	{%rs17, %rs18, %rs19, %rs20}, [%rd17];
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs17;
	cvt.f32.f16 	%f114, %temp;
	}
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs18;
	cvt.f32.f16 	%f115, %temp;
	}
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs19;
	cvt.f32.f16 	%f116, %temp;
	}
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs20;
	cvt.f32.f16 	%f117, %temp;
	}

BB0_9:
	// acc += texel(1,0) * w10.
	fma.rn.ftz.f32 	%f54, %f114, %f41, %f36;
	fma.rn.ftz.f32 	%f55, %f115, %f41, %f37;
	fma.rn.ftz.f32 	%f56, %f116, %f41, %f38;
	fma.rn.ftz.f32 	%f57, %f117, %f41, %f39;
	// Tap (1,1): weight wx1 * wy1, texel index y1 * pitch + x1.
	mul.ftz.f32 	%f58, %f40, %f22;
	add.s32 	%r33, %r6, %r7;
	cvt.s64.s32	%rd4, %r33;
	@%p3 bra 	BB0_11;

	shl.b64 	%rd18, %rd4, 4;
	add.s64 	%rd19, %rd5, %rd18;
	ld.v4.f32 	{%f98, %f99, %f100, %f101}, [%rd19];
	mov.f32 	%f121, %f101;
	mov.f32 	%f120, %f100;
	mov.f32 	%f119, %f99;
	mov.f32 	%f118, %f98;
	bra.uni 	BB0_12;

BB0_11:
	shl.b64 	%rd20, %rd4, 3;
	add.s64 	%rd21, %rd5, %rd20;
	ld.v4.u16 	{%rs25, %rs26, %rs27, %rs28}, [%rd21];
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs25;
	cvt.f32.f16 	%f118, %temp;
	}
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs26;
	cvt.f32.f16 	%f119, %temp;
	}
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs27;
	cvt.f32.f16 	%f120, %temp;
	}
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs28;
	cvt.f32.f16 	%f121, %temp;
	}

BB0_12:
	// acc += texel(1,1) * w11, then return the four channels.
	fma.rn.ftz.f32 	%f102, %f118, %f58, %f54;
	fma.rn.ftz.f32 	%f103, %f119, %f58, %f55;
	fma.rn.ftz.f32 	%f104, %f120, %f58, %f56;
	fma.rn.ftz.f32 	%f105, %f121, %f58, %f57;
	st.param.f32	[func_retval0+0], %f102;
	st.param.f32	[func_retval0+4], %f103;
	st.param.f32	[func_retval0+8], %f104;
	st.param.f32	[func_retval0+12], %f105;
	ret;
}

// Device function (signature per the mangled name):
//   float2 __d_bilinear_interp_pixel_float2(float2*, int, int, int, float, float)
//
// Bilinearly samples a 2-channel f32 image at a fractional position and
// returns the blended texel through func_retval0 (8 bytes, two f32).
//
//   param_0  image base pointer (accessed with generic-address loads)
//   param_1  row pitch in texels (multiplied by the clamped row index)
//   param_2  column bound; column indices are clamped to [0, param_2 - 1]
//   param_3  row bound; row indices are clamped to [0, param_3 - 1]
//   param_4  sample x coordinate
//   param_5  sample y coordinate
//
// With ix = trunc(x), iy = trunc(y), dx = clamp(x - ix, 0, 1) and
// dy = clamp(y - iy, 0, 1) the result is
//   sum over m,n in {0,1} of |1 - m - dx| * |1 - n - dy| * texel(ix + m, iy + n)
// accumulated in the tap order (m,n) = (0,0), (0,1), (1,0), (1,1).
.visible .func  (.param .align 8 .b8 func_retval0[8]) _Z32__d_bilinear_interp_pixel_float2P6float2iiiff(
	.param .b64 _Z32__d_bilinear_interp_pixel_float2P6float2iiiff_param_0,
	.param .b32 _Z32__d_bilinear_interp_pixel_float2P6float2iiiff_param_1,
	.param .b32 _Z32__d_bilinear_interp_pixel_float2P6float2iiiff_param_2,
	.param .b32 _Z32__d_bilinear_interp_pixel_float2P6float2iiiff_param_3,
	.param .b32 _Z32__d_bilinear_interp_pixel_float2P6float2iiiff_param_4,
	.param .b32 _Z32__d_bilinear_interp_pixel_float2P6float2iiiff_param_5
)
{
	.reg .pred 	%p<3>;
	.reg .s32 	%r<30>;
	.reg .f32 	%f<49>;
	.reg .s64 	%rd<10>;


	ld.param.u64 	%rd1, [_Z32__d_bilinear_interp_pixel_float2P6float2iiiff_param_0];
	ld.param.u32 	%r1, [_Z32__d_bilinear_interp_pixel_float2P6float2iiiff_param_1];
	ld.param.u32 	%r2, [_Z32__d_bilinear_interp_pixel_float2P6float2iiiff_param_2];
	ld.param.u32 	%r3, [_Z32__d_bilinear_interp_pixel_float2P6float2iiiff_param_3];
	ld.param.f32 	%f1, [_Z32__d_bilinear_interp_pixel_float2P6float2iiiff_param_4];
	ld.param.f32 	%f2, [_Z32__d_bilinear_interp_pixel_float2P6float2iiiff_param_5];
	// ix = trunc(x), iy = trunc(y).
	cvt.rzi.ftz.s32.f32	%r4, %f1;
	cvt.rzi.ftz.s32.f32	%r5, %f2;
	// dx = clamp(x - ix, 0, 1), clamped in f32: clamping after a float->int
	// conversion would discard the fraction and make every weight 0 or 1.
	cvt.rn.f32.s32	%f3, %r4;
	sub.ftz.f32 	%f4, %f1, %f3;
	min.ftz.f32 	%f5, %f4, 0f3F800000;
	mov.u32 	%r9, 0;
	max.ftz.f32 	%f6, %f5, 0f00000000;
	mov.f32 	%f7, 0f00000000;
	// dy = clamp(y - iy, 0, 1).
	cvt.rn.f32.s32	%f8, %r5;
	sub.ftz.f32 	%f9, %f2, %f8;
	min.ftz.f32 	%f10, %f9, 0f3F800000;
	max.ftz.f32 	%f11, %f10, 0f00000000;
	// y0 = max(0, min(param_3 - 1, iy)); x0 = max(0, min(param_2 - 1, ix)).
	add.s32 	%r12, %r2, -1;
	add.s32 	%r13, %r3, -1;
	min.s32 	%r14, %r13, %r5;
	max.s32 	%r15, %r9, %r14;
	mov.f32 	%f12, 0f3F800000;
	sub.ftz.f32 	%f13, %f12, %f11;
	min.s32 	%r16, %r12, %r4;
	max.s32 	%r17, %r9, %r16;
	sub.ftz.f32 	%f14, %f12, %f6;
	// wx0 = |1 - dx|, wy0 = |1 - dy|.
	abs.ftz.f32 	%f15, %f14;
	abs.ftz.f32 	%f16, %f13;
	// Tap (0,0): texel(x0, y0) * wx0 * wy0; texels are 8 bytes apart.
	mul.ftz.f32 	%f17, %f15, %f16;
	mul.lo.s32 	%r18, %r15, %r1;
	add.s32 	%r19, %r18, %r17;
	mul.wide.s32 	%rd2, %r19, 8;
	add.s64 	%rd3, %rd1, %rd2;
	ld.v2.f32 	{%f18, %f19}, [%rd3];
	fma.rn.ftz.f32 	%f21, %f18, %f17, 0f00000000;
	fma.rn.ftz.f32 	%f23, %f19, %f17, 0f00000000;
	// Tap (0,1): y1 = max(0, min(param_3 - 1, iy + 1)); wy1 = |0 - dy|.
	add.s32 	%r20, %r5, 1;
	min.s32 	%r21, %r13, %r20;
	max.s32 	%r22, %r9, %r21;
	sub.ftz.f32 	%f24, %f7, %f11;
	abs.ftz.f32 	%f25, %f24;
	mul.ftz.f32 	%f26, %f15, %f25;
	mul.lo.s32 	%r23, %r22, %r1;
	add.s32 	%r24, %r23, %r17;
	mul.wide.s32 	%rd4, %r24, 8;
	add.s64 	%rd5, %rd1, %rd4;
	ld.v2.f32 	{%f27, %f28}, [%rd5];
	fma.rn.ftz.f32 	%f30, %f27, %f26, %f21;
	fma.rn.ftz.f32 	%f32, %f28, %f26, %f23;
	// Tap (1,0): x1 = max(0, min(param_2 - 1, ix + 1)); wx1 = |0 - dx|.
	add.s32 	%r25, %r4, 1;
	min.s32 	%r26, %r12, %r25;
	max.s32 	%r27, %r9, %r26;
	sub.ftz.f32 	%f33, %f7, %f6;
	abs.ftz.f32 	%f34, %f33;
	mul.ftz.f32 	%f35, %f34, %f16;
	add.s32 	%r28, %r18, %r27;
	mul.wide.s32 	%rd6, %r28, 8;
	add.s64 	%rd7, %rd1, %rd6;
	ld.v2.f32 	{%f36, %f37}, [%rd7];
	fma.rn.ftz.f32 	%f39, %f36, %f35, %f30;
	fma.rn.ftz.f32 	%f41, %f37, %f35, %f32;
	// Tap (1,1): texel(x1, y1) * wx1 * wy1.
	mul.ftz.f32 	%f42, %f34, %f25;
	add.s32 	%r29, %r23, %r27;
	mul.wide.s32 	%rd8, %r29, 8;
	add.s64 	%rd9, %rd1, %rd8;
	ld.v2.f32 	{%f43, %f44}, [%rd9];
	fma.rn.ftz.f32 	%f46, %f43, %f42, %f39;
	fma.rn.ftz.f32 	%f48, %f44, %f42, %f41;
	st.param.f32	[func_retval0+0], %f46;
	st.param.f32	[func_retval0+4], %f48;
	ret;
}

// Kernel: per-pixel weighted sum of the first three channels of a 4 x f32
// source texel, divided by 255 and stored as a single f32.
//
//   param_0  destination base pointer (one f32 per pixel)
//   param_1  destination row pitch in elements
//   param_2  source base pointer (16-byte texels of four f32)
//   param_3  source row pitch in texels
//   param_4  column count; threads with column >= param_4 do nothing
//   param_5  row count; threads with row >= param_5 do nothing
//   param_6  weight applied to source channel 0
//   param_7  weight applied to source channel 1
//   param_8  weight applied to source channel 2
//
// Column = ntid.x * ctaid.x + tid.x, row = ntid.y * ctaid.y + tid.y.
// The fourth source channel is loaded but not used.
.visible .entry _d_image_rgb2gray_kernel_div255(
	.param .u64 _d_image_rgb2gray_kernel_div255_param_0,
	.param .u32 _d_image_rgb2gray_kernel_div255_param_1,
	.param .u64 _d_image_rgb2gray_kernel_div255_param_2,
	.param .u32 _d_image_rgb2gray_kernel_div255_param_3,
	.param .u32 _d_image_rgb2gray_kernel_div255_param_4,
	.param .u32 _d_image_rgb2gray_kernel_div255_param_5,
	.param .f32 _d_image_rgb2gray_kernel_div255_param_6,
	.param .f32 _d_image_rgb2gray_kernel_div255_param_7,
	.param .f32 _d_image_rgb2gray_kernel_div255_param_8
)
{
	.reg .pred 	%col_ok, %row_ok, %inside;
	.reg .s32 	%out_pitch, %in_pitch, %n_cols, %n_rows;
	.reg .s32 	%blk_w, %blk_x, %thr_x, %blk_h, %blk_y, %thr_y;
	.reg .s32 	%col, %row, %in_idx, %out_idx;
	.reg .f32 	%w0, %w1, %w2, %c0, %c1, %c2, %c3, %acc, %gray;
	.reg .s64 	%out_gen, %in_gen, %out_base, %in_base;
	.reg .s64 	%in_off, %out_off, %in_ptr, %out_ptr;

	// Kernel arguments.
	ld.param.u64 	%out_gen, [_d_image_rgb2gray_kernel_div255_param_0];
	ld.param.u32 	%out_pitch, [_d_image_rgb2gray_kernel_div255_param_1];
	ld.param.u64 	%in_gen, [_d_image_rgb2gray_kernel_div255_param_2];
	ld.param.u32 	%in_pitch, [_d_image_rgb2gray_kernel_div255_param_3];
	ld.param.u32 	%n_cols, [_d_image_rgb2gray_kernel_div255_param_4];
	ld.param.u32 	%n_rows, [_d_image_rgb2gray_kernel_div255_param_5];
	ld.param.f32 	%w0, [_d_image_rgb2gray_kernel_div255_param_6];
	ld.param.f32 	%w1, [_d_image_rgb2gray_kernel_div255_param_7];
	ld.param.f32 	%w2, [_d_image_rgb2gray_kernel_div255_param_8];

	// Global pixel coordinates of this thread.
	mov.u32 	%blk_w, %ntid.x;
	mov.u32 	%blk_x, %ctaid.x;
	mov.u32 	%thr_x, %tid.x;
	mad.lo.s32 	%col, %blk_w, %blk_x, %thr_x;
	mov.u32 	%blk_h, %ntid.y;
	mov.u32 	%blk_y, %ctaid.y;
	mov.u32 	%thr_y, %tid.y;
	mad.lo.s32 	%row, %blk_h, %blk_y, %thr_y;

	// Threads outside the column/row counts leave immediately.
	setp.lt.s32	%col_ok, %col, %n_cols;
	setp.lt.s32	%row_ok, %row, %n_rows;
	and.pred  	%inside, %col_ok, %row_ok;
	@!%inside bra 	BB2_2;

	// Fetch the source texel at (row, col).
	cvta.to.global.u64 	%in_base, %in_gen;
	mad.lo.s32 	%in_idx, %row, %in_pitch, %col;
	mul.wide.s32 	%in_off, %in_idx, 16;
	add.s64 	%in_ptr, %in_base, %in_off;
	ld.global.v4.f32 	{%c0, %c1, %c2, %c3}, [%in_ptr];

	// gray = ((c1 * w1) + c0 * w0 + c2 * w2) / 255, in that evaluation order.
	mul.ftz.f32 	%acc, %c1, %w1;
	fma.rn.ftz.f32 	%acc, %c0, %w0, %acc;
	fma.rn.ftz.f32 	%acc, %c2, %w2, %acc;
	div.approx.ftz.f32 	%gray, %acc, 0f437F0000;

	// Store the scalar result at (row, col) of the destination.
	cvta.to.global.u64 	%out_base, %out_gen;
	mad.lo.s32 	%out_idx, %row, %out_pitch, %col;
	mul.wide.s32 	%out_off, %out_idx, 4;
	add.s64 	%out_ptr, %out_base, %out_off;
	st.global.f32 	[%out_ptr], %gray;

BB2_2:
	ret;
}

// Kernel: bilinear resize of a 4-channel source image into a 4 x f32
// destination, one destination texel per thread.
//
//   param_0   destination base pointer (16-byte texels of four f32)
//   param_1   destination row pitch in texels
//   param_2   destination column count; threads with column >= param_2 exit
//   param_3   destination row count; threads with row >= param_3 exit
//   param_4   source base pointer
//   param_5   source row pitch in texels
//   param_6   source pixel format: 0 -> 4 x f16 texels (8 bytes),
//             non-zero -> 4 x f32 texels (16 bytes)
//   param_7   source column bound; indices clamped to [0, param_7 - 1]
//   param_8   source row bound; indices clamped to [0, param_8 - 1]
//   param_9   x divisor: source x = (column + 1) / param_9 - 1
//   param_10  y divisor: source y = (row + 1) / param_10 - 1
//   param_11  multiplier applied to all four output channels
//
// With ix = trunc(x), iy = trunc(y), dx = clamp(x - ix, 0, 1) and
// dy = clamp(y - iy, 0, 1) the sampled value is
//   sum over m,n in {0,1} of |1 - m - dx| * |1 - n - dy| * texel(ix + m, iy + n)
// accumulated in the tap order (m,n) = (0,0), (0,1), (1,0), (1,1).
.visible .entry _d_bilinear_resize_kernel_float4(
	.param .u64 _d_bilinear_resize_kernel_float4_param_0,
	.param .u32 _d_bilinear_resize_kernel_float4_param_1,
	.param .u32 _d_bilinear_resize_kernel_float4_param_2,
	.param .u32 _d_bilinear_resize_kernel_float4_param_3,
	.param .u64 _d_bilinear_resize_kernel_float4_param_4,
	.param .u32 _d_bilinear_resize_kernel_float4_param_5,
	.param .u32 _d_bilinear_resize_kernel_float4_param_6,
	.param .u32 _d_bilinear_resize_kernel_float4_param_7,
	.param .u32 _d_bilinear_resize_kernel_float4_param_8,
	.param .f32 _d_bilinear_resize_kernel_float4_param_9,
	.param .f32 _d_bilinear_resize_kernel_float4_param_10,
	.param .f32 _d_bilinear_resize_kernel_float4_param_11
)
{
	.reg .pred 	%p<10>;
	.reg .s16 	%rs<33>;
	.reg .s32 	%r<58>;
	.reg .f32 	%f<133>;
	.reg .s64 	%rd<33>;


	ld.param.u64 	%rd4, [_d_bilinear_resize_kernel_float4_param_0];
	ld.param.u32 	%r9, [_d_bilinear_resize_kernel_float4_param_1];
	ld.param.u32 	%r14, [_d_bilinear_resize_kernel_float4_param_2];
	ld.param.u32 	%r15, [_d_bilinear_resize_kernel_float4_param_3];
	ld.param.u64 	%rd5, [_d_bilinear_resize_kernel_float4_param_4];
	ld.param.u32 	%r10, [_d_bilinear_resize_kernel_float4_param_5];
	ld.param.u32 	%r11, [_d_bilinear_resize_kernel_float4_param_6];
	ld.param.u32 	%r12, [_d_bilinear_resize_kernel_float4_param_7];
	ld.param.u32 	%r13, [_d_bilinear_resize_kernel_float4_param_8];
	ld.param.f32 	%f71, [_d_bilinear_resize_kernel_float4_param_9];
	ld.param.f32 	%f72, [_d_bilinear_resize_kernel_float4_param_10];
	ld.param.f32 	%f73, [_d_bilinear_resize_kernel_float4_param_11];
	// %r1 = destination column, %r2 = destination row of this thread.
	mov.u32 	%r16, %ntid.x;
	mov.u32 	%r17, %ctaid.x;
	mov.u32 	%r18, %tid.x;
	mad.lo.s32 	%r1, %r16, %r17, %r18;
	mov.u32 	%r19, %ntid.y;
	mov.u32 	%r20, %ctaid.y;
	mov.u32 	%r21, %tid.y;
	mad.lo.s32 	%r2, %r19, %r20, %r21;
	setp.lt.s32	%p1, %r1, %r14;
	setp.lt.s32	%p2, %r2, %r15;
	and.pred  	%p3, %p1, %p2;
	@!%p3 bra 	BB3_14;
	bra.uni 	BB3_1;

BB3_1:
	// Source coordinates: x = (column + 1) * rcp(param_9) - 1, likewise y.
	rcp.approx.ftz.f32 	%f74, %f71;
	mov.f32 	%f75, 0f3F800000;
	add.s32 	%r22, %r1, 1;
	cvt.rn.f32.s32	%f76, %r22;
	fma.rn.ftz.f32 	%f77, %f76, %f74, 0fBF800000;
	add.s32 	%r24, %r2, 1;
	cvt.rn.f32.s32	%f78, %r24;
	rcp.approx.ftz.f32 	%f79, %f72;
	fma.rn.ftz.f32 	%f80, %f78, %f79, 0fBF800000;
	// ix = trunc(x), iy = trunc(y).
	cvt.rzi.ftz.s32.f32	%r3, %f77;
	cvt.rzi.ftz.s32.f32	%r4, %f80;
	// dx = clamp(x - ix, 0, 1), clamped in f32: clamping after a float->int
	// conversion would discard the fraction and make every weight 0 or 1.
	cvt.rn.f32.s32	%f81, %r3;
	sub.ftz.f32 	%f82, %f77, %f81;
	min.ftz.f32 	%f83, %f82, 0f3F800000;
	mov.u32 	%r27, 0;
	max.ftz.f32 	%f1, %f83, 0f00000000;
	// dy = clamp(y - iy, 0, 1).
	cvt.rn.f32.s32	%f84, %r4;
	sub.ftz.f32 	%f85, %f80, %f84;
	min.ftz.f32 	%f86, %f85, 0f3F800000;
	max.ftz.f32 	%f2, %f86, 0f00000000;
	// x0 = max(0, min(param_7 - 1, ix)); wx0 = |1 - dx|.
	add.s32 	%r30, %r12, -1;
	min.s32 	%r31, %r30, %r3;
	max.s32 	%r5, %r27, %r31;
	sub.ftz.f32 	%f87, %f75, %f1;
	abs.ftz.f32 	%f3, %f87;
	// y0 = max(0, min(param_8 - 1, iy)); wy0 = |1 - dy|.
	add.s32 	%r32, %r13, -1;
	min.s32 	%r33, %r32, %r4;
	max.s32 	%r34, %r27, %r33;
	sub.ftz.f32 	%f88, %f75, %f2;
	abs.ftz.f32 	%f4, %f88;
	// Tap (0,0): weight wx0 * wy0, texel index y0 * pitch + x0.
	mul.ftz.f32 	%f5, %f3, %f4;
	mul.lo.s32 	%r6, %r34, %r10;
	// %p6 is true when the source format selector is 0 (f16 texels); it is
	// reused by all four taps.
	setp.eq.s32	%p6, %r11, 0;
	@%p6 bra 	BB3_3;

	// f32 texel: 16-byte stride.
	cvta.to.global.u64 	%rd6, %rd5;
	add.s32 	%r35, %r6, %r5;
	mul.wide.s32 	%rd7, %r35, 16;
	add.s64 	%rd8, %rd6, %rd7;
	ld.global.v4.f32 	{%f89, %f90, %f91, %f92}, [%rd8];
	mov.f32 	%f120, %f92;
	mov.f32 	%f119, %f91;
	mov.f32 	%f118, %f90;
	mov.f32 	%f117, %f89;
	bra.uni 	BB3_4;

BB3_3:
	// f16 texel: 8-byte stride, each channel widened to f32.
	cvta.to.global.u64 	%rd9, %rd5;
	add.s32 	%r36, %r6, %r5;
	mul.wide.s32 	%rd10, %r36, 8;
	add.s64 	%rd11, %rd9, %rd10;
	ld.global.v4.u16 	{%rs1, %rs2, %rs3, %rs4}, [%rd11];
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs1;
	cvt.f32.f16 	%f117, %temp;
	}
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs2;
	cvt.f32.f16 	%f118, %temp;
	}
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs3;
	cvt.f32.f16 	%f119, %temp;
	}
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs4;
	cvt.f32.f16 	%f120, %temp;
	}

BB3_4:
	// acc = texel(0,0) * w00.
	fma.rn.ftz.f32 	%f18, %f117, %f5, 0f00000000;
	mov.f32 	%f93, 0f00000000;
	fma.rn.ftz.f32 	%f19, %f118, %f5, 0f00000000;
	fma.rn.ftz.f32 	%f20, %f119, %f5, 0f00000000;
	fma.rn.ftz.f32 	%f21, %f120, %f5, 0f00000000;
	// Tap (0,1): y1 = max(0, min(param_8 - 1, iy + 1)); wy1 = |0 - dy|.
	add.s32 	%r38, %r4, 1;
	min.s32 	%r39, %r32, %r38;
	max.s32 	%r41, %r27, %r39;
	sub.ftz.f32 	%f94, %f93, %f2;
	abs.ftz.f32 	%f22, %f94;
	mul.ftz.f32 	%f23, %f3, %f22;
	mul.lo.s32 	%r7, %r41, %r10;
	add.s32 	%r42, %r7, %r5;
	cvt.s64.s32	%rd1, %r42;
	@%p6 bra 	BB3_6;

	cvta.to.global.u64 	%rd12, %rd5;
	shl.b64 	%rd13, %rd1, 4;
	add.s64 	%rd14, %rd12, %rd13;
	ld.global.v4.f32 	{%f95, %f96, %f97, %f98}, [%rd14];
	mov.f32 	%f124, %f98;
	mov.f32 	%f123, %f97;
	mov.f32 	%f122, %f96;
	mov.f32 	%f121, %f95;
	bra.uni 	BB3_7;

BB3_6:
	cvta.to.global.u64 	%rd15, %rd5;
	shl.b64 	%rd16, %rd1, 3;
	add.s64 	%rd17, %rd15, %rd16;
	ld.global.v4.u16 	{%rs9, %rs10, %rs11, %rs12}, [%rd17];
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs9;
	cvt.f32.f16 	%f121, %temp;
	}
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs10;
	cvt.f32.f16 	%f122, %temp;
	}
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs11;
	cvt.f32.f16 	%f123, %temp;
	}
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs12;
	cvt.f32.f16 	%f124, %temp;
	}

BB3_7:
	// acc += texel(0,1) * w01.
	fma.rn.ftz.f32 	%f36, %f121, %f23, %f18;
	fma.rn.ftz.f32 	%f37, %f122, %f23, %f19;
	fma.rn.ftz.f32 	%f38, %f123, %f23, %f20;
	fma.rn.ftz.f32 	%f39, %f124, %f23, %f21;
	// Tap (1,0): x1 = max(0, min(param_7 - 1, ix + 1)); wx1 = |0 - dx|.
	add.s32 	%r44, %r3, 1;
	min.s32 	%r45, %r30, %r44;
	max.s32 	%r8, %r27, %r45;
	sub.ftz.f32 	%f100, %f93, %f1;
	abs.ftz.f32 	%f40, %f100;
	mul.ftz.f32 	%f41, %f40, %f4;
	add.s32 	%r47, %r6, %r8;
	cvt.s64.s32	%rd2, %r47;
	@%p6 bra 	BB3_9;

	cvta.to.global.u64 	%rd18, %rd5;
	shl.b64 	%rd19, %rd2, 4;
	add.s64 	%rd20, %rd18, %rd19;
	ld.global.v4.f32 	{%f101, %f102, %f103, %f104}, [%rd20];
	mov.f32 	%f128, %f104;
	mov.f32 	%f127, %f103;
	mov.f32 	%f126, %f102;
	mov.f32 	%f125, %f101;
	bra.uni 	BB3_10;

BB3_9:
	cvta.to.global.u64 	%rd21, %rd5;
	shl.b64 	%rd22, %rd2, 3;
	add.s64 	%rd23, %rd21, %rd22;
	ld.global.v4.u16 	{%rs17, %rs18, %rs19, %rs20}, [%rd23];
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs17;
	cvt.f32.f16 	%f125, %temp;
	}
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs18;
	cvt.f32.f16 	%f126, %temp;
	}
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs19;
	cvt.f32.f16 	%f127, %temp;
	}
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs20;
	cvt.f32.f16 	%f128, %temp;
	}

BB3_10:
	// acc += texel(1,0) * w10.
	fma.rn.ftz.f32 	%f54, %f125, %f41, %f36;
	fma.rn.ftz.f32 	%f55, %f126, %f41, %f37;
	fma.rn.ftz.f32 	%f56, %f127, %f41, %f38;
	fma.rn.ftz.f32 	%f57, %f128, %f41, %f39;
	// Tap (1,1): weight wx1 * wy1, texel index y1 * pitch + x1.
	mul.ftz.f32 	%f58, %f40, %f22;
	add.s32 	%r48, %r7, %r8;
	cvt.s64.s32	%rd3, %r48;
	@%p6 bra 	BB3_12;

	cvta.to.global.u64 	%rd24, %rd5;
	shl.b64 	%rd25, %rd3, 4;
	add.s64 	%rd26, %rd24, %rd25;
	ld.global.v4.f32 	{%f105, %f106, %f107, %f108}, [%rd26];
	mov.f32 	%f132, %f108;
	mov.f32 	%f131, %f107;
	mov.f32 	%f130, %f106;
	mov.f32 	%f129, %f105;
	bra.uni 	BB3_13;

BB3_12:
	cvta.to.global.u64 	%rd27, %rd5;
	shl.b64 	%rd28, %rd3, 3;
	add.s64 	%rd29, %rd27, %rd28;
	ld.global.v4.u16 	{%rs25, %rs26, %rs27, %rs28}, [%rd29];
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs25;
	cvt.f32.f16 	%f129, %temp;
	}
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs26;
	cvt.f32.f16 	%f130, %temp;
	}
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs27;
	cvt.f32.f16 	%f131, %temp;
	}
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs28;
	cvt.f32.f16 	%f132, %temp;
	}

BB3_13:
	// acc += texel(1,1) * w11; scale by param_11 and store at
	// destination index row * param_1 + column.
	cvta.to.global.u64 	%rd30, %rd4;
	fma.rn.ftz.f32 	%f109, %f129, %f58, %f54;
	fma.rn.ftz.f32 	%f110, %f130, %f58, %f55;
	fma.rn.ftz.f32 	%f111, %f131, %f58, %f56;
	fma.rn.ftz.f32 	%f112, %f132, %f58, %f57;
	mad.lo.s32 	%r57, %r2, %r9, %r1;
	mul.wide.s32 	%rd31, %r57, 16;
	add.s64 	%rd32, %rd30, %rd31;
	mul.ftz.f32 	%f113, %f109, %f73;
	mul.ftz.f32 	%f114, %f110, %f73;
	mul.ftz.f32 	%f115, %f111, %f73;
	mul.ftz.f32 	%f116, %f112, %f73;
	st.global.v4.f32 	[%rd32], {%f113, %f114, %f115, %f116};

BB3_14:
	ret;
}

// Kernel: point-sampled resize of a 2 x f32 image with a uniform scale on
// both channels, one destination texel per thread.
//
//   param_0   destination base pointer (8-byte texels of two f32)
//   param_1   destination row pitch in texels
//   param_2   destination column count; threads with column >= param_2 exit
//   param_3   destination row count; threads with row >= param_3 exit
//   param_4   source base pointer (8-byte texels of two f32)
//   param_5   source row pitch in texels
//   param_6   not read by this kernel
//   param_7   not read by this kernel
//   param_8   x divisor: source column = trunc((column + 1) / param_8 - 1)
//   param_9   y divisor: source row    = trunc((row + 1) / param_9 - 1)
//   param_10  multiplier applied to both channels
//
// NOTE(review): the computed source column/row are used without clamping;
// confirm that callers guarantee they stay inside the source image.
.visible .entry _d_resize_flow_nn_kernel_f2p_f2p(
	.param .u64 _d_resize_flow_nn_kernel_f2p_f2p_param_0,
	.param .u32 _d_resize_flow_nn_kernel_f2p_f2p_param_1,
	.param .u32 _d_resize_flow_nn_kernel_f2p_f2p_param_2,
	.param .u32 _d_resize_flow_nn_kernel_f2p_f2p_param_3,
	.param .u64 _d_resize_flow_nn_kernel_f2p_f2p_param_4,
	.param .u32 _d_resize_flow_nn_kernel_f2p_f2p_param_5,
	.param .u32 _d_resize_flow_nn_kernel_f2p_f2p_param_6,
	.param .u32 _d_resize_flow_nn_kernel_f2p_f2p_param_7,
	.param .f32 _d_resize_flow_nn_kernel_f2p_f2p_param_8,
	.param .f32 _d_resize_flow_nn_kernel_f2p_f2p_param_9,
	.param .f32 _d_resize_flow_nn_kernel_f2p_f2p_param_10
)
{
	.reg .pred 	%col_ok, %row_ok, %inside;
	.reg .s32 	%out_pitch, %out_cols, %out_rows, %in_pitch;
	.reg .s32 	%blk_w, %blk_x, %thr_x, %blk_h, %blk_y, %thr_y;
	.reg .s32 	%col, %row, %col_p1, %row_p1;
	.reg .s32 	%src_col, %src_row, %out_idx, %in_idx;
	.reg .f32 	%div_x, %div_y, %gain, %inv_x, %inv_y;
	.reg .f32 	%fcol, %frow, %sx, %sy, %v0, %v1, %o0, %o1;
	.reg .s64 	%out_gen, %in_gen, %out_base, %in_base;
	.reg .s64 	%out_off, %in_off, %out_ptr, %in_ptr;

	// Kernel arguments (param_6 and param_7 are intentionally not loaded).
	ld.param.u64 	%out_gen, [_d_resize_flow_nn_kernel_f2p_f2p_param_0];
	ld.param.u32 	%out_pitch, [_d_resize_flow_nn_kernel_f2p_f2p_param_1];
	ld.param.u32 	%out_cols, [_d_resize_flow_nn_kernel_f2p_f2p_param_2];
	ld.param.u32 	%out_rows, [_d_resize_flow_nn_kernel_f2p_f2p_param_3];
	ld.param.u64 	%in_gen, [_d_resize_flow_nn_kernel_f2p_f2p_param_4];
	ld.param.u32 	%in_pitch, [_d_resize_flow_nn_kernel_f2p_f2p_param_5];
	ld.param.f32 	%div_x, [_d_resize_flow_nn_kernel_f2p_f2p_param_8];
	ld.param.f32 	%div_y, [_d_resize_flow_nn_kernel_f2p_f2p_param_9];
	ld.param.f32 	%gain, [_d_resize_flow_nn_kernel_f2p_f2p_param_10];

	// Destination coordinates of this thread.
	mov.u32 	%blk_w, %ntid.x;
	mov.u32 	%blk_x, %ctaid.x;
	mov.u32 	%thr_x, %tid.x;
	mad.lo.s32 	%col, %blk_w, %blk_x, %thr_x;
	mov.u32 	%blk_h, %ntid.y;
	mov.u32 	%blk_y, %ctaid.y;
	mov.u32 	%thr_y, %tid.y;
	mad.lo.s32 	%row, %blk_h, %blk_y, %thr_y;

	// Threads outside the destination extent leave immediately.
	setp.lt.s32	%col_ok, %col, %out_cols;
	setp.lt.s32	%row_ok, %row, %out_rows;
	and.pred  	%inside, %col_ok, %row_ok;
	@!%inside bra 	BB4_2;

	// Source column = trunc((col + 1) * rcp(div_x) - 1).
	add.s32 	%col_p1, %col, 1;
	cvt.rn.f32.s32	%fcol, %col_p1;
	rcp.approx.ftz.f32 	%inv_x, %div_x;
	fma.rn.ftz.f32 	%sx, %fcol, %inv_x, 0fBF800000;
	cvt.rzi.ftz.s32.f32	%src_col, %sx;

	// Source row = trunc((row + 1) * rcp(div_y) - 1).
	add.s32 	%row_p1, %row, 1;
	cvt.rn.f32.s32	%frow, %row_p1;
	rcp.approx.ftz.f32 	%inv_y, %div_y;
	fma.rn.ftz.f32 	%sy, %frow, %inv_y, 0fBF800000;
	cvt.rzi.ftz.s32.f32	%src_row, %sy;

	// Load the selected source texel.
	cvta.to.global.u64 	%in_base, %in_gen;
	mad.lo.s32 	%in_idx, %src_row, %in_pitch, %src_col;
	mul.wide.s32 	%in_off, %in_idx, 8;
	add.s64 	%in_ptr, %in_base, %in_off;
	ld.global.v2.f32 	{%v0, %v1}, [%in_ptr];

	// Scale both channels and store at (row, col) of the destination.
	mul.ftz.f32 	%o1, %v1, %gain;
	mul.ftz.f32 	%o0, %v0, %gain;
	cvta.to.global.u64 	%out_base, %out_gen;
	mad.lo.s32 	%out_idx, %row, %out_pitch, %col;
	mul.wide.s32 	%out_off, %out_idx, 8;
	add.s64 	%out_ptr, %out_base, %out_off;
	st.global.v2.f32 	[%out_ptr], {%o0, %o1};

BB4_2:
	ret;
}

// Kernel: bilinear resize of a 2 x f32 source image into two separate
// single-channel f32 destinations, one destination element per thread.
//
//   param_0   destination pointer for channel 0 (one f32 per element)
//   param_1   destination pointer for channel 1 (one f32 per element)
//   param_2   destination row pitch in elements (shared by both planes)
//   param_3   destination column count; threads with column >= param_3 exit
//   param_4   destination row count; threads with row >= param_4 exit
//   param_5   source base pointer (8-byte texels of two f32)
//   param_6   source row pitch in texels
//   param_7   source column bound; indices clamped to [0, param_7 - 1]
//   param_8   source row bound; indices clamped to [0, param_8 - 1]
//   param_9   x divisor: source x = (column + 1) / param_9 - 1
//   param_10  y divisor: source y = (row + 1) / param_10 - 1
//   param_11  multiplier applied to channel 0
//   param_12  multiplier applied to channel 1
//
// With ix = trunc(x), iy = trunc(y), dx = clamp(x - ix, 0, 1) and
// dy = clamp(y - iy, 0, 1) the sampled value is
//   sum over m,n in {0,1} of |1 - m - dx| * |1 - n - dy| * texel(ix + m, iy + n)
// accumulated in the tap order (m,n) = (0,0), (0,1), (1,0), (1,1).
.visible .entry _d_resize_flow_kernel_f2p_f1p(
	.param .u64 _d_resize_flow_kernel_f2p_f1p_param_0,
	.param .u64 _d_resize_flow_kernel_f2p_f1p_param_1,
	.param .u32 _d_resize_flow_kernel_f2p_f1p_param_2,
	.param .u32 _d_resize_flow_kernel_f2p_f1p_param_3,
	.param .u32 _d_resize_flow_kernel_f2p_f1p_param_4,
	.param .u64 _d_resize_flow_kernel_f2p_f1p_param_5,
	.param .u32 _d_resize_flow_kernel_f2p_f1p_param_6,
	.param .u32 _d_resize_flow_kernel_f2p_f1p_param_7,
	.param .u32 _d_resize_flow_kernel_f2p_f1p_param_8,
	.param .f32 _d_resize_flow_kernel_f2p_f1p_param_9,
	.param .f32 _d_resize_flow_kernel_f2p_f1p_param_10,
	.param .f32 _d_resize_flow_kernel_f2p_f1p_param_11,
	.param .f32 _d_resize_flow_kernel_f2p_f1p_param_12
)
{
	.reg .pred 	%p<6>;
	.reg .s32 	%r<44>;
	.reg .f32 	%f<59>;
	.reg .s64 	%rd<18>;


	ld.param.u64 	%rd1, [_d_resize_flow_kernel_f2p_f1p_param_0];
	ld.param.u64 	%rd2, [_d_resize_flow_kernel_f2p_f1p_param_1];
	ld.param.u32 	%r3, [_d_resize_flow_kernel_f2p_f1p_param_2];
	ld.param.u32 	%r7, [_d_resize_flow_kernel_f2p_f1p_param_3];
	ld.param.u32 	%r8, [_d_resize_flow_kernel_f2p_f1p_param_4];
	ld.param.u64 	%rd3, [_d_resize_flow_kernel_f2p_f1p_param_5];
	ld.param.u32 	%r4, [_d_resize_flow_kernel_f2p_f1p_param_6];
	ld.param.u32 	%r5, [_d_resize_flow_kernel_f2p_f1p_param_7];
	ld.param.u32 	%r6, [_d_resize_flow_kernel_f2p_f1p_param_8];
	ld.param.f32 	%f1, [_d_resize_flow_kernel_f2p_f1p_param_9];
	ld.param.f32 	%f2, [_d_resize_flow_kernel_f2p_f1p_param_10];
	ld.param.f32 	%f3, [_d_resize_flow_kernel_f2p_f1p_param_11];
	ld.param.f32 	%f4, [_d_resize_flow_kernel_f2p_f1p_param_12];
	// %r1 = destination column, %r2 = destination row of this thread.
	mov.u32 	%r9, %ntid.x;
	mov.u32 	%r10, %ctaid.x;
	mov.u32 	%r11, %tid.x;
	mad.lo.s32 	%r1, %r9, %r10, %r11;
	mov.u32 	%r12, %ntid.y;
	mov.u32 	%r13, %ctaid.y;
	mov.u32 	%r14, %tid.y;
	mad.lo.s32 	%r2, %r12, %r13, %r14;
	setp.lt.s32	%p1, %r1, %r7;
	setp.lt.s32	%p2, %r2, %r8;
	and.pred  	%p3, %p1, %p2;
	@!%p3 bra 	BB5_2;
	bra.uni 	BB5_1;

BB5_1:
	cvta.to.global.u64 	%rd4, %rd2;
	cvta.to.global.u64 	%rd5, %rd1;
	cvta.to.global.u64 	%rd6, %rd3;
	// Source coordinates: x = (column + 1) * rcp(param_9) - 1, likewise y.
	add.s32 	%r15, %r1, 1;
	cvt.rn.f32.s32	%f5, %r15;
	rcp.approx.ftz.f32 	%f6, %f1;
	mov.f32 	%f7, 0f3F800000;
	fma.rn.ftz.f32 	%f8, %f5, %f6, 0fBF800000;
	add.s32 	%r17, %r2, 1;
	cvt.rn.f32.s32	%f9, %r17;
	rcp.approx.ftz.f32 	%f10, %f2;
	fma.rn.ftz.f32 	%f11, %f9, %f10, 0fBF800000;
	// ix = trunc(x), iy = trunc(y).
	cvt.rzi.ftz.s32.f32	%r18, %f8;
	cvt.rzi.ftz.s32.f32	%r19, %f11;
	// dx = clamp(x - ix, 0, 1), clamped in f32: clamping after a float->int
	// conversion would discard the fraction and make every weight 0 or 1.
	cvt.rn.f32.s32	%f12, %r18;
	sub.ftz.f32 	%f13, %f8, %f12;
	min.ftz.f32 	%f14, %f13, 0f3F800000;
	mov.u32 	%r22, 0;
	max.ftz.f32 	%f15, %f14, 0f00000000;
	mov.f32 	%f16, 0f00000000;
	// dy = clamp(y - iy, 0, 1).
	cvt.rn.f32.s32	%f17, %r19;
	sub.ftz.f32 	%f18, %f11, %f17;
	min.ftz.f32 	%f19, %f18, 0f3F800000;
	max.ftz.f32 	%f20, %f19, 0f00000000;
	// y0 = max(0, min(param_8 - 1, iy)); x0 = max(0, min(param_7 - 1, ix)).
	add.s32 	%r25, %r6, -1;
	min.s32 	%r26, %r25, %r19;
	max.s32 	%r27, %r22, %r26;
	sub.ftz.f32 	%f21, %f7, %f20;
	add.s32 	%r28, %r5, -1;
	min.s32 	%r29, %r28, %r18;
	max.s32 	%r30, %r22, %r29;
	sub.ftz.f32 	%f22, %f7, %f15;
	// wx0 = |1 - dx|, wy0 = |1 - dy|.
	abs.ftz.f32 	%f23, %f22;
	abs.ftz.f32 	%f24, %f21;
	// Tap (0,0): texel(x0, y0) * wx0 * wy0.
	mul.ftz.f32 	%f25, %f23, %f24;
	mul.lo.s32 	%r31, %r27, %r4;
	add.s32 	%r32, %r31, %r30;
	mul.wide.s32 	%rd7, %r32, 8;
	add.s64 	%rd8, %rd6, %rd7;
	ld.global.v2.f32 	{%f26, %f27}, [%rd8];
	fma.rn.ftz.f32 	%f29, %f26, %f25, 0f00000000;
	fma.rn.ftz.f32 	%f31, %f27, %f25, 0f00000000;
	// Tap (0,1): y1 = max(0, min(param_8 - 1, iy + 1)); wy1 = |0 - dy|.
	add.s32 	%r33, %r19, 1;
	min.s32 	%r34, %r25, %r33;
	max.s32 	%r35, %r22, %r34;
	sub.ftz.f32 	%f32, %f16, %f20;
	abs.ftz.f32 	%f33, %f32;
	mul.ftz.f32 	%f34, %f23, %f33;
	mul.lo.s32 	%r36, %r35, %r4;
	add.s32 	%r37, %r36, %r30;
	mul.wide.s32 	%rd9, %r37, 8;
	add.s64 	%rd10, %rd6, %rd9;
	ld.global.v2.f32 	{%f35, %f36}, [%rd10];
	fma.rn.ftz.f32 	%f38, %f35, %f34, %f29;
	fma.rn.ftz.f32 	%f40, %f36, %f34, %f31;
	// Tap (1,0): x1 = max(0, min(param_7 - 1, ix + 1)); wx1 = |0 - dx|.
	add.s32 	%r38, %r18, 1;
	min.s32 	%r39, %r28, %r38;
	max.s32 	%r40, %r22, %r39;
	sub.ftz.f32 	%f41, %f16, %f15;
	abs.ftz.f32 	%f42, %f41;
	mul.ftz.f32 	%f43, %f42, %f24;
	add.s32 	%r41, %r31, %r40;
	mul.wide.s32 	%rd11, %r41, 8;
	add.s64 	%rd12, %rd6, %rd11;
	ld.global.v2.f32 	{%f44, %f45}, [%rd12];
	fma.rn.ftz.f32 	%f47, %f44, %f43, %f38;
	fma.rn.ftz.f32 	%f49, %f45, %f43, %f40;
	// Tap (1,1): texel(x1, y1) * wx1 * wy1.
	mul.ftz.f32 	%f50, %f42, %f33;
	add.s32 	%r42, %r36, %r40;
	mul.wide.s32 	%rd13, %r42, 8;
	add.s64 	%rd14, %rd6, %rd13;
	ld.global.v2.f32 	{%f51, %f52}, [%rd14];
	fma.rn.ftz.f32 	%f54, %f51, %f50, %f47;
	fma.rn.ftz.f32 	%f56, %f52, %f50, %f49;
	// Channel 0 * param_11 -> param_0 plane; channel 1 * param_12 -> param_1
	// plane; both at element index row * param_2 + column.
	mul.ftz.f32 	%f57, %f54, %f3;
	mad.lo.s32 	%r43, %r2, %r3, %r1;
	mul.wide.s32 	%rd15, %r43, 4;
	add.s64 	%rd16, %rd5, %rd15;
	st.global.f32 	[%rd16], %f57;
	mul.ftz.f32 	%f58, %f56, %f4;
	add.s64 	%rd17, %rd4, %rd15;
	st.global.f32 	[%rd17], %f58;

BB5_2:
	ret;
}

// Kernel: bilinear resize of a 2 x f32 source image into a 2 x f32
// destination with an independent multiplier per channel, one destination
// texel per thread.
//
//   param_0   destination base pointer (8-byte texels of two f32)
//   param_1   destination row pitch in texels
//   param_2   destination column count; threads with column >= param_2 exit
//   param_3   destination row count; threads with row >= param_3 exit
//   param_4   source base pointer (8-byte texels of two f32)
//   param_5   source row pitch in texels
//   param_6   source column bound; indices clamped to [0, param_6 - 1]
//   param_7   source row bound; indices clamped to [0, param_7 - 1]
//   param_8   x divisor: source x = (column + 1) / param_8 - 1
//   param_9   y divisor: source y = (row + 1) / param_9 - 1
//   param_10  multiplier applied to channel 0
//   param_11  multiplier applied to channel 1
//
// With ix = trunc(x), iy = trunc(y), dx = clamp(x - ix, 0, 1) and
// dy = clamp(y - iy, 0, 1) the sampled value is
//   sum over m,n in {0,1} of |1 - m - dx| * |1 - n - dy| * texel(ix + m, iy + n)
// accumulated in the tap order (m,n) = (0,0), (0,1), (1,0), (1,1).
.visible .entry _d_resize_flow_kernel_f2p_f2p(
	.param .u64 _d_resize_flow_kernel_f2p_f2p_param_0,
	.param .u32 _d_resize_flow_kernel_f2p_f2p_param_1,
	.param .u32 _d_resize_flow_kernel_f2p_f2p_param_2,
	.param .u32 _d_resize_flow_kernel_f2p_f2p_param_3,
	.param .u64 _d_resize_flow_kernel_f2p_f2p_param_4,
	.param .u32 _d_resize_flow_kernel_f2p_f2p_param_5,
	.param .u32 _d_resize_flow_kernel_f2p_f2p_param_6,
	.param .u32 _d_resize_flow_kernel_f2p_f2p_param_7,
	.param .f32 _d_resize_flow_kernel_f2p_f2p_param_8,
	.param .f32 _d_resize_flow_kernel_f2p_f2p_param_9,
	.param .f32 _d_resize_flow_kernel_f2p_f2p_param_10,
	.param .f32 _d_resize_flow_kernel_f2p_f2p_param_11
)
{
	.reg .pred 	%p<6>;
	.reg .s32 	%r<44>;
	.reg .f32 	%f<59>;
	.reg .s64 	%rd<15>;


	ld.param.u64 	%rd1, [_d_resize_flow_kernel_f2p_f2p_param_0];
	ld.param.u32 	%r3, [_d_resize_flow_kernel_f2p_f2p_param_1];
	ld.param.u32 	%r7, [_d_resize_flow_kernel_f2p_f2p_param_2];
	ld.param.u32 	%r8, [_d_resize_flow_kernel_f2p_f2p_param_3];
	ld.param.u64 	%rd2, [_d_resize_flow_kernel_f2p_f2p_param_4];
	ld.param.u32 	%r4, [_d_resize_flow_kernel_f2p_f2p_param_5];
	ld.param.u32 	%r5, [_d_resize_flow_kernel_f2p_f2p_param_6];
	ld.param.u32 	%r6, [_d_resize_flow_kernel_f2p_f2p_param_7];
	ld.param.f32 	%f1, [_d_resize_flow_kernel_f2p_f2p_param_8];
	ld.param.f32 	%f2, [_d_resize_flow_kernel_f2p_f2p_param_9];
	ld.param.f32 	%f3, [_d_resize_flow_kernel_f2p_f2p_param_10];
	ld.param.f32 	%f4, [_d_resize_flow_kernel_f2p_f2p_param_11];
	// %r1 = destination column, %r2 = destination row of this thread.
	mov.u32 	%r9, %ntid.x;
	mov.u32 	%r10, %ctaid.x;
	mov.u32 	%r11, %tid.x;
	mad.lo.s32 	%r1, %r9, %r10, %r11;
	mov.u32 	%r12, %ntid.y;
	mov.u32 	%r13, %ctaid.y;
	mov.u32 	%r14, %tid.y;
	mad.lo.s32 	%r2, %r12, %r13, %r14;
	setp.lt.s32	%p1, %r1, %r7;
	setp.lt.s32	%p2, %r2, %r8;
	and.pred  	%p3, %p1, %p2;
	@!%p3 bra 	BB6_2;
	bra.uni 	BB6_1;

BB6_1:
	cvta.to.global.u64 	%rd3, %rd1;
	cvta.to.global.u64 	%rd4, %rd2;
	// Source coordinates: x = (column + 1) * rcp(param_8) - 1, likewise y.
	add.s32 	%r15, %r1, 1;
	cvt.rn.f32.s32	%f5, %r15;
	rcp.approx.ftz.f32 	%f6, %f1;
	mov.f32 	%f7, 0f3F800000;
	fma.rn.ftz.f32 	%f8, %f5, %f6, 0fBF800000;
	add.s32 	%r17, %r2, 1;
	cvt.rn.f32.s32	%f9, %r17;
	rcp.approx.ftz.f32 	%f10, %f2;
	fma.rn.ftz.f32 	%f11, %f9, %f10, 0fBF800000;
	// ix = trunc(x), iy = trunc(y).
	cvt.rzi.ftz.s32.f32	%r18, %f8;
	cvt.rzi.ftz.s32.f32	%r19, %f11;
	// dx = clamp(x - ix, 0, 1), clamped in f32: clamping after a float->int
	// conversion would discard the fraction and make every weight 0 or 1.
	cvt.rn.f32.s32	%f12, %r18;
	sub.ftz.f32 	%f13, %f8, %f12;
	min.ftz.f32 	%f14, %f13, 0f3F800000;
	mov.u32 	%r22, 0;
	max.ftz.f32 	%f15, %f14, 0f00000000;
	mov.f32 	%f16, 0f00000000;
	// dy = clamp(y - iy, 0, 1).
	cvt.rn.f32.s32	%f17, %r19;
	sub.ftz.f32 	%f18, %f11, %f17;
	min.ftz.f32 	%f19, %f18, 0f3F800000;
	max.ftz.f32 	%f20, %f19, 0f00000000;
	// y0 = max(0, min(param_7 - 1, iy)); x0 = max(0, min(param_6 - 1, ix)).
	add.s32 	%r25, %r6, -1;
	min.s32 	%r26, %r25, %r19;
	max.s32 	%r27, %r22, %r26;
	sub.ftz.f32 	%f21, %f7, %f20;
	add.s32 	%r28, %r5, -1;
	min.s32 	%r29, %r28, %r18;
	max.s32 	%r30, %r22, %r29;
	sub.ftz.f32 	%f22, %f7, %f15;
	// wx0 = |1 - dx|, wy0 = |1 - dy|.
	abs.ftz.f32 	%f23, %f22;
	abs.ftz.f32 	%f24, %f21;
	// Tap (0,0): texel(x0, y0) * wx0 * wy0.
	mul.ftz.f32 	%f25, %f23, %f24;
	mul.lo.s32 	%r31, %r27, %r4;
	add.s32 	%r32, %r31, %r30;
	mul.wide.s32 	%rd5, %r32, 8;
	add.s64 	%rd6, %rd4, %rd5;
	ld.global.v2.f32 	{%f26, %f27}, [%rd6];
	fma.rn.ftz.f32 	%f29, %f26, %f25, 0f00000000;
	fma.rn.ftz.f32 	%f31, %f27, %f25, 0f00000000;
	// Tap (0,1): y1 = max(0, min(param_7 - 1, iy + 1)); wy1 = |0 - dy|.
	add.s32 	%r33, %r19, 1;
	min.s32 	%r34, %r25, %r33;
	max.s32 	%r35, %r22, %r34;
	sub.ftz.f32 	%f32, %f16, %f20;
	abs.ftz.f32 	%f33, %f32;
	mul.ftz.f32 	%f34, %f23, %f33;
	mul.lo.s32 	%r36, %r35, %r4;
	add.s32 	%r37, %r36, %r30;
	mul.wide.s32 	%rd7, %r37, 8;
	add.s64 	%rd8, %rd4, %rd7;
	ld.global.v2.f32 	{%f35, %f36}, [%rd8];
	fma.rn.ftz.f32 	%f38, %f35, %f34, %f29;
	fma.rn.ftz.f32 	%f40, %f36, %f34, %f31;
	// Tap (1,0): x1 = max(0, min(param_6 - 1, ix + 1)); wx1 = |0 - dx|.
	add.s32 	%r38, %r18, 1;
	min.s32 	%r39, %r28, %r38;
	max.s32 	%r40, %r22, %r39;
	sub.ftz.f32 	%f41, %f16, %f15;
	abs.ftz.f32 	%f42, %f41;
	mul.ftz.f32 	%f43, %f42, %f24;
	add.s32 	%r41, %r31, %r40;
	mul.wide.s32 	%rd9, %r41, 8;
	add.s64 	%rd10, %rd4, %rd9;
	ld.global.v2.f32 	{%f44, %f45}, [%rd10];
	fma.rn.ftz.f32 	%f47, %f44, %f43, %f38;
	fma.rn.ftz.f32 	%f49, %f45, %f43, %f40;
	// Tap (1,1): texel(x1, y1) * wx1 * wy1.
	mul.ftz.f32 	%f50, %f42, %f33;
	add.s32 	%r42, %r36, %r40;
	mul.wide.s32 	%rd11, %r42, 8;
	add.s64 	%rd12, %rd4, %rd11;
	ld.global.v2.f32 	{%f51, %f52}, [%rd12];
	fma.rn.ftz.f32 	%f54, %f51, %f50, %f47;
	fma.rn.ftz.f32 	%f56, %f52, %f50, %f49;
	// Store {channel 0 * param_10, channel 1 * param_11} at destination
	// texel index row * param_1 + column.
	mad.lo.s32 	%r43, %r2, %r3, %r1;
	mul.wide.s32 	%rd13, %r43, 8;
	add.s64 	%rd14, %rd3, %rd13;
	mul.ftz.f32 	%f57, %f56, %f4;
	mul.ftz.f32 	%f58, %f54, %f3;
	st.global.v2.f32 	[%rd14], {%f58, %f57};

BB6_2:
	ret;
}

// Kernel: fills a single-channel f32 image with a constant, one element
// per thread.
//
//   param_0  destination base pointer (one f32 per element)
//   param_1  destination row pitch in elements
//   param_2  fill value
//   param_3  column count; threads with column >= param_3 do nothing
//   param_4  row count; threads with row >= param_4 do nothing
//
// Column = ntid.x * ctaid.x + tid.x, row = ntid.y * ctaid.y + tid.y.
.visible .entry _d_image_init_kernel_float(
	.param .u64 _d_image_init_kernel_float_param_0,
	.param .u32 _d_image_init_kernel_float_param_1,
	.param .f32 _d_image_init_kernel_float_param_2,
	.param .u32 _d_image_init_kernel_float_param_3,
	.param .u32 _d_image_init_kernel_float_param_4
)
{
	.reg .pred 	%col_ok, %row_ok, %inside;
	.reg .s32 	%pitch, %n_cols, %n_rows;
	.reg .s32 	%blk_w, %blk_x, %thr_x, %blk_h, %blk_y, %thr_y;
	.reg .s32 	%col, %row, %idx;
	.reg .f32 	%fill;
	.reg .s64 	%dst_gen, %dst_base, %dst_off, %dst_ptr;

	// Kernel arguments.
	ld.param.u64 	%dst_gen, [_d_image_init_kernel_float_param_0];
	ld.param.u32 	%pitch, [_d_image_init_kernel_float_param_1];
	ld.param.f32 	%fill, [_d_image_init_kernel_float_param_2];
	ld.param.u32 	%n_cols, [_d_image_init_kernel_float_param_3];
	ld.param.u32 	%n_rows, [_d_image_init_kernel_float_param_4];

	// Element coordinates of this thread.
	mov.u32 	%blk_w, %ntid.x;
	mov.u32 	%blk_x, %ctaid.x;
	mov.u32 	%thr_x, %tid.x;
	mad.lo.s32 	%col, %blk_w, %blk_x, %thr_x;
	mov.u32 	%blk_h, %ntid.y;
	mov.u32 	%blk_y, %ctaid.y;
	mov.u32 	%thr_y, %tid.y;
	mad.lo.s32 	%row, %blk_h, %blk_y, %thr_y;

	// Threads outside the column/row counts leave immediately.
	setp.lt.s32	%col_ok, %col, %n_cols;
	setp.lt.s32	%row_ok, %row, %n_rows;
	and.pred  	%inside, %col_ok, %row_ok;
	@!%inside bra 	BB7_2;

	// dst[row * pitch + col] = fill.
	cvta.to.global.u64 	%dst_base, %dst_gen;
	mad.lo.s32 	%idx, %row, %pitch, %col;
	mul.wide.s32 	%dst_off, %idx, 4;
	add.s64 	%dst_ptr, %dst_base, %dst_off;
	st.global.f32 	[%dst_ptr], %fill;

BB7_2:
	ret;
}

// _d_image_init_kernel_float2
//
// Two-component variant of the fill kernel. One thread per (col, row):
//   col = %ntid.x * %ctaid.x + %tid.x
//   row = %ntid.y * %ctaid.y + %tid.y
// When col < param_3 and row < param_4 (signed compares), the thread stores
// the pair of f32 values held in param_2 to
//   param_0 + 8 * (row * param_1 + col)
// with a single 64-bit vector store.
//
//   param_0  generic address of the destination buffer
//   param_1  element stride between consecutive rows of the destination
//   param_2  8-byte aggregate: f32 at offset 0 and f32 at offset 4
//   param_3  exclusive upper bound for col
//   param_4  exclusive upper bound for row
.visible .entry _d_image_init_kernel_float2(
	.param .u64 _d_image_init_kernel_float2_param_0,
	.param .u32 _d_image_init_kernel_float2_param_1,
	.param .align 8 .b8 _d_image_init_kernel_float2_param_2[8],
	.param .u32 _d_image_init_kernel_float2_param_3,
	.param .u32 _d_image_init_kernel_float2_param_4
)
{
	.reg .pred 	%col_ok, %row_ok, %in_range;
	.reg .s32 	%blk_w, %blk_x, %thr_x, %blk_h, %blk_y, %thr_y;
	.reg .s32 	%col, %row, %pitch, %col_end, %row_end, %elem;
	.reg .f32 	%fill_lo, %fill_hi;
	.reg .s64 	%dst_generic, %dst_global, %byte_off, %dst_addr;

	// Position of this thread in the launch grid.
	mov.u32 	%blk_w, %ntid.x;
	mov.u32 	%blk_x, %ctaid.x;
	mov.u32 	%thr_x, %tid.x;
	mad.lo.s32 	%col, %blk_w, %blk_x, %thr_x;
	mov.u32 	%blk_h, %ntid.y;
	mov.u32 	%blk_y, %ctaid.y;
	mov.u32 	%thr_y, %tid.y;
	mad.lo.s32 	%row, %blk_h, %blk_y, %thr_y;

	// Guard: threads outside [0, param_3) x [0, param_4) do nothing.
	ld.param.u32 	%col_end, [_d_image_init_kernel_float2_param_3];
	ld.param.u32 	%row_end, [_d_image_init_kernel_float2_param_4];
	setp.lt.s32	%col_ok, %col, %col_end;
	setp.lt.s32	%row_ok, %row, %row_end;
	and.pred  	%in_range, %col_ok, %row_ok;
	@!%in_range bra 	INIT_F32X2_DONE;

	// dst[row * pitch + col] = {fill_lo, fill_hi}   (8 bytes per element)
	ld.param.u64 	%dst_generic, [_d_image_init_kernel_float2_param_0];
	ld.param.u32 	%pitch, [_d_image_init_kernel_float2_param_1];
	ld.param.f32 	%fill_lo, [_d_image_init_kernel_float2_param_2];
	ld.param.f32 	%fill_hi, [_d_image_init_kernel_float2_param_2+4];
	cvta.to.global.u64 	%dst_global, %dst_generic;
	mad.lo.s32 	%elem, %row, %pitch, %col;
	mul.wide.s32 	%byte_off, %elem, 8;
	add.s64 	%dst_addr, %dst_global, %byte_off;
	st.global.v2.f32 	[%dst_addr], {%fill_lo, %fill_hi};

INIT_F32X2_DONE:
	ret;
}

// _d_image_cvt_vec2_to_sg2_kernel_float_float2
//
// Splits a buffer of two-component f32 elements into two single-component
// f32 buffers. One thread per (col, row):
//   col = %ntid.x * %ctaid.x + %tid.x
//   row = %ntid.y * %ctaid.y + %tid.y
// When col < param_5 and row < param_6 (signed compares), the thread reads
// the pair at  param_3 + 8 * (row * param_4 + col)  and writes its first
// component to param_0 and its second component to param_1, both at byte
// offset  4 * (row * param_2 + col).
//
//   param_0  generic address of the output receiving the first component
//   param_1  generic address of the output receiving the second component
//   param_2  element stride between rows, shared by both outputs
//   param_3  generic address of the two-component input
//   param_4  element stride between rows of the input
//   param_5  exclusive upper bound for col
//   param_6  exclusive upper bound for row
.visible .entry _d_image_cvt_vec2_to_sg2_kernel_float_float2(
	.param .u64 _d_image_cvt_vec2_to_sg2_kernel_float_float2_param_0,
	.param .u64 _d_image_cvt_vec2_to_sg2_kernel_float_float2_param_1,
	.param .u32 _d_image_cvt_vec2_to_sg2_kernel_float_float2_param_2,
	.param .u64 _d_image_cvt_vec2_to_sg2_kernel_float_float2_param_3,
	.param .u32 _d_image_cvt_vec2_to_sg2_kernel_float_float2_param_4,
	.param .u32 _d_image_cvt_vec2_to_sg2_kernel_float_float2_param_5,
	.param .u32 _d_image_cvt_vec2_to_sg2_kernel_float_float2_param_6
)
{
	.reg .pred 	%col_ok, %row_ok, %in_range;
	.reg .s32 	%blk_w, %blk_x, %thr_x, %blk_h, %blk_y, %thr_y;
	.reg .s32 	%col, %row, %col_end, %row_end;
	.reg .s32 	%out_pitch, %in_pitch, %in_elem, %out_elem;
	.reg .f32 	%comp0, %comp1;
	.reg .s64 	%out0_generic, %out1_generic, %in_generic;
	.reg .s64 	%out0_global, %out1_global, %in_global;
	.reg .s64 	%in_off, %in_addr, %out_off, %out0_addr, %out1_addr;

	// Position of this thread in the launch grid.
	mov.u32 	%blk_w, %ntid.x;
	mov.u32 	%blk_x, %ctaid.x;
	mov.u32 	%thr_x, %tid.x;
	mad.lo.s32 	%col, %blk_w, %blk_x, %thr_x;
	mov.u32 	%blk_h, %ntid.y;
	mov.u32 	%blk_y, %ctaid.y;
	mov.u32 	%thr_y, %tid.y;
	mad.lo.s32 	%row, %blk_h, %blk_y, %thr_y;

	// Guard: threads outside [0, param_5) x [0, param_6) do nothing.
	ld.param.u32 	%col_end, [_d_image_cvt_vec2_to_sg2_kernel_float_float2_param_5];
	ld.param.u32 	%row_end, [_d_image_cvt_vec2_to_sg2_kernel_float_float2_param_6];
	setp.lt.s32	%col_ok, %col, %col_end;
	setp.lt.s32	%row_ok, %row, %row_end;
	and.pred  	%in_range, %col_ok, %row_ok;
	@!%in_range bra 	VEC2_TO_SG2_DONE;

	// Fetch the two-component element (8 bytes per element).
	ld.param.u64 	%in_generic, [_d_image_cvt_vec2_to_sg2_kernel_float_float2_param_3];
	ld.param.u32 	%in_pitch, [_d_image_cvt_vec2_to_sg2_kernel_float_float2_param_4];
	cvta.to.global.u64 	%in_global, %in_generic;
	mad.lo.s32 	%in_elem, %row, %in_pitch, %col;
	mul.wide.s32 	%in_off, %in_elem, 8;
	add.s64 	%in_addr, %in_global, %in_off;
	ld.global.v2.f32 	{%comp0, %comp1}, [%in_addr];

	// Both outputs share one byte offset (4 bytes per element).
	ld.param.u32 	%out_pitch, [_d_image_cvt_vec2_to_sg2_kernel_float_float2_param_2];
	mad.lo.s32 	%out_elem, %row, %out_pitch, %col;
	mul.wide.s32 	%out_off, %out_elem, 4;

	// First component -> param_0.
	ld.param.u64 	%out0_generic, [_d_image_cvt_vec2_to_sg2_kernel_float_float2_param_0];
	cvta.to.global.u64 	%out0_global, %out0_generic;
	add.s64 	%out0_addr, %out0_global, %out_off;
	st.global.f32 	[%out0_addr], %comp0;

	// Second component -> param_1.
	ld.param.u64 	%out1_generic, [_d_image_cvt_vec2_to_sg2_kernel_float_float2_param_1];
	cvta.to.global.u64 	%out1_global, %out1_generic;
	add.s64 	%out1_addr, %out1_global, %out_off;
	st.global.f32 	[%out1_addr], %comp1;

VEC2_TO_SG2_DONE:
	ret;
}

// _d_image_cvt_sg2_to_vec2_kernel_float2_float
//
// Packs two single-component f32 buffers into one buffer of two-component
// f32 elements. One thread per (col, row):
//   col = %ntid.x * %ctaid.x + %tid.x
//   row = %ntid.y * %ctaid.y + %tid.y
// When col < param_5 and row < param_6 (signed compares), the thread reads
// one f32 from each of param_2 and param_3 at byte offset
//   4 * (row * param_4 + col)
// and stores the pair {value from param_2, value from param_3} to
//   param_0 + 8 * (row * param_1 + col).
//
//   param_0  generic address of the two-component output
//   param_1  element stride between rows of the output
//   param_2  generic address of the input supplying the first component
//   param_3  generic address of the input supplying the second component
//   param_4  element stride between rows, shared by both inputs
//   param_5  exclusive upper bound for col
//   param_6  exclusive upper bound for row
.visible .entry _d_image_cvt_sg2_to_vec2_kernel_float2_float(
	.param .u64 _d_image_cvt_sg2_to_vec2_kernel_float2_float_param_0,
	.param .u32 _d_image_cvt_sg2_to_vec2_kernel_float2_float_param_1,
	.param .u64 _d_image_cvt_sg2_to_vec2_kernel_float2_float_param_2,
	.param .u64 _d_image_cvt_sg2_to_vec2_kernel_float2_float_param_3,
	.param .u32 _d_image_cvt_sg2_to_vec2_kernel_float2_float_param_4,
	.param .u32 _d_image_cvt_sg2_to_vec2_kernel_float2_float_param_5,
	.param .u32 _d_image_cvt_sg2_to_vec2_kernel_float2_float_param_6
)
{
	.reg .pred 	%col_ok, %row_ok, %in_range;
	.reg .s32 	%blk_w, %blk_x, %thr_x, %blk_h, %blk_y, %thr_y;
	.reg .s32 	%col, %row, %col_end, %row_end;
	.reg .s32 	%out_pitch, %in_pitch, %in_elem, %out_elem;
	.reg .f32 	%second, %first;
	.reg .s64 	%out_generic, %in0_generic, %in1_generic;
	.reg .s64 	%out_global, %in0_global, %in1_global;
	.reg .s64 	%in_off, %in0_addr, %in1_addr, %out_off, %out_addr;

	// Position of this thread in the launch grid.
	mov.u32 	%blk_w, %ntid.x;
	mov.u32 	%blk_x, %ctaid.x;
	mov.u32 	%thr_x, %tid.x;
	mad.lo.s32 	%col, %blk_w, %blk_x, %thr_x;
	mov.u32 	%blk_h, %ntid.y;
	mov.u32 	%blk_y, %ctaid.y;
	mov.u32 	%thr_y, %tid.y;
	mad.lo.s32 	%row, %blk_h, %blk_y, %thr_y;

	// Guard: threads outside [0, param_5) x [0, param_6) do nothing.
	ld.param.u32 	%col_end, [_d_image_cvt_sg2_to_vec2_kernel_float2_float_param_5];
	ld.param.u32 	%row_end, [_d_image_cvt_sg2_to_vec2_kernel_float2_float_param_6];
	setp.lt.s32	%col_ok, %col, %col_end;
	setp.lt.s32	%row_ok, %row, %row_end;
	and.pred  	%in_range, %col_ok, %row_ok;
	@!%in_range bra 	SG2_TO_VEC2_DONE;

	// Both inputs share one byte offset (4 bytes per element).
	ld.param.u32 	%in_pitch, [_d_image_cvt_sg2_to_vec2_kernel_float2_float_param_4];
	mad.lo.s32 	%in_elem, %row, %in_pitch, %col;
	mul.wide.s32 	%in_off, %in_elem, 4;

	ld.param.u64 	%in0_generic, [_d_image_cvt_sg2_to_vec2_kernel_float2_float_param_2];
	cvta.to.global.u64 	%in0_global, %in0_generic;
	add.s64 	%in0_addr, %in0_global, %in_off;

	ld.param.u64 	%in1_generic, [_d_image_cvt_sg2_to_vec2_kernel_float2_float_param_3];
	cvta.to.global.u64 	%in1_global, %in1_generic;
	add.s64 	%in1_addr, %in1_global, %in_off;

	// Output element address (8 bytes per element).
	ld.param.u64 	%out_generic, [_d_image_cvt_sg2_to_vec2_kernel_float2_float_param_0];
	ld.param.u32 	%out_pitch, [_d_image_cvt_sg2_to_vec2_kernel_float2_float_param_1];
	cvta.to.global.u64 	%out_global, %out_generic;
	mad.lo.s32 	%out_elem, %row, %out_pitch, %col;
	mul.wide.s32 	%out_off, %out_elem, 8;
	add.s64 	%out_addr, %out_global, %out_off;

	// Gather the two scalars and emit them as one vector store.
	ld.global.f32 	%second, [%in1_addr];
	ld.global.f32 	%first, [%in0_addr];
	st.global.v2.f32 	[%out_addr], {%first, %second};

SG2_TO_VEC2_DONE:
	ret;
}

// _d_image_mutiply_scalar_kernel_float2
//
// Scales every two-component f32 element of an input buffer by one scalar.
// One thread per (col, row):
//   col = %ntid.x * %ctaid.x + %tid.x
//   row = %ntid.y * %ctaid.y + %tid.y
// When col < param_5 and row < param_6 (signed compares), the thread reads
// the pair at  param_2 + 8 * (row * param_3 + col),  multiplies each
// component by param_4 (mul.ftz.f32) and stores the resulting pair to
//   param_0 + 8 * (row * param_1 + col).
//
//   param_0  generic address of the output buffer
//   param_1  element stride between rows of the output
//   param_2  generic address of the input buffer
//   param_3  element stride between rows of the input
//   param_4  f32 scale factor
//   param_5  exclusive upper bound for col
//   param_6  exclusive upper bound for row
.visible .entry _d_image_mutiply_scalar_kernel_float2(
	.param .u64 _d_image_mutiply_scalar_kernel_float2_param_0,
	.param .u32 _d_image_mutiply_scalar_kernel_float2_param_1,
	.param .u64 _d_image_mutiply_scalar_kernel_float2_param_2,
	.param .u32 _d_image_mutiply_scalar_kernel_float2_param_3,
	.param .f32 _d_image_mutiply_scalar_kernel_float2_param_4,
	.param .u32 _d_image_mutiply_scalar_kernel_float2_param_5,
	.param .u32 _d_image_mutiply_scalar_kernel_float2_param_6
)
{
	.reg .pred 	%col_ok, %row_ok, %in_range;
	.reg .s32 	%blk_w, %blk_x, %thr_x, %blk_h, %blk_y, %thr_y;
	.reg .s32 	%col, %row, %col_end, %row_end;
	.reg .s32 	%out_pitch, %in_pitch, %out_elem, %in_elem;
	.reg .f32 	%scale, %src0, %src1, %res0, %res1;
	.reg .s64 	%out_generic, %in_generic, %out_global, %in_global;
	.reg .s64 	%out_off, %out_addr, %in_off, %in_addr;

	// Position of this thread in the launch grid.
	mov.u32 	%blk_w, %ntid.x;
	mov.u32 	%blk_x, %ctaid.x;
	mov.u32 	%thr_x, %tid.x;
	mad.lo.s32 	%col, %blk_w, %blk_x, %thr_x;
	mov.u32 	%blk_h, %ntid.y;
	mov.u32 	%blk_y, %ctaid.y;
	mov.u32 	%thr_y, %tid.y;
	mad.lo.s32 	%row, %blk_h, %blk_y, %thr_y;

	// Guard: threads outside [0, param_5) x [0, param_6) do nothing.
	ld.param.u32 	%col_end, [_d_image_mutiply_scalar_kernel_float2_param_5];
	ld.param.u32 	%row_end, [_d_image_mutiply_scalar_kernel_float2_param_6];
	setp.lt.s32	%col_ok, %col, %col_end;
	setp.lt.s32	%row_ok, %row, %row_end;
	and.pred  	%in_range, %col_ok, %row_ok;
	@!%in_range bra 	MUL_SCALAR_F32X2_DONE;

	// Input element address (8 bytes per element).
	ld.param.u64 	%in_generic, [_d_image_mutiply_scalar_kernel_float2_param_2];
	ld.param.u32 	%in_pitch, [_d_image_mutiply_scalar_kernel_float2_param_3];
	cvta.to.global.u64 	%in_global, %in_generic;
	mad.lo.s32 	%in_elem, %row, %in_pitch, %col;
	mul.wide.s32 	%in_off, %in_elem, 8;
	add.s64 	%in_addr, %in_global, %in_off;

	// Output element address (8 bytes per element).
	ld.param.u64 	%out_generic, [_d_image_mutiply_scalar_kernel_float2_param_0];
	ld.param.u32 	%out_pitch, [_d_image_mutiply_scalar_kernel_float2_param_1];
	cvta.to.global.u64 	%out_global, %out_generic;
	mad.lo.s32 	%out_elem, %row, %out_pitch, %col;
	mul.wide.s32 	%out_off, %out_elem, 8;
	add.s64 	%out_addr, %out_global, %out_off;

	// out = in * scale, component-wise.
	ld.param.f32 	%scale, [_d_image_mutiply_scalar_kernel_float2_param_4];
	ld.global.v2.f32 	{%src0, %src1}, [%in_addr];
	mul.ftz.f32 	%res0, %src0, %scale;
	mul.ftz.f32 	%res1, %src1, %scale;
	st.global.v2.f32 	[%out_addr], {%res0, %res1};

MUL_SCALAR_F32X2_DONE:
	ret;
}


