//
// Generated by NVIDIA NVVM Compiler
// Compiler built on Fri Jul 25 04:36:16 2014 (1406288176)
// Cuda compilation tools, release 6.5, V6.5.13
//

.version 4.1
.target sm_30
.address_size 64


//
// SDRConform: per-pixel kernel that reads one 4-component pixel, transforms
// its first three components and writes the pixel back to the same address.
//
// Thread mapping (one thread per pixel):
//   col = %ntid.x * %ctaid.x + %tid.x
//   row = %ntid.y * %ctaid.y + %tid.y
//   Threads with col >= param_2 or row >= param_3 return without touching memory.
//
// Parameters, as used by the code below:
//   param_0      generic pointer to the pixel buffer (converted to a global address).
//   param_1      row pitch in pixels; pixel index = row * param_1 + col.
//   param_2      exclusive upper bound on col (signed compare).
//   param_3      exclusive upper bound on row (signed compare).
//   param_4      pixel format flag: non-zero -> 4 x f32 (16 bytes per pixel),
//                zero -> 4 x f16 (8 bytes per pixel).
//   param_5..7   three floats each (bytes 0, 4, 8): rows of the input 3x3 matrix.
//   param_8      two floats: fallback values for the two normalised ratios
//                when the sum of the three matrix outputs is <= ~1e-6.
//   param_9      scale applied to the clamped middle matrix output.
//   param_10     slope used when the curve input is at or below param_12.
//   param_11     offset/floor of the curve segment used above param_12.
//   param_12     threshold ("knee") that selects between the two curve segments.
//   param_13     exponent of the curve segment used above param_12.
//   param_14..16 three floats each (bytes 0, 4, 8): rows of the output 3x3 matrix.
//
// NOTE(review): the constants (0.081, 1/4.5, 0.099, 1/0.45 on input and
// 0.018, 4.5, 0.45, 1.099 on output) look like a Rec.709-style transfer
// function, and the ratio/reconstruction steps look like XYZ <-> xyY, so
// param_5..7 / param_14..16 are presumably RGB->XYZ / XYZ->RGB matrices and
// param_8 a white point. Confirm against the original CUDA source.
//
// Component .w of the pixel is written back without modification (in the f16
// format it is round-tripped f16 -> f32 -> f16).
//
.visible .entry SDRConform(
	.param .u64 SDRConform_param_0,
	.param .u32 SDRConform_param_1,
	.param .u32 SDRConform_param_2,
	.param .u32 SDRConform_param_3,
	.param .u32 SDRConform_param_4,
	.param .align 16 .b8 SDRConform_param_5[16],
	.param .align 16 .b8 SDRConform_param_6[16],
	.param .align 16 .b8 SDRConform_param_7[16],
	.param .align 8 .b8 SDRConform_param_8[8],
	.param .f32 SDRConform_param_9,
	.param .f32 SDRConform_param_10,
	.param .f32 SDRConform_param_11,
	.param .f32 SDRConform_param_12,
	.param .f32 SDRConform_param_13,
	.param .align 16 .b8 SDRConform_param_14[16],
	.param .align 16 .b8 SDRConform_param_15[16],
	.param .align 16 .b8 SDRConform_param_16[16]
)
{
	// Virtual register banks declared by the compiler; not every index is used.
	.reg .pred 	%p<24>;
	.reg .s16 	%rs<13>;
	.reg .s32 	%r<49>;
	.reg .f32 	%f<186>;
	.reg .s64 	%rd<14>;


	// ---- Load all kernel parameters into registers -------------------------
	// %rd1 = buffer pointer, %r1 = pitch, %r3 = col bound, %r4 = row bound,
	// %r2 = format flag.
	ld.param.u64 	%rd1, [SDRConform_param_0];
	ld.param.u32 	%r1, [SDRConform_param_1];
	ld.param.u32 	%r3, [SDRConform_param_2];
	ld.param.u32 	%r4, [SDRConform_param_3];
	ld.param.u32 	%r2, [SDRConform_param_4];
	// Input matrix rows: only the first three floats of each 16-byte param are read.
	ld.param.f32 	%f63, [SDRConform_param_5+8];
	ld.param.f32 	%f62, [SDRConform_param_5+4];
	ld.param.f32 	%f61, [SDRConform_param_5];
	ld.param.f32 	%f67, [SDRConform_param_6+8];
	ld.param.f32 	%f66, [SDRConform_param_6+4];
	ld.param.f32 	%f65, [SDRConform_param_6];
	ld.param.f32 	%f71, [SDRConform_param_7+8];
	ld.param.f32 	%f70, [SDRConform_param_7+4];
	ld.param.f32 	%f69, [SDRConform_param_7];
	// Fallback ratio pair (%f73, %f74).
	ld.param.f32 	%f74, [SDRConform_param_8+4];
	ld.param.f32 	%f73, [SDRConform_param_8];
	// Curve controls: %f75 scale, %f76 slope, %f77 offset, %f78 knee, %f79 exponent.
	ld.param.f32 	%f75, [SDRConform_param_9];
	ld.param.f32 	%f76, [SDRConform_param_10];
	ld.param.f32 	%f77, [SDRConform_param_11];
	ld.param.f32 	%f78, [SDRConform_param_12];
	ld.param.f32 	%f79, [SDRConform_param_13];
	// Output matrix rows: again only three floats of each param are read.
	ld.param.f32 	%f82, [SDRConform_param_14+8];
	ld.param.f32 	%f81, [SDRConform_param_14+4];
	ld.param.f32 	%f80, [SDRConform_param_14];
	ld.param.f32 	%f86, [SDRConform_param_15+8];
	ld.param.f32 	%f85, [SDRConform_param_15+4];
	ld.param.f32 	%f84, [SDRConform_param_15];
	ld.param.f32 	%f90, [SDRConform_param_16+8];
	ld.param.f32 	%f89, [SDRConform_param_16+4];
	ld.param.f32 	%f88, [SDRConform_param_16];
	// ---- Compute this thread's pixel coordinates ---------------------------
	// %r8 = col, %r12 = row.
	mov.u32 	%r5, %ntid.x;
	mov.u32 	%r6, %ctaid.x;
	mov.u32 	%r7, %tid.x;
	mad.lo.s32 	%r8, %r5, %r6, %r7;
	mov.u32 	%r9, %ntid.y;
	mov.u32 	%r10, %ctaid.y;
	mov.u32 	%r11, %tid.y;
	mad.lo.s32 	%r12, %r9, %r10, %r11;
	// Bounds check: proceed only when col < param_2 AND row < param_3.
	setp.lt.s32	%p1, %r8, %r3;
	setp.lt.s32	%p2, %r12, %r4;
	and.pred  	%p3, %p1, %p2;
	@!%p3 bra 	BB0_30;
	bra.uni 	BB0_1;

BB0_1:
	// %p4 = (format flag == 0), i.e. the f16 pixel format. It is reused at the
	// end of the kernel to pick the matching store path.
	setp.eq.s32	%p4, %r2, 0;
	@%p4 bra 	BB0_3;

	// ---- Load path A: 4 x f32 pixel (16-byte stride) -----------------------
	// The pixel index is a 32-bit product; only the byte offset is widened to 64 bits.
	cvta.to.global.u64 	%rd2, %rd1;
	mad.lo.s32 	%r21, %r12, %r1, %r8;
	mul.wide.s32 	%rd3, %r21, 16;
	add.s64 	%rd4, %rd2, %rd3;
	ld.global.v4.f32 	{%f92, %f93, %f94, %f95}, [%rd4];
	// Canonical names from here on: %f173=.x, %f174=.y, %f175=.z, %f176=.w
	mov.f32 	%f176, %f95;
	mov.f32 	%f175, %f94;
	mov.f32 	%f174, %f93;
	mov.f32 	%f173, %f92;
	bra.uni 	BB0_4;

BB0_3:
	// ---- Load path B: 4 x f16 pixel (8-byte stride) ------------------------
	// The halves are loaded as raw 16-bit values and widened to f32 one by one.
	cvta.to.global.u64 	%rd5, %rd1;
	mad.lo.s32 	%r30, %r12, %r1, %r8;
	mul.wide.s32 	%rd6, %r30, 8;
	add.s64 	%rd7, %rd5, %rd6;
	ld.global.v4.u16 	{%rs1, %rs2, %rs3, %rs4}, [%rd7];
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs1;
	cvt.f32.f16 	%f173, %temp;
	}
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs2;
	cvt.f32.f16 	%f174, %temp;
	}
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs3;
	cvt.f32.f16 	%f175, %temp;
	}
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs4;
	cvt.f32.f16 	%f176, %temp;
	}

BB0_4:
	// ---- Input transfer curve, applied to |component| ----------------------
	// For each of .z, .y, .x (in that order):
	//   |c| >  ~0.081 : ((|c| + 0.099) * ~0.90992) ^ ~2.2222   (pow via lg2/ex2)
	//   |c| <= ~0.081 : |c| * ~0.22222
	// The sign is reapplied afterwards in BB0_13.
	abs.ftz.f32 	%f15, %f175;
	abs.ftz.f32 	%f17, %f174;
	abs.ftz.f32 	%f19, %f173;
	// Component .z -> %f177
	setp.gt.ftz.f32	%p5, %f15, 0f3DA5E354;
	@%p5 bra 	BB0_6;

	mul.ftz.f32 	%f177, %f15, 0f3E638E39;
	bra.uni 	BB0_7;

BB0_6:
	add.ftz.f32 	%f96, %f15, 0f3DCAC083;
	mul.ftz.f32 	%f97, %f96, 0f3F68F065;
	lg2.approx.ftz.f32 	%f98, %f97;
	mul.ftz.f32 	%f99, %f98, 0f400E38E4;
	ex2.approx.ftz.f32 	%f177, %f99;

BB0_7:
	// Component .y -> %f178
	setp.gt.ftz.f32	%p6, %f17, 0f3DA5E354;
	@%p6 bra 	BB0_9;

	mul.ftz.f32 	%f178, %f17, 0f3E638E39;
	bra.uni 	BB0_10;

BB0_9:
	add.ftz.f32 	%f100, %f17, 0f3DCAC083;
	mul.ftz.f32 	%f101, %f100, 0f3F68F065;
	lg2.approx.ftz.f32 	%f102, %f101;
	mul.ftz.f32 	%f103, %f102, 0f400E38E4;
	ex2.approx.ftz.f32 	%f178, %f103;

BB0_10:
	// Component .x -> %f179
	setp.gt.ftz.f32	%p7, %f19, 0f3DA5E354;
	@%p7 bra 	BB0_12;

	mul.ftz.f32 	%f179, %f19, 0f3E638E39;
	bra.uni 	BB0_13;

BB0_12:
	add.ftz.f32 	%f104, %f19, 0f3DCAC083;
	mul.ftz.f32 	%f105, %f104, 0f3F68F065;
	lg2.approx.ftz.f32 	%f106, %f105;
	mul.ftz.f32 	%f107, %f106, 0f400E38E4;
	ex2.approx.ftz.f32 	%f179, %f107;

BB0_13:
	// Restore the original sign of each component:
	//   %f109 = signed .z, %f111 = signed .y, %f113 = signed .x
	neg.ftz.f32 	%f108, %f177;
	setp.lt.ftz.f32	%p8, %f175, 0f00000000;
	selp.f32	%f109, %f108, %f177, %p8;
	neg.ftz.f32 	%f110, %f178;
	setp.lt.ftz.f32	%p9, %f174, 0f00000000;
	selp.f32	%f111, %f110, %f178, %p9;
	neg.ftz.f32 	%f112, %f179;
	setp.lt.ftz.f32	%p10, %f173, 0f00000000;
	selp.f32	%f113, %f112, %f179, %p10;
	// ---- Input 3x3 matrix --------------------------------------------------
	// Each row is dotted with (.z, .y, .x) against matrix elements [0], [1], [2]:
	//   %f29  = row param_5 . (z, y, x)
	//   %f30  = row param_6 . (z, y, x)
	//   %f120 = row param_7 . (z, y, x)
	mul.ftz.f32 	%f114, %f111, %f62;
	fma.rn.ftz.f32 	%f115, %f109, %f61, %f114;
	fma.rn.ftz.f32 	%f29, %f113, %f63, %f115;
	mul.ftz.f32 	%f116, %f111, %f66;
	fma.rn.ftz.f32 	%f117, %f109, %f65, %f116;
	fma.rn.ftz.f32 	%f30, %f113, %f67, %f117;
	mul.ftz.f32 	%f118, %f111, %f70;
	fma.rn.ftz.f32 	%f119, %f109, %f69, %f118;
	fma.rn.ftz.f32 	%f120, %f113, %f71, %f119;
	// %f31 = sum of the three matrix outputs.
	add.ftz.f32 	%f121, %f29, %f30;
	add.ftz.f32 	%f31, %f121, %f120;
	// Normalised ratios (%f180, %f181) = (%f29, %f30) / sum. When the sum is
	// <= ~1e-6 (or unordered) the division is skipped and the param_8
	// fallback pair preloaded here is kept instead.
	setp.leu.ftz.f32	%p11, %f31, 0f358637BD;
	mov.f32 	%f180, %f73;
	mov.f32 	%f181, %f74;
	@%p11 bra 	BB0_15;

	div.approx.ftz.f32 	%f180, %f29, %f31;
	div.approx.ftz.f32 	%f181, %f30, %f31;

BB0_15:
	// ---- Curve on the middle matrix output (%f30) --------------------------
	// Negative values are clamped to 0, then:
	//   %f38 = 0.5 * log10(1 + ~99.99 * param_9 * max(%f30, 0)) - 1
	// (log10 is computed as lg2 * ~0.30103).
	setp.lt.ftz.f32	%p12, %f30, 0f00000000;
	selp.f32	%f122, 0f00000000, %f30, %p12;
	mul.ftz.f32 	%f123, %f122, %f75;
	fma.rn.ftz.f32 	%f124, %f123, 0f42C7FAE1, 0f3F800000;
	lg2.approx.ftz.f32 	%f125, %f124;
	mul.ftz.f32 	%f126, %f125, 0f3E9A209B;
	fma.rn.ftz.f32 	%f38, %f126, 0f3F000000, 0fBF800000;
	// %p13 remembers the sign of %f38; %f39 = min(|%f38|, 1.0).
	setp.lt.ftz.f32	%p13, %f38, 0f00000000;
	neg.ftz.f32 	%f127, %f38;
	selp.f32	%f128, %f127, %f38, %p13;
	setp.gt.ftz.f32	%p14, %f128, 0f3F800000;
	selp.f32	%f39, 0f3F800000, %f128, %p14;
	// Pick the curve segment: above the knee (param_12) -> BB0_17, else linear.
	setp.gt.ftz.f32	%p15, %f39, %f78;
	@%p15 bra 	BB0_17;

	// Linear segment: %f182 = %f39 * param_10
	mul.ftz.f32 	%f182, %f39, %f76;
	bra.uni 	BB0_18;

BB0_17:
	// Segment above the knee:
	//   t     = (%f39 - knee) / (1 - knee)
	//   %f182 = param_11 + (1 - param_11) * (1 - (1 - t) ^ param_13)
	sub.ftz.f32 	%f129, %f39, %f78;
	mov.f32 	%f130, 0f3F800000;
	sub.ftz.f32 	%f131, %f130, %f78;
	div.approx.ftz.f32 	%f132, %f129, %f131;
	sub.ftz.f32 	%f133, %f130, %f132;
	lg2.approx.ftz.f32 	%f134, %f133;
	mul.ftz.f32 	%f135, %f134, %f79;
	ex2.approx.ftz.f32 	%f136, %f135;
	sub.ftz.f32 	%f137, %f130, %f136;
	sub.ftz.f32 	%f138, %f130, %f77;
	fma.rn.ftz.f32 	%f182, %f137, %f138, %f77;

BB0_18:
	// Reapply the sign saved in %p13, then map back:
	//   %f148 = (10 ^ (signed %f182 + 1) - 1) * ~0.010101
	// lg2(10) is evaluated at run time with lg2.approx rather than folded
	// into a literal.
	neg.ftz.f32 	%f139, %f182;
	selp.f32	%f140, %f139, %f182, %p13;
	add.ftz.f32 	%f141, %f140, 0f3F800000;
	mov.f32 	%f142, 0f3F800000;
	mov.f32 	%f143, 0f41200000;
	lg2.approx.ftz.f32 	%f144, %f143;
	mul.ftz.f32 	%f145, %f144, %f141;
	ex2.approx.ftz.f32 	%f146, %f145;
	add.ftz.f32 	%f147, %f146, 0fBF800000;
	mul.ftz.f32 	%f148, %f147, 0f3C257EB5;
	// Rebuild three values from the ratios and the new middle value %f148:
	//   %f150 = %f180 * %f148 / %f181
	//   %f154 = (1 - %f180 - %f181) * %f148 / %f181
	// NOTE(review): no guard against %f181 == 0 is visible here; confirm that
	// callers/inputs cannot produce a zero second ratio.
	mul.ftz.f32 	%f149, %f180, %f148;
	div.approx.ftz.f32 	%f150, %f149, %f181;
	sub.ftz.f32 	%f151, %f142, %f180;
	sub.ftz.f32 	%f152, %f151, %f181;
	mul.ftz.f32 	%f153, %f152, %f148;
	div.approx.ftz.f32 	%f154, %f153, %f181;
	// ---- Output 3x3 matrix -------------------------------------------------
	// Each row is dotted with (%f150, %f148, %f154):
	//   %f43 = row param_14, %f44 = row param_15, %f45 = row param_16
	mul.ftz.f32 	%f155, %f148, %f81;
	fma.rn.ftz.f32 	%f156, %f150, %f80, %f155;
	fma.rn.ftz.f32 	%f43, %f154, %f82, %f156;
	mul.ftz.f32 	%f157, %f148, %f85;
	fma.rn.ftz.f32 	%f158, %f150, %f84, %f157;
	fma.rn.ftz.f32 	%f44, %f154, %f86, %f158;
	mul.ftz.f32 	%f159, %f148, %f89;
	fma.rn.ftz.f32 	%f160, %f150, %f88, %f159;
	fma.rn.ftz.f32 	%f45, %f154, %f90, %f160;
	// ---- Output transfer curve, applied to |value| -------------------------
	//   |v| >  ~0.018 : ~1.099 * |v| ^ 0.45 - ~0.099
	//   |v| <= ~0.018 : 4.5 * |v|
	// The sign is reapplied afterwards in BB0_27.
	abs.ftz.f32 	%f46, %f43;
	abs.ftz.f32 	%f47, %f44;
	abs.ftz.f32 	%f48, %f45;
	// First output (%f43) -> %f183
	setp.gt.ftz.f32	%p17, %f46, 0f3C9374BC;
	@%p17 bra 	BB0_20;

	mul.ftz.f32 	%f183, %f46, 0f40900000;
	bra.uni 	BB0_21;

BB0_20:
	lg2.approx.ftz.f32 	%f161, %f46;
	mul.ftz.f32 	%f162, %f161, 0f3EE66666;
	ex2.approx.ftz.f32 	%f163, %f162;
	fma.rn.ftz.f32 	%f183, %f163, 0f3F8CAC08, 0fBDCAC083;

BB0_21:
	// Second output (%f44) -> %f184
	setp.gt.ftz.f32	%p18, %f47, 0f3C9374BC;
	@%p18 bra 	BB0_23;

	mul.ftz.f32 	%f184, %f47, 0f40900000;
	bra.uni 	BB0_24;

BB0_23:
	lg2.approx.ftz.f32 	%f164, %f47;
	mul.ftz.f32 	%f165, %f164, 0f3EE66666;
	ex2.approx.ftz.f32 	%f166, %f165;
	fma.rn.ftz.f32 	%f184, %f166, 0f3F8CAC08, 0fBDCAC083;

BB0_24:
	// Third output (%f45) -> %f185
	setp.gt.ftz.f32	%p19, %f48, 0f3C9374BC;
	@%p19 bra 	BB0_26;

	mul.ftz.f32 	%f185, %f48, 0f40900000;
	bra.uni 	BB0_27;

BB0_26:
	lg2.approx.ftz.f32 	%f167, %f48;
	mul.ftz.f32 	%f168, %f167, 0f3EE66666;
	ex2.approx.ftz.f32 	%f169, %f168;
	fma.rn.ftz.f32 	%f185, %f169, 0f3F8CAC08, 0fBDCAC083;

BB0_27:
	// Restore signs: %f58 (from %f43) goes to .z, %f59 (from %f44) to .y and
	// %f60 (from %f45) to .x, mirroring the (z, y, x) order used on input.
	neg.ftz.f32 	%f170, %f183;
	setp.lt.ftz.f32	%p20, %f43, 0f00000000;
	selp.f32	%f58, %f170, %f183, %p20;
	neg.ftz.f32 	%f171, %f184;
	setp.lt.ftz.f32	%p21, %f44, 0f00000000;
	selp.f32	%f59, %f171, %f184, %p21;
	neg.ftz.f32 	%f172, %f185;
	setp.lt.ftz.f32	%p22, %f45, 0f00000000;
	selp.f32	%f60, %f172, %f185, %p22;
	// Store in the same format the pixel was loaded in (%p4 set => f16).
	@%p4 bra 	BB0_29;

	// ---- Store path A: 4 x f32, same address as the load --------------------
	// The original .w (%f176) is written back unchanged.
	cvta.to.global.u64 	%rd8, %rd1;
	mad.lo.s32 	%r39, %r12, %r1, %r8;
	mul.wide.s32 	%rd9, %r39, 16;
	add.s64 	%rd10, %rd8, %rd9;
	st.global.v4.f32 	[%rd10], {%f60, %f59, %f58, %f176};
	bra.uni 	BB0_30;

BB0_29:
	// ---- Store path B: 4 x f16, same address as the load --------------------
	// Each f32 is narrowed with round-to-nearest and flush-to-zero; the store
	// order is (.x, .y, .z, .w) = (%rs12, %rs11, %rs10, %rs9).
	cvta.to.global.u64 	%rd11, %rd1;
	mad.lo.s32 	%r48, %r12, %r1, %r8;
	mul.wide.s32 	%rd12, %r48, 8;
	add.s64 	%rd13, %rd11, %rd12;
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f176;
	mov.b16 	%rs9, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f58;
	mov.b16 	%rs10, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f59;
	mov.b16 	%rs11, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f60;
	mov.b16 	%rs12, %temp;
}
	st.global.v4.u16 	[%rd13], {%rs12, %rs11, %rs10, %rs9};

BB0_30:
	// Common exit, also taken by out-of-range threads.
	ret;
}


