设为首页 加入收藏

TOP

使用 x86 汇编实现 C# 的快速内存拷贝(三)
2015-02-02 14:31:52 来源: 作者: 【 】 浏览:50
Tags:使用 x86 汇编 实现 快速 内存 拷贝
nException("x64 is not supported yet!");
? }
}


汇编代码被表示成带注释的字节数组:


private static byte[] x86_FastMemCopy_New = new byte[]
{
? 0x90, //nop do nothing
? 0x60, //pushad store all general-purpose registers on stack
? 0x95, //xchg ebp, eax eax contains memory address of our method
? 0x8B, 0xB5, 0x5A, 0x01, 0x00, 0x00, //mov esi,[ebp][00000015A] get source buffer address
? 0x89, 0xF0, //mov eax,esi
? 0x83, 0xE0, 0x0F, //and eax,00F will check if it is 16 byte aligned
? 0x8B, 0xBD, 0x62, 0x01, 0x00, 0x00, //mov edi,[ebp][000000162] get destination address
? 0x89, 0xFB, //mov ebx,edi
? 0x83, 0xE3, 0x0F, //and ebx,00F will check if it is 16 byte aligned
? 0x8B, 0x8D, 0x6A, 0x01, 0x00, 0x00, //mov ecx,[ebp][00000016A] get number of bytes to copy
? 0xC1, 0xE9, 0x07, //shr ecx,7 divide length by 128
? 0x85, 0xC9, //test ecx,ecx check if zero
? 0x0F, 0x84, 0x1C, 0x01, 0x00, 0x00, //jz 000000146 ↓ copy the rest
? 0x0F, 0x18, 0x06, //prefetchnta [esi] pre-fetch non-temporal source data for reading
? 0x85, 0xC0, //test eax,eax check if source address is 16 byte aligned
? 0x0F, 0x84, 0x8B, 0x00, 0x00, 0x00, //jz 0000000C0 ↓ go to copy if aligned
? 0x0F, 0x18, 0x86, 0x80, 0x02, 0x00, 0x00, //prefetchnta [esi][000000280] pre-fetch more source data
? 0x0F, 0x10, 0x06, //movups xmm0,[esi] copy 16 bytes of source data
? 0x0F, 0x10, 0x4E, 0x10, //movups xmm1,[esi][010] copy more 16 bytes
? 0x0F, 0x10, 0x56, 0x20, //movups xmm2,[esi][020] copy more
? 0x0F, 0x18, 0x86, 0xC0, 0x02, 0x00, 0x00, //prefetchnta [esi][0000002C0] pre-fetch more
? 0x0F, 0x10, 0x5E, 0x30, //movups xmm3,[esi][030]
? 0x0F, 0x10, 0x66, 0x40, //movups xmm4,[esi][040]
? 0x0F, 0x10, 0x6E, 0x50, //movups xmm5,[esi][050]
? 0x0F, 0x10, 0x76, 0x60, //movups xmm6,[esi][060]
? 0x0F, 0x10, 0x7E, 0x70, //movups xmm7,[esi][070] we've copied 128 bytes of source data
? 0x85, 0xDB, //test ebx,ebx check if destination address is 16 byte aligned
? 0x74, 0x21, //jz 000000087 ↓ go to paste if aligned
? 0x0F, 0x11, 0x07, //movups [edi],xmm0 paste first 16 bytes to non-aligned destination address
? 0x0F, 0x11, 0x4F, 0x10, //movups [edi][010],xmm1 paste more
? 0x0F, 0x11, 0x57, 0x20, //movups [edi][020],xmm2
? 0x0F, 0x11, 0x5F, 0x30, //movups [edi][030],xmm3
? 0x0F, 0x11, 0x67, 0x40, //movups [edi][040],xmm4
? 0x0F, 0x11, 0x6F, 0x50, //movups [edi][050],xmm5
? 0x0F, 0x11, 0x77, 0x60, //movups [edi][060],xmm6
? 0x0F, 0x11, 0x7F, 0x70, //movups [edi][070],xmm7 we've pasted 128 bytes of source data
? 0xEB, 0x1F, //jmps 0000000A6 ↓ continue
? 0x0F, 0x2B, 0x07, //movntps [edi],xmm0 paste first 16 bytes to aligned destination address
? 0x0F, 0x2B, 0x4F, 0x10, //movntps [edi][010],xmm1 paste more
? 0x0F, 0x2B, 0x57, 0x20, //movntps [edi][020],xmm2
? 0x0F, 0x2B, 0x5F, 0x30, //movntps [edi][030],xmm3
? 0x0F, 0x2B, 0x67, 0x40, //movntps [edi][040],xmm4
? 0x0F, 0x2B, 0x6F, 0x50, //movntps [edi][050],xmm5
? 0x0F, 0x2B, 0x77, 0x60, //movntps [edi][060],xmm6
? 0x0F, 0x2B, 0x7F, 0x70, //movntps [edi][070],xmm7 we've pasted 128 bytes of source data
? 0x81, 0xC6, 0x80, 0x00, 0x00, 0x00, //add esi,000000080 increment source address by 128
? 0x81, 0xC7, 0x80, 0x00, 0x00, 0x00, //add edi,000000080 increment destination address by 128
? 0x83, 0xE9, 0x01, //sub ecx,1 decrement counter
? 0x0F, 0x85, 0x7A, 0xFF, 0xFF, 0xFF, //jnz 000000035 ↑ continue if not zero
? 0xE9, 0x86, 0x00, 0x00, 0x00, //jmp 000000146 ↓ go to copy the rest of data
?
? 0x0F, 0x18, 0x86, 0x80, 0x02, 0x00, 0x00, //prefetchnta [esi][000000280] pre-fetch source data
? 0x0F, 0x28, 0x06, //mova

首页 上一页 1 2 3 4 下一页 尾页 3/4/4
【打印繁体】【投稿】【收藏】【推荐】【举报】【评论】【关闭】【返回顶部】
分享到: 
上一篇:异常在 PHP 5.3 中的最佳实践 下一篇:Java面向对象的三大特征

评论

帐  号: 密码: (新用户注册)
验 证 码:
表  情:
内  容: