Add a memclr() SSE-optimized function
This commit is contained in:
parent
38470724dc
commit
db9e1ba17c
@ -15,6 +15,7 @@ extern "C"
|
||||
char* strcpy(char*, const char*);
|
||||
char* strchr(const char*, int);
|
||||
char* strcat(char*, const char*);
|
||||
// Zero-fills a byte range: the first argument is the start address, the
// second is the number of bytes to clear. The definition returns the address
// one past the last byte cleared (start + count), not the start address.
void* memclr(void*, size_t);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
@ -45,4 +45,48 @@ extern "C"
|
||||
{
|
||||
NOT_IMPLEMENTED("strchr");
|
||||
}
|
||||
|
||||
// Zero `n` bytes starting at `dst` with `rep stosb`.
//
// `rep stosb` advances EDI/RDI and counts ECX/RCX down to zero, so both are
// declared as read-write ("+") operands rather than plain inputs; declaring
// them input-only would let the compiler assume the registers still hold
// their old values afterwards. The "memory" clobber tells the compiler that
// the statement writes memory, so it must not cache or reorder accesses to
// the destination across it.
//
// Relies on the direction flag being clear, which the x86 calling conventions
// guarantee at function entry. A count of 0 stores nothing.
static inline void memclr_bytes(void* dst, size_t n)
{
    __asm__ volatile("rep stosb" : "+D"(dst), "+c"(n) : "a"(0) : "memory");
}

// Clears (zero-fills) `count` bytes starting at `start`.
//
// Strategy:
//   1. byte stores until the address is 16-byte aligned (or `count` runs out),
//   2. 64-byte chunks using four aligned 16-byte SSE2 stores,
//   3. byte stores for whatever tail is left.
//
// Returns the address one past the last byte cleared, i.e. `start + count`.
//
// NOTE(review): uses SSE2 instructions and XMM0; assumes the environment has
// SSE enabled and that clobbering XMM0 here is acceptable — confirm for
// interrupt/early-boot callers.
void* memclr(void* start, size_t count)
{
    unsigned char* base = (unsigned char*)start;

    // "i" counts how many bytes have been cleared so far.
    // Head: distance to the next 16-byte boundary, capped at `count` so a
    // short, unaligned request never writes past its end.
    size_t misalign = (size_t)start & 15;
    size_t i = misalign ? 16 - misalign : 0;
    if (i > count)
    {
        i = count;
    }
    memclr_bytes(base, i);

    // Body: 64 bytes per iteration (4 x 16-byte aligned stores).
    // `count - i >= 64` cannot wrap because i <= count always holds here,
    // unlike `i + 64 <= count` for counts close to SIZE_MAX.
    // If the head consumed everything (i == count) the loop is skipped, so
    // movdqa only ever sees a 16-byte aligned address.
    while (count - i >= 64)
    {
        __asm__ volatile("pxor   %%xmm0, %%xmm0\n\t"  // XMM0 = 0
                         "movdqa %%xmm0,  0(%0)\n\t"  // 16 zero bytes at %0 + 0
                         "movdqa %%xmm0, 16(%0)\n\t"
                         "movdqa %%xmm0, 32(%0)\n\t"
                         "movdqa %%xmm0, 48(%0)\n\t"
                         :
                         : "r"(base + i)
                         : "xmm0", "memory");
        i += 64;
    }

    // Tail: fewer than 64 bytes remain (possibly none).
    memclr_bytes(base + i, count - i);

    // One past the last byte cleared.
    return base + count;
}
|
||||
}
|
Loading…
Reference in New Issue
Block a user