Add a memclr() SSE-optimized function
This commit is contained in:
parent
38470724dc
commit
db9e1ba17c
@ -15,6 +15,7 @@ extern "C"
|
|||||||
char* strcpy(char*, const char*);
|
char* strcpy(char*, const char*);
|
||||||
char* strchr(const char*, int);
|
char* strchr(const char*, int);
|
||||||
char* strcat(char*, const char*);
|
char* strcat(char*, const char*);
|
||||||
|
// Zero-fills a region of memory: the first argument is the start address and
// the second is the number of bytes to clear. As defined in the accompanying
// implementation, the return value is the address one past the last byte
// cleared (start + count).
void* memclr(void*, size_t);
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
}
|
}
|
||||||
|
@ -45,4 +45,48 @@ extern "C"
|
|||||||
{
|
{
|
||||||
NOT_IMPLEMENTED("strchr");
|
NOT_IMPLEMENTED("strchr");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Zeroes `len` bytes at `dst` with `rep stosb` and returns the address just
// past the last byte written.
//
// `rep stosb` advances EDI/RDI and counts ECX/RCX down to zero, so both are
// declared as read-write ("+") operands; the "memory" clobber tells the
// compiler that memory it cannot see through the operands was written.
// The `__asm__` spelling is used because plain `asm` is not a keyword in
// strict ISO C modes.
static unsigned char* memclr_bytes(unsigned char* dst, size_t len)
{
    __asm__ __volatile__("rep stosb"
                         : "+D"(dst), "+c"(len)
                         : "a"(0)
                         : "memory");
    return dst;
}

// Clears `count` bytes starting at `start`, using aligned SSE stores for the
// bulk of the region.
//
// The region is handled in three phases:
//   1. a byte-wise head that runs until the cursor is 16-byte aligned
//      (or until `count` is exhausted, whichever comes first),
//   2. 64-byte chunks written as four aligned 16-byte `movdqa` stores,
//   3. a byte-wise tail for whatever is left (fewer than 64 bytes).
//
// Parameters:
//   start - first byte to clear; any alignment is accepted.
//   count - number of bytes to clear; 0 is allowed and writes nothing.
//
// Returns the address one past the last byte cleared (start + count).
void* memclr(void* start, size_t count)
{
    unsigned char* cursor = (unsigned char*)start;
    size_t remaining = count;

    // Phase 1: movdqa faults on unaligned addresses, so clear single bytes
    // up to the next 16-byte boundary, never exceeding the requested count.
    size_t misalignment = (size_t)cursor & 15;
    if (misalignment != 0)
    {
        size_t head = 16 - misalignment;
        if (head > remaining)
        {
            head = remaining;
        }
        cursor = memclr_bytes(cursor, head);
        remaining -= head;
    }

    // Phase 2: clear 64-byte chunks (4 aligned 16-byte stores each).
    // XMM0 is zeroed inside the same asm statement that uses it, and listed
    // as clobbered, so the compiler never assumes it survives the statement.
    for (; remaining >= 64; remaining -= 64, cursor += 64)
    {
        __asm__ __volatile__("pxor   %%xmm0, %%xmm0\n\t"
                             "movdqa %%xmm0,  0(%0)\n\t"
                             "movdqa %%xmm0, 16(%0)\n\t"
                             "movdqa %%xmm0, 32(%0)\n\t"
                             "movdqa %%xmm0, 48(%0)"
                             :
                             : "r"(cursor)
                             : "xmm0", "memory");
    }

    // Phase 3: clear the remaining bytes (if any); afterwards the cursor
    // sits exactly at start + count, which is the value we return.
    cursor = memclr_bytes(cursor, remaining);

    return (void*)cursor;
}
|
||||||
}
|
}
|
Loading…
Reference in New Issue
Block a user