Add an SSE-optimized memclr() function

This commit is contained in:
apio 2022-10-02 19:13:33 +02:00
parent 38470724dc
commit db9e1ba17c
2 changed files with 45 additions and 0 deletions

View File

@ -15,6 +15,7 @@ extern "C"
char* strcpy(char*, const char*);
char* strchr(const char*, int);
char* strcat(char*, const char*);
// Zeroes the given number of bytes starting at the given address and returns a pointer
// to the first byte past the cleared region (start address + byte count).
void* memclr(void*, size_t);
#ifdef __cplusplus
}

View File

@ -45,4 +45,48 @@ extern "C"
{
NOT_IMPLEMENTED("strchr");
}
// Zeroes "len" bytes starting at "dst" using "rep stosb" and returns the first byte past them.
// A "len" of 0 stores nothing. Assumes the direction flag is clear on entry, as the
// System V ABI requires at function boundaries.
static inline unsigned char* memclr_bytes(unsigned char* dst, size_t len)
{
    // "+D" / "+c": rep stosb advances RDI and counts RCX down, so both must be declared
    // read-write; declaring them as plain inputs would let the compiler assume they
    // still hold their old values afterwards.
    // "memory": the stores are not visible through any operand, so the compiler must be
    // told explicitly that memory changes.
    __asm__ __volatile__("rep stosb" : "+D"(dst), "+c"(len) : "a"(0) : "memory");
    return dst;
}

// Clears (sets to zero) "count" bytes of memory starting at "start".
//
// Strategy:
//   1. clear single bytes until the write position is 16-byte aligned,
//   2. clear 64-byte chunks with four aligned 16-byte SSE2 stores each,
//   3. clear whatever is left (fewer than 64 bytes) byte-wise.
//
// Returns a pointer to the first byte past the cleared region, i.e. "start" + "count".
// NOTE(review): requires SSE2 to be enabled on the executing CPU before this is called.
void* memclr(void* start, size_t count)
{
    unsigned char* cursor = (unsigned char*)start;

    // Number of bytes needed to reach the next 16-byte boundary (0 if already aligned),
    // capped so that we never clear more than "count" bytes.
    size_t head = (16 - ((size_t)cursor & 15)) & 15;
    if (head > count)
    {
        head = count;
    }
    cursor = memclr_bytes(cursor, head);

    // "cursor" is now 16-byte aligned unless the cap above consumed everything, in which
    // case "remaining" is 0 and the aligned loop below is never entered.
    size_t remaining = count - head;
    for (; remaining >= 64; remaining -= 64, cursor += 64)
    {
        // pxor and the stores stay in ONE asm statement so nothing can touch XMM0 in
        // between; XMM0 is listed as clobbered because we overwrite it.
        __asm__ __volatile__("pxor %%xmm0, %%xmm0\n\t"      // XMM0 = 0
                             "movdqa %%xmm0, 0(%0)\n\t"     // 16 zero bytes at cursor + 0
                             "movdqa %%xmm0, 16(%0)\n\t"
                             "movdqa %%xmm0, 32(%0)\n\t"
                             "movdqa %%xmm0, 48(%0)"
                             :
                             : "r"(cursor)
                             : "xmm0", "memory");
    }

    // Clear the tail (0..63 bytes); the returned pointer is start + count.
    return memclr_bytes(cursor, remaining);
}
}