/* Prototype added to libs/libc/include/string.h (inside its extern "C" block). */
void* memclr(void* start, size_t count);

/**
 * memclr - set a range of memory to zero.
 *
 * Defined in libs/libc/src/string.cpp.
 *
 * On x86 the range is cleared in three phases:
 *   1. single bytes until the write position sits on a 16-byte boundary,
 *   2. 64-byte chunks using four aligned 16-byte SSE2 stores,
 *   3. whatever is left, byte by byte.
 * On any other architecture a plain byte loop is used.
 *
 * @param start  first byte to clear; no alignment is required.
 * @param count  number of bytes to clear; 0 is allowed and writes nothing.
 * @return       pointer one past the last cleared byte, i.e. start + count.
 *               (Note: unlike memset(), this is NOT the start pointer.)
 */
void* memclr(void* start, size_t count)
{
    unsigned char* cursor = (unsigned char*)start;
    size_t remaining = count;

#if defined(__x86_64__) || defined(__i386__)
    /*
     * Phase 1: bytes needed to reach the next 16-byte boundary (0 when already
     * aligned), capped so that we never write past the requested range.
     */
    size_t head = ((size_t)0 - (size_t)cursor) & (size_t)15;
    if (head > remaining)
    {
        head = remaining;
    }
    remaining -= head;

    /*
     * "rep stosb" advances (E/R)DI and counts (E/R)CX down to zero, so both
     * must be read-write operands; declaring them as plain inputs would let
     * the compiler assume the registers still hold their old values.
     * The "memory" clobber tells the compiler the buffer contents changed.
     * Assumes the direction flag is clear, as the x86 calling conventions
     * guarantee at function boundaries.
     */
    __asm__ volatile("rep stosb" : "+D"(cursor), "+c"(head) : "a"(0) : "memory");

    /*
     * Phase 2: 64 bytes per iteration. movdqa faults on addresses that are
     * not 16-byte aligned; phase 1 guarantees alignment whenever at least one
     * full chunk is left. xmm0 is overwritten, hence the clobber.
     */
    while (remaining >= 64)
    {
        __asm__ volatile("pxor   %%xmm0, %%xmm0\n\t"
                         "movdqa %%xmm0,  0(%0)\n\t"
                         "movdqa %%xmm0, 16(%0)\n\t"
                         "movdqa %%xmm0, 32(%0)\n\t"
                         "movdqa %%xmm0, 48(%0)"
                         :
                         : "r"(cursor)
                         : "xmm0", "memory");
        cursor += 64;
        remaining -= 64;
    }

    /* Phase 3: clear the tail (fewer than 64 bytes, possibly none). */
    __asm__ volatile("rep stosb" : "+D"(cursor), "+c"(remaining) : "a"(0) : "memory");
#else
    /*
     * Portable fallback. The volatile-qualified pointer is meant to keep the
     * compiler from replacing the loop with a call to memset().
     */
    volatile unsigned char* out = cursor;
    while (remaining != 0)
    {
        *out++ = 0;
        --remaining;
    }
#endif

    /* Every path clears exactly `count` bytes, so the end is start + count. */
    return (unsigned char*)start + count;
}