Add a memclr() SSE-optimized function

2022-10-02 19:13:33 +02:00 · 2022-10-02 19:13:33 +02:00 · db9e1ba17c
commit db9e1ba17c
parent 38470724dc
2 changed files with 45 additions and 0 deletions
--- a/libs/libc/include/string.h
+++ b/libs/libc/include/string.h
@ -15,6 +15,7 @@ extern "C"
    char* strcpy(char*, const char*);
    char* strchr(const char*, int);
    char* strcat(char*, const char*);
+    void* memclr(void*, size_t);

 #ifdef __cplusplus
 }
--- a/libs/libc/src/string.cpp
+++ b/libs/libc/src/string.cpp
@ -45,4 +45,48 @@ extern "C"
    {
        NOT_IMPLEMENTED("strchr");
    }
+
+    /**
+     * Zero `count` bytes starting at `start`, using 16-byte aligned SSE2 stores
+     * for the bulk of the range.
+     *
+     * The range is cleared in three phases: a byte-wise head of up to 15 bytes
+     * that brings the destination onto a 16-byte boundary, a run of 64-byte
+     * blocks written with four MOVDQA stores each, and a byte-wise tail.
+     *
+     * @param start  first byte to clear; any alignment is accepted
+     * @param count  number of bytes to clear; 0 is a no-op
+     * @return pointer one past the last byte cleared (start + count), not `start`
+     */
+    void* memclr(void* start, size_t count)
+    {
+        unsigned char* dst = (unsigned char*)start;
+
+        // bytes needed to reach the next 16-byte boundary, capped at "count"
+        size_t head = (0 - (size_t)dst) & 15;
+        head = head < count ? head : count;
+
+        // split what is left into whole 64-byte blocks plus a short tail
+        size_t blocks = (count - head) / 64;
+        size_t tail = (count - head) % 64;
+
+        // REP STOSB advances RDI and counts RCX down to zero, so both must be
+        // read-write operands; the "memory" clobber keeps the compiler from
+        // reordering or dropping accesses to the buffer around the asm
+        __asm__ __volatile__("rep stosb" : "+D"(dst), "+c"(head) : "a"(0) : "memory");
+
+        for (; blocks != 0; blocks--, dst += 64)
+        {
+            // XMM0 is overwritten here, so it is declared as clobbered
+            __asm__ __volatile__("pxor %%xmm0, %%xmm0\n\t"
+                                 "movdqa %%xmm0, 0(%0)\n\t"
+                                 "movdqa %%xmm0, 16(%0)\n\t"
+                                 "movdqa %%xmm0, 32(%0)\n\t"
+                                 "movdqa %%xmm0, 48(%0)" ::"r"(dst) : "xmm0", "memory");
+        }
+
+        __asm__ __volatile__("rep stosb" : "+D"(dst), "+c"(tail) : "a"(0) : "memory");
+        return dst; // advanced past every byte cleared, i.e. start + count
+    }
 }