diff --git a/libc/Makefile b/libc/Makefile
index d17eef32..1267515a 100644
--- a/libc/Makefile
+++ b/libc/Makefile
@@ -78,6 +78,7 @@ ldiv.o \
lldiv.o \
localtime.o \
localtime_r.o \
+mbrlen.o \
mbrtowc.o \
mbsrtowcs.o \
mbstowcs.o \
@@ -94,8 +95,8 @@ setbuf.o \
setvbuf.o \
sigaddset.o \
sigdelset.o \
-sigfillset.o \
sigemptyset.o \
+sigfillset.o \
sigismember.o \
sort.o \
sprint.o \
diff --git a/libc/include/wchar.h b/libc/include/wchar.h
index 7c86d0c6..e36b6bf3 100644
--- a/libc/include/wchar.h
+++ b/libc/include/wchar.h
@@ -63,6 +63,7 @@ struct tm;
size_t mbsrtowcs(wchar_t* __restrict, const char** __restrict, size_t, mbstate_t* __restrict);
size_t wcrtomb(char* __restrict, wchar_t, mbstate_t* __restrict);
+size_t mbrlen(const char* __restrict, size_t, mbstate_t* __restrict);
size_t mbrtowc(wchar_t* __restrict, const char* __restrict, size_t, mbstate_t* __restrict);
wchar_t* wcscat(wchar_t* __restrict, const wchar_t* __restrict);
wchar_t* wcschr(const wchar_t*, wchar_t);
@@ -107,7 +108,6 @@ int wscanf(const wchar_t* __restrict, ...);
long double wcstold(const wchar_t* __restrict, wchar_t** __restrict);
long long wcstoll(const wchar_t* __restrict, wchar_t** __restrict, int);
long wcstol(const wchar_t* __restrict, wchar_t** __restrict, int);
-size_t mbrlen(const char* __restrict, size_t, mbstate_t* __restrict);
size_t wcsftime(wchar_t* __restrict, size_t, const wchar_t* __restrict, const struct tm* __restrict);
size_t wcsxfrm(wchar_t* __restrict, const wchar_t* __restrict, size_t);
unsigned long long wcstoull(const wchar_t* __restrict, wchar_t** __restrict, int);
diff --git a/libc/mbrlen.cpp b/libc/mbrlen.cpp
new file mode 100644
index 00000000..aa4601ac
--- /dev/null
+++ b/libc/mbrlen.cpp
@@ -0,0 +1,85 @@
+/*******************************************************************************
+
+ Copyright(C) Jonas 'Sortie' Termansen 2013.
+
+ This file is part of the Sortix C Library.
+
+ The Sortix C Library is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or (at your
+ option) any later version.
+
+ The Sortix C Library is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+ License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with the Sortix C Library. If not, see <http://www.gnu.org/licenses/>.
+
+ mbrlen.cpp
+ Determine number of bytes in next multibyte character.
+
+*******************************************************************************/
+
+#include <errno.h>
+#include <string.h>
+#include <wchar.h>
+
+// Classify the first byte of a UTF-8 sequence by its count of leading 1 bits.
+// Returns 1 for an ASCII byte (0xxxxxxx), 0 for a continuation byte
+// (10xxxxxx), 2 through 6 for a header byte announcing a sequence of that many
+// bytes, and (size_t) -1 for 0xFE and 0xFF, which match none of those patterns.
+static size_t utf8_header_length(unsigned char uc)
+{
+	size_t leading_ones = 0;
+	for ( unsigned int bit = 0x80; bit != 0 && (uc & bit) != 0; bit >>= 1 )
+		leading_ones++;
+	switch ( leading_ones )
+	{
+	case 0: return 1;
+	case 1: return 0;
+	case 7:
+	case 8: return (size_t) -1;
+	default: return leading_ones;
+	}
+}
+
+// Determine how many bytes make up the next UTF-8 multibyte character in the
+// at most n bytes at s. Returns that length, 0 if the character is the null
+// character, (size_t) -2 if the n bytes are only a valid prefix of a character,
+// or (size_t) -1 with errno set to EILSEQ if the bytes are not valid UTF-8.
+// TODO: Use the shift state.
+extern "C"
+size_t mbrlen(const char* restrict s, size_t n, mbstate_t* restrict ps)
+{
+	// A null s is specified to behave like mbrlen("", 1, ps).
+	if ( !s )
+		s = "", n = 1;
+
+	if ( n == 0 )
+		return (size_t) -2;
+
+	unsigned char header = (unsigned char) s[0];
+	if ( !header )
+	{
+		// A null ps is permitted; there is then no caller state to reset.
+		if ( ps )
+			memset(ps, 0, sizeof(*ps));
+		return 0;
+	}
+
+	// Reject bytes that cannot start a character, including an unexpected
+	// continuation byte (10xxxxxx), for which the helper reports length 0.
+	// TODO: Should we play catch up with the partial sequence?
+	size_t expected_length = utf8_header_length(header);
+	if ( expected_length == (size_t) -1 || expected_length == 0 )
+		return errno = EILSEQ, (size_t) -1;
+
+	// Every available byte after the header must be a continuation byte.
+	for ( size_t i = 1; i < n && i < expected_length; i++ )
+		if ( ((unsigned char) s[i] & 0xC0) != 0x80 )
+			return errno = EILSEQ, (size_t) -1;
+
+	return expected_length <= n ? expected_length : (size_t) -2;
+}