diff --git a/libc/Makefile b/libc/Makefile index 85eaf411..74ad6af3 100644 --- a/libc/Makefile +++ b/libc/Makefile @@ -71,6 +71,7 @@ heap.o \ integer.o \ localtime.o \ localtime_r.o \ +mbrtowc.o \ mbtowc.o \ memccpy.o \ memchr.o \ @@ -114,6 +115,8 @@ timespec.o \ ungetc.o \ vfscanf.o \ vsscanf.o \ +wcrtomb.o \ +wctomb.o \ HOSTEDOBJS=\ access.o \ diff --git a/libc/include/stdlib.h b/libc/include/stdlib.h index 765ac2e1..5c528966 100644 --- a/libc/include/stdlib.h +++ b/libc/include/stdlib.h @@ -60,6 +60,7 @@ void free(void*); long labs(long); long long llabs(long long); void* malloc(size_t); +int mbtowc(wchar_t *restrict, const char* restrict, size_t); #if !defined(_SORTIX_SOURCE) char* mktemp(char* templ); #endif @@ -74,6 +75,7 @@ unsigned long strtoul(const char* restrict, char** restrict, int); unsigned long long strtoull(const char* restrict, char** restrict, int); long long strtoll(const char* restrict, char** restrict, int); int unsetenv(const char*); +int wctomb(char*, wchar_t); #if defined(_SORTIX_SOURCE) || defined(_WANT_SORTIX_ENV) const char* const* getenviron(void); @@ -110,7 +112,6 @@ lldiv_t lldiv(long long, long long); long lrand48(void); int mblen(const char*, size_t); size_t mbstowcs(wchar_t *restrict, const char* restrict, size_t); -int mbtowc(wchar_t *restrict, const char* restrict, size_t); char* mkdtemp(char*); int mkstemp(char*); long mrand48(void); @@ -132,7 +133,6 @@ long double strtold(const char* restrict, char** restrict); int system(const char*); int unlockpt(int); size_t wcstombs(char* restrict, const wchar_t *restrict, size_t); -int wctomb(char*, wchar_t); #if __POSIX_OBSOLETE <= 200801 int rand_r(unsigned *); diff --git a/libc/include/wchar.h b/libc/include/wchar.h index fdb306e9..129644cb 100644 --- a/libc/include/wchar.h +++ b/libc/include/wchar.h @@ -61,6 +61,9 @@ __BEGIN_DECLS struct tm; +size_t wcrtomb(char* restrict, wchar_t, mbstate_t* restrict); +size_t mbrtowc(wchar_t* restrict, const char* restrict, size_t, mbstate_t* 
restrict); + /* TODO: These are not implemented in sortix libc yet. */ #if defined(__SORTIX_SHOW_UNIMPLEMENTED) double wcstod(const wchar_t* restrict, wchar_t** restrict); @@ -92,9 +95,7 @@ long double wcstold(const wchar_t* restrict, wchar_t** restrict); long long wcstoll(const wchar_t* restrict, wchar_t** restrict, int); long wcstol(const wchar_t* restrict, wchar_t** restrict, int); size_t mbrlen(const char* restrict, size_t, mbstate_t* restrict); -size_t mbrtowc(wchar_t* restrict, const char* restrict, size_t, mbstate_t* restrict); size_t mbsrtowcs(wchar_t* restrict, const char** restrict, size_t, mbstate_t* restrict); -size_t wcrtomb(char* restrict, wchar_t, mbstate_t* restrict); size_t wcscspn(const wchar_t*, const wchar_t*); size_t wcsftime(wchar_t* restrict, size_t, const wchar_t* restrict, const struct tm* restrict); size_t wcslen(const wchar_t*); diff --git a/libc/mbrtowc.cpp b/libc/mbrtowc.cpp new file mode 100644 index 00000000..7c9a34dd --- /dev/null +++ b/libc/mbrtowc.cpp @@ -0,0 +1,105 @@ +/******************************************************************************* + + Copyright(C) Jonas 'Sortie' Termansen 2012. + + This file is part of the Sortix C Library. + + The Sortix C Library is free software: you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation, either version 3 of the License, or (at your + option) any later version. + + The Sortix C Library is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public + License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with the Sortix C Library. If not, see . + + mbrtowc.cpp + Convert a multibyte sequence to a wide character. 
// Decode the UTF-8 multibyte sequence starting at s, looking at no more than
// n bytes, into a single wide character.
//
// Sequences of 1 to 6 bytes (values up to 31 bits) are accepted. The decoded
// value is stored in *pwc when pwc is not NULL.
//
// Returns:
//   - the number of bytes consumed for a non-null wide character,
//   - 0 if s is NULL or the sequence decodes to the null wide character,
//   - (size_t) -2 if the first n bytes are a valid but incomplete sequence,
//   - (size_t) -1 with errno set to EILSEQ if the bytes are not a valid
//     sequence (stray continuation byte, bad lead byte, missing continuation
//     byte, or an overlong encoding).
//
// The conversion state ps is not used yet, so an incomplete sequence cannot
// be resumed by a later call.
#ifdef __cplusplus
extern "C"
#endif
size_t mbrtowc(wchar_t* restrict pwc, const char* restrict s, size_t n,
               mbstate_t* restrict ps)
{
	// Smallest value that genuinely needs a sequence of the given length
	// (indexed by length in bytes). Anything smaller is an overlong encoding.
	static const uint32_t min_value[7] =
	{
		0, 0, 0x80, 0x800, 0x10000, 0x200000, 0x4000000,
	};

	(void) ps;

	if ( !s )
	{
		// TODO: Restore ps to initial state if currently valid.
		return 0;
	}

	if ( n == 0 )
	{
		// TODO: Support restore through the mbstate_t!
		return (size_t) -2;
	}

	const uint8_t* buf = (const uint8_t*) s;
	uint8_t lead = buf[0];

	// The lead byte alone determines the sequence length and contributes the
	// most significant payload bits.
	size_t sequence_len;
	uint32_t value;
	if ( (lead & 0x80) == 0x00 ) // 0xxxxxxx
		sequence_len = 1, value = lead;
	else if ( (lead & 0xE0) == 0xC0 ) // 110xxxxx
		sequence_len = 2, value = lead & 0x1F;
	else if ( (lead & 0xF0) == 0xE0 ) // 1110xxxx
		sequence_len = 3, value = lead & 0x0F;
	else if ( (lead & 0xF8) == 0xF0 ) // 11110xxx
		sequence_len = 4, value = lead & 0x07;
	else if ( (lead & 0xFC) == 0xF8 ) // 111110xx
		sequence_len = 5, value = lead & 0x03;
	else if ( (lead & 0xFE) == 0xFC ) // 1111110x
		sequence_len = 6, value = lead & 0x01;
	else
	{
		// 10xxxxxx (a continuation byte cannot start a sequence), 0xFE, 0xFF.
		errno = EILSEQ;
		return (size_t) -1;
	}

	for ( size_t i = 1; i < sequence_len; i++ )
	{
		if ( i == n )
		{
			// TODO: Support restore through the mbstate_t!
			return (size_t) -2;
		}
		uint8_t b = buf[i];
		if ( (b & 0xC0) != 0x80 )
		{
			errno = EILSEQ;
			return (size_t) -1;
		}
		// Make room for the six new payload bits below the ones gathered so
		// far.
		value = (value << 6) | (uint32_t) (b & 0x3F);
	}

	// Reject overlong encodings before looking at the value, so that an
	// overlong encoding of NUL is not mistaken for a terminator.
	if ( value < min_value[sequence_len] )
	{
		errno = EILSEQ;
		return (size_t) -1;
	}

	if ( pwc )
		*pwc = (wchar_t) value;

	// TODO: Reset ps to initial state when the null wide character is decoded.
	return value ? sequence_len : 0;
}

// TODO: This function is unpure and should be removed.
// Stateless wrapper around mbrtowc. mbtowc has no return value for an
// incomplete sequence, so mbrtowc's (size_t) -2 is reported as -1.
#ifdef __cplusplus
extern "C"
#endif
int mbtowc(wchar_t* pwd, const char* s, size_t n)
{
	size_t result = mbrtowc(pwd, s, n, NULL);
	if ( result == (size_t) -2 )
		return -1;
	return (int) result;
}
// Encode the wide character wc as a UTF-8 multibyte sequence stored at s.
//
// Values of up to 31 bits are encoded, using 1 to 6 bytes. Returns the number
// of bytes written; the null wide character is written as a single 0 byte.
// If wc does not fit in 31 bits, nothing is written and (size_t) -1 is
// returned with errno set to EILSEQ.
//
// If s is NULL, wc is ignored and the call behaves as if the null wide
// character had been encoded into an internal buffer, so 1 is returned.
//
// The conversion state ps is not used, as this encoding has no shift state.
#ifdef __cplusplus
extern "C"
#endif
size_t wcrtomb(char* restrict s, wchar_t wc, mbstate_t* restrict ps)
{
	(void) ps;

	if ( !s )
		return 1;

	uint32_t unicode = (uint32_t) wc;
	if ( unicode >> 31 )
	{
		errno = EILSEQ;
		return (size_t) -1;
	}

	// Each extra byte beyond the first raises the payload capacity:
	// 7, 11, 16, 21, 26 and 31 bits for 1 to 6 bytes.
	size_t bytes = 1;
	if ( (UINT32_C(1) << 7) <= unicode ) bytes = 2;
	if ( (UINT32_C(1) << 11) <= unicode ) bytes = 3;
	if ( (UINT32_C(1) << 16) <= unicode ) bytes = 4;
	if ( (UINT32_C(1) << 21) <= unicode ) bytes = 5;
	if ( (UINT32_C(1) << 26) <= unicode ) bytes = 6;

	uint8_t* buf = (uint8_t*) s;
	if ( bytes == 1 )
	{
		buf[0] = (uint8_t) unicode;
		return 1;
	}

	// The lead byte starts with as many 1 bits as there are bytes in the
	// sequence, followed by a 0 bit; the remaining low bits hold the most
	// significant part of the value.
	unsigned shift = 6 * (unsigned) (bytes - 1);
	uint8_t lead_marker = (uint8_t) (0xFFU << (8 - bytes));
	buf[0] = (uint8_t) (lead_marker | (unicode >> shift));

	// Every following byte is 10xxxxxx and carries the next six bits.
	for ( size_t i = 1; i < bytes; i++ )
	{
		shift -= 6;
		buf[i] = (uint8_t) (0x80U | ((unicode >> shift) & 0x3FU));
	}

	return bytes;
}
// TODO: This function is unpure and should be removed.
// Stateless wrapper around wcrtomb: encodes wc at s and returns the number of
// bytes written, or -1 if wcrtomb reports that wc cannot be encoded.
//
// When s is NULL the caller is asking whether the multibyte encoding is
// state-dependent. wcrtomb is called without a conversion state here, so the
// answer is always 0 (no shift states).
#ifdef __cplusplus
extern "C"
#endif
int wctomb(char* s, wchar_t wc)
{
	if ( !s )
		return 0;
	return (int) wcrtomb(s, wc, NULL);
}