Add mbrtowc(3), mbtowc(3), wcrtomb(3), wctomb(3).
This commit is contained in:
parent
e52c6c0966
commit
05219a27f2
|
@ -71,6 +71,7 @@ heap.o \
|
||||||
integer.o \
|
integer.o \
|
||||||
localtime.o \
|
localtime.o \
|
||||||
localtime_r.o \
|
localtime_r.o \
|
||||||
|
mbrtowc.o \
|
||||||
mbtowc.o \
|
mbtowc.o \
|
||||||
memccpy.o \
|
memccpy.o \
|
||||||
memchr.o \
|
memchr.o \
|
||||||
|
@ -114,6 +115,8 @@ timespec.o \
|
||||||
ungetc.o \
|
ungetc.o \
|
||||||
vfscanf.o \
|
vfscanf.o \
|
||||||
vsscanf.o \
|
vsscanf.o \
|
||||||
|
wcrtomb.o \
|
||||||
|
wctomb.o \
|
||||||
|
|
||||||
HOSTEDOBJS=\
|
HOSTEDOBJS=\
|
||||||
access.o \
|
access.o \
|
||||||
|
|
|
@ -60,6 +60,7 @@ void free(void*);
|
||||||
long labs(long);
|
long labs(long);
|
||||||
long long llabs(long long);
|
long long llabs(long long);
|
||||||
void* malloc(size_t);
|
void* malloc(size_t);
|
||||||
|
int mbtowc(wchar_t *restrict, const char* restrict, size_t);
|
||||||
#if !defined(_SORTIX_SOURCE)
|
#if !defined(_SORTIX_SOURCE)
|
||||||
char* mktemp(char* templ);
|
char* mktemp(char* templ);
|
||||||
#endif
|
#endif
|
||||||
|
@ -74,6 +75,7 @@ unsigned long strtoul(const char* restrict, char** restrict, int);
|
||||||
unsigned long long strtoull(const char* restrict, char** restrict, int);
|
unsigned long long strtoull(const char* restrict, char** restrict, int);
|
||||||
long long strtoll(const char* restrict, char** restrict, int);
|
long long strtoll(const char* restrict, char** restrict, int);
|
||||||
int unsetenv(const char*);
|
int unsetenv(const char*);
|
||||||
|
int wctomb(char*, wchar_t);
|
||||||
|
|
||||||
#if defined(_SORTIX_SOURCE) || defined(_WANT_SORTIX_ENV)
|
#if defined(_SORTIX_SOURCE) || defined(_WANT_SORTIX_ENV)
|
||||||
const char* const* getenviron(void);
|
const char* const* getenviron(void);
|
||||||
|
@ -110,7 +112,6 @@ lldiv_t lldiv(long long, long long);
|
||||||
long lrand48(void);
|
long lrand48(void);
|
||||||
int mblen(const char*, size_t);
|
int mblen(const char*, size_t);
|
||||||
size_t mbstowcs(wchar_t *restrict, const char* restrict, size_t);
|
size_t mbstowcs(wchar_t *restrict, const char* restrict, size_t);
|
||||||
int mbtowc(wchar_t *restrict, const char* restrict, size_t);
|
|
||||||
char* mkdtemp(char*);
|
char* mkdtemp(char*);
|
||||||
int mkstemp(char*);
|
int mkstemp(char*);
|
||||||
long mrand48(void);
|
long mrand48(void);
|
||||||
|
@ -132,7 +133,6 @@ long double strtold(const char* restrict, char** restrict);
|
||||||
int system(const char*);
|
int system(const char*);
|
||||||
int unlockpt(int);
|
int unlockpt(int);
|
||||||
size_t wcstombs(char* restrict, const wchar_t *restrict, size_t);
|
size_t wcstombs(char* restrict, const wchar_t *restrict, size_t);
|
||||||
int wctomb(char*, wchar_t);
|
|
||||||
|
|
||||||
#if __POSIX_OBSOLETE <= 200801
|
#if __POSIX_OBSOLETE <= 200801
|
||||||
int rand_r(unsigned *);
|
int rand_r(unsigned *);
|
||||||
|
|
|
@ -61,6 +61,9 @@ __BEGIN_DECLS
|
||||||
|
|
||||||
struct tm;
|
struct tm;
|
||||||
|
|
||||||
|
size_t wcrtomb(char* restrict, wchar_t, mbstate_t* restrict);
|
||||||
|
size_t mbrtowc(wchar_t* restrict, const char* restrict, size_t, mbstate_t* restrict);
|
||||||
|
|
||||||
/* TODO: These are not implemented in sortix libc yet. */
|
/* TODO: These are not implemented in sortix libc yet. */
|
||||||
#if defined(__SORTIX_SHOW_UNIMPLEMENTED)
|
#if defined(__SORTIX_SHOW_UNIMPLEMENTED)
|
||||||
double wcstod(const wchar_t* restrict, wchar_t** restrict);
|
double wcstod(const wchar_t* restrict, wchar_t** restrict);
|
||||||
|
@ -92,9 +95,7 @@ long double wcstold(const wchar_t* restrict, wchar_t** restrict);
|
||||||
long long wcstoll(const wchar_t* restrict, wchar_t** restrict, int);
|
long long wcstoll(const wchar_t* restrict, wchar_t** restrict, int);
|
||||||
long wcstol(const wchar_t* restrict, wchar_t** restrict, int);
|
long wcstol(const wchar_t* restrict, wchar_t** restrict, int);
|
||||||
size_t mbrlen(const char* restrict, size_t, mbstate_t* restrict);
|
size_t mbrlen(const char* restrict, size_t, mbstate_t* restrict);
|
||||||
size_t mbrtowc(wchar_t* restrict, const char* restrict, size_t, mbstate_t* restrict);
|
|
||||||
size_t mbsrtowcs(wchar_t* restrict, const char** restrict, size_t, mbstate_t* restrict);
|
size_t mbsrtowcs(wchar_t* restrict, const char** restrict, size_t, mbstate_t* restrict);
|
||||||
size_t wcrtomb(char* restrict, wchar_t, mbstate_t* restrict);
|
|
||||||
size_t wcscspn(const wchar_t*, const wchar_t*);
|
size_t wcscspn(const wchar_t*, const wchar_t*);
|
||||||
size_t wcsftime(wchar_t* restrict, size_t, const wchar_t* restrict, const struct tm* restrict);
|
size_t wcsftime(wchar_t* restrict, size_t, const wchar_t* restrict, const struct tm* restrict);
|
||||||
size_t wcslen(const wchar_t*);
|
size_t wcslen(const wchar_t*);
|
||||||
|
|
|
@ -0,0 +1,105 @@
|
||||||
|
/*******************************************************************************
|
||||||
|
|
||||||
|
Copyright(C) Jonas 'Sortie' Termansen 2012.
|
||||||
|
|
||||||
|
This file is part of the Sortix C Library.
|
||||||
|
|
||||||
|
The Sortix C Library is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU Lesser General Public License as published by
|
||||||
|
the Free Software Foundation, either version 3 of the License, or (at your
|
||||||
|
option) any later version.
|
||||||
|
|
||||||
|
The Sortix C Library is distributed in the hope that it will be useful, but
|
||||||
|
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
||||||
|
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
|
||||||
|
License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU Lesser General Public License
|
||||||
|
along with the Sortix C Library. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
|
mbrtowc.cpp
|
||||||
|
Convert a multibyte sequence to a wide character.
|
||||||
|
|
||||||
|
*******************************************************************************/
|
||||||
|
|
||||||
|
#include <errno.h>
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <wchar.h>
|
||||||
|
|
||||||
|
// Convert the UTF-8 sequence starting at s (at most n bytes long) into a wide
// character, which is stored in *pwc if pwc is not NULL.
//
// Returns the number of bytes consumed, or 0 if the null character was decoded
// or s is NULL. Returns (size_t) -2 if the n available bytes are a valid but
// incomplete prefix of a character, and (size_t) -1 with errno set to EILSEQ if
// the bytes cannot start a valid character.
//
// Sequences of up to six bytes (31 bits of payload) are accepted, matching what
// wcrtomb(3) in this library produces. The conversion state ps is not used yet,
// so after a (size_t) -2 return the caller must supply the whole sequence again.
extern "C"
size_t mbrtowc(wchar_t* restrict pwc, const char* restrict s, size_t n,
               mbstate_t* restrict /*ps*/)
{
	if ( !s )
	{
		// TODO: Restore ps to initial state if currently valid.
		return 0;
	}

	// No bytes at all is an (empty) incomplete prefix of a character.
	if ( !n )
	{
		// TODO: Support restore through the mbstate_t!
		return (size_t) -2;
	}

	const uint8_t* buf = (const uint8_t*) s;
	uint8_t lead = buf[0];

	// The lead byte determines the sequence length, contributes the most
	// significant payload bits, and implies the smallest value that may
	// legitimately be encoded using that many bytes.
	size_t sequence_len;
	uint32_t code;
	uint32_t minimum;
	if ( lead < 0x80 )
		{ sequence_len = 1; code = lead; minimum = 0; }
	else if ( (lead & 0xE0) == 0xC0 )
		{ sequence_len = 2; code = lead & 0x1F; minimum = 0x80; }
	else if ( (lead & 0xF0) == 0xE0 )
		{ sequence_len = 3; code = lead & 0x0F; minimum = 0x800; }
	else if ( (lead & 0xF8) == 0xF0 )
		{ sequence_len = 4; code = lead & 0x07; minimum = 0x10000; }
	else if ( (lead & 0xFC) == 0xF8 )
		{ sequence_len = 5; code = lead & 0x03; minimum = 0x200000; }
	else if ( (lead & 0xFE) == 0xFC )
		{ sequence_len = 6; code = lead & 0x01; minimum = 0x4000000; }
	else
	{
		// A stray continuation byte (10xxxxxx) or the bytes 0xFE and 0xFF
		// can never begin a character.
		return errno = EILSEQ, (size_t) -1;
	}

	for ( size_t i = 1; i < sequence_len; i++ )
	{
		if ( i == n )
		{
			// TODO: Support restore through the mbstate_t!
			return (size_t) -2;
		}
		uint8_t b = buf[i];
		if ( (b & 0xC0) != 0x80 )
			return errno = EILSEQ, (size_t) -1;
		// Each continuation byte appends six less significant bits.
		code = code << 6 | (b & 0x3F);
	}

	// Reject overlong encodings: the value would have fit in fewer bytes.
	if ( code < minimum )
		return errno = EILSEQ, (size_t) -1;

	if ( pwc )
		*pwc = (wchar_t) code;

	if ( !code )
	{
		// TODO: Reset ps to initial state.
		return 0;
	}

	return sequence_len;
}
|
|
@ -24,9 +24,10 @@
|
||||||
|
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
|
#include <wchar.h>
|
||||||
|
|
||||||
// TODO: This function is impure and should be removed.
//
// Convert the multibyte character at s (at most n bytes long) into a wide
// character stored in *pwd. Returns the number of bytes consumed, 0 for the
// null character, or -1 if the bytes do not form a complete valid character.
// If s is NULL, returns 0.
extern "C" int mbtowc(wchar_t* pwd, const char* s, size_t n)
{
	// A NULL s asks whether the encoding is state-dependent; report that it
	// is not.
	if ( !s )
		return 0;
	size_t result = mbrtowc(pwd, s, n, NULL);
	// mbrtowc distinguishes invalid (-1) from incomplete (-2) input, but
	// mbtowc only has -1 to report either condition.
	if ( result == (size_t) -1 || result == (size_t) -2 )
		return -1;
	return (int) result;
}
|
||||||
|
|
|
@ -0,0 +1,79 @@
|
||||||
|
/*******************************************************************************
|
||||||
|
|
||||||
|
Copyright(C) Jonas 'Sortie' Termansen 2012.
|
||||||
|
|
||||||
|
This file is part of the Sortix C Library.
|
||||||
|
|
||||||
|
The Sortix C Library is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU Lesser General Public License as published by
|
||||||
|
the Free Software Foundation, either version 3 of the License, or (at your
|
||||||
|
option) any later version.
|
||||||
|
|
||||||
|
The Sortix C Library is distributed in the hope that it will be useful, but
|
||||||
|
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
||||||
|
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
|
||||||
|
License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU Lesser General Public License
|
||||||
|
along with the Sortix C Library. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
|
wcrtomb.cpp
|
||||||
|
Convert a wide character to a multibyte sequence.
|
||||||
|
|
||||||
|
*******************************************************************************/
|
||||||
|
|
||||||
|
#include <errno.h>
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <wchar.h>
|
||||||
|
|
||||||
|
// Convert the wide character wc into its UTF-8 encoding and store it at s.
//
// Returns the number of bytes written (1 to 6), or (size_t) -1 with errno set
// to EILSEQ if wc does not fit in 31 bits. If s is NULL the call is equivalent
// to wcrtomb(buf, L'\0', ps) with an internal buffer, as ISO C requires, and
// thus returns 1 regardless of wc. The conversion state ps is unused.
extern "C"
size_t wcrtomb(char* restrict s, wchar_t wc, mbstate_t* restrict /*ps*/)
{
	// Encoding the null character into a discarded buffer always takes one
	// byte and leaves the (nonexistent) shift state in its initial state.
	if ( !s )
		return 1;

	uint32_t unicode = wc;
	if ( (1U << 31U) <= unicode )
		return errno = EILSEQ, (size_t) -1;

	// Each additional byte in the sequence buys five more payload bits.
	unsigned bytes;
	if ( unicode < (1U << 7U) )
		bytes = 1;
	else if ( unicode < (1U << 11U) )
		bytes = 2;
	else if ( unicode < (1U << 16U) )
		bytes = 3;
	else if ( unicode < (1U << 21U) )
		bytes = 4;
	else if ( unicode < (1U << 26U) )
		bytes = 5;
	else
		bytes = 6;

	uint8_t* buf = (uint8_t*) s;

	// ASCII (including the null character) is stored verbatim.
	if ( bytes == 1 )
	{
		buf[0] = (uint8_t) unicode;
		return 1;
	}

	// The lead byte consists of `bytes` one-bits, a zero bit and then the most
	// significant payload bits that did not fit in the continuation bytes.
	unsigned shift = 6 * (bytes - 1);
	uint8_t lead_marker = (uint8_t) (0xFF << (8 - bytes));
	buf[0] = lead_marker | (uint8_t) (unicode >> shift);

	// Every continuation byte is 10xxxxxx and carries the next six bits.
	for ( unsigned i = 1; i < bytes; i++ )
	{
		shift -= 6;
		buf[i] = (uint8_t) (0x80 | ((unicode >> shift) & 0x3F));
	}

	return bytes;
}
|
|
@ -0,0 +1,32 @@
|
||||||
|
/*******************************************************************************
|
||||||
|
|
||||||
|
Copyright(C) Jonas 'Sortie' Termansen 2012.
|
||||||
|
|
||||||
|
This file is part of the Sortix C Library.
|
||||||
|
|
||||||
|
The Sortix C Library is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU Lesser General Public License as published by
|
||||||
|
the Free Software Foundation, either version 3 of the License, or (at your
|
||||||
|
option) any later version.
|
||||||
|
|
||||||
|
The Sortix C Library is distributed in the hope that it will be useful, but
|
||||||
|
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
||||||
|
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
|
||||||
|
License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU Lesser General Public License
|
||||||
|
along with the Sortix C Library. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
|
wctomb.cpp
|
||||||
|
Convert a wide character to a multibyte sequence.
|
||||||
|
|
||||||
|
*******************************************************************************/
|
||||||
|
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <wchar.h>
|
||||||
|
|
||||||
|
// TODO: This function is impure and should be removed.
//
// Convert the wide character wc into a multibyte sequence stored at s and
// return its length in bytes, or -1 if wc cannot be encoded. If s is NULL,
// returns 0.
extern "C" int wctomb(char* s, wchar_t wc)
{
	// A NULL s asks whether the encoding is state-dependent; report that it
	// is not rather than forwarding the query to wcrtomb.
	if ( !s )
		return 0;
	return (int) wcrtomb(s, wc, NULL);
}
|
Loading…
Reference in New Issue