NLS: improve UTF8 -> UTF16 string conversion routine
The utf8s_to_utf16s conversion routine needs to be improved. Unlike its utf16s_to_utf8s sibling, it doesn't accept arguments specifying the maximum length of the output buffer or the endianness of its 16-bit output.

This patch (as1501) adds the two missing arguments, and adjusts the only two places in the kernel where the function is called. A follow-on patch will add a third caller that does utilize the new capabilities.

The two conversion routines are still annoyingly inconsistent in the way they handle invalid byte combinations. But that's a subject for a different patch.

Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
CC: Clemens Ladisch <clemens@ladisch.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
This commit is contained in:

committed by
Greg Kroah-Hartman

parent
b7463c71fb
commit
0720a06a75
@@ -114,34 +114,57 @@ int utf32_to_utf8(unicode_t u, u8 *s, int maxlen)
|
||||
}
|
||||
EXPORT_SYMBOL(utf32_to_utf8);
|
||||
|
||||
int utf8s_to_utf16s(const u8 *s, int len, wchar_t *pwcs)
|
||||
/*
 * put_utf16 - store one 16-bit code unit at *s in the requested byte order.
 * @s:      destination for the code unit
 * @c:      value to store
 * @endian: selects the conversion applied to @c before it is written
 *
 * UTF16_LITTLE_ENDIAN writes __cpu_to_le16(c), UTF16_BIG_ENDIAN writes
 * __cpu_to_be16(c).  Any other @endian value takes the default branch and
 * writes @c cast to wchar_t with no byte-order conversion.
 */
static inline void put_utf16(wchar_t *s, unsigned c, enum utf16_endian endian)
|
||||
{
|
||||
switch (endian) {
|
||||
/* default is listed first; it also catches any unrecognized enum value */
default:
|
||||
*s = (wchar_t) c;
|
||||
break;
|
||||
case UTF16_LITTLE_ENDIAN:
|
||||
*s = __cpu_to_le16(c);
|
||||
break;
|
||||
case UTF16_BIG_ENDIAN:
|
||||
*s = __cpu_to_be16(c);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
int utf8s_to_utf16s(const u8 *s, int len, enum utf16_endian endian,
|
||||
wchar_t *pwcs, int maxlen)
|
||||
{
|
||||
u16 *op;
|
||||
int size;
|
||||
unicode_t u;
|
||||
|
||||
op = pwcs;
|
||||
while (*s && len > 0) {
|
||||
while (len > 0 && maxlen > 0 && *s) {
|
||||
if (*s & 0x80) {
|
||||
size = utf8_to_utf32(s, len, &u);
|
||||
if (size < 0)
|
||||
return -EINVAL;
|
||||
|
||||
if (u >= PLANE_SIZE) {
|
||||
u -= PLANE_SIZE;
|
||||
*op++ = (wchar_t) (SURROGATE_PAIR |
|
||||
((u >> 10) & SURROGATE_BITS));
|
||||
*op++ = (wchar_t) (SURROGATE_PAIR |
|
||||
SURROGATE_LOW |
|
||||
(u & SURROGATE_BITS));
|
||||
} else {
|
||||
*op++ = (wchar_t) u;
|
||||
}
|
||||
s += size;
|
||||
len -= size;
|
||||
|
||||
if (u >= PLANE_SIZE) {
|
||||
if (maxlen < 2)
|
||||
break;
|
||||
u -= PLANE_SIZE;
|
||||
put_utf16(op++, SURROGATE_PAIR |
|
||||
((u >> 10) & SURROGATE_BITS),
|
||||
endian);
|
||||
put_utf16(op++, SURROGATE_PAIR |
|
||||
SURROGATE_LOW |
|
||||
(u & SURROGATE_BITS),
|
||||
endian);
|
||||
maxlen -= 2;
|
||||
} else {
|
||||
put_utf16(op++, u, endian);
|
||||
maxlen--;
|
||||
}
|
||||
} else {
|
||||
*op++ = *s++;
|
||||
put_utf16(op++, *s++, endian);
|
||||
len--;
|
||||
maxlen--;
|
||||
}
|
||||
}
|
||||
return op - pwcs;
|
||||
|
Reference in New Issue
Block a user