/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright (c) 2014 SGI.
 * All rights reserved.
 */
  6. #ifndef UTF8NORM_H
  7. #define UTF8NORM_H
  8. #include <linux/types.h>
  9. #include <linux/export.h>
  10. #include <linux/string.h>
  11. #include <linux/module.h>
  12. #include <linux/unicode.h>
  13. int utf8version_is_supported(const struct unicode_map *um, unsigned int version);
  14. /*
  15. * Determine the length of the normalized from of the string,
  16. * excluding any terminating NULL byte.
  17. * Returns 0 if only ignorable code points are present.
  18. * Returns -1 if the input is not valid UTF-8.
  19. */
  20. ssize_t utf8nlen(const struct unicode_map *um, enum utf8_normalization n,
  21. const char *s, size_t len);
  22. /* Needed in struct utf8cursor below. */
  23. #define UTF8HANGULLEAF (12)
  24. /*
  25. * Cursor structure used by the normalizer.
  26. */
  27. struct utf8cursor {
  28. const struct unicode_map *um;
  29. enum utf8_normalization n;
  30. const char *s;
  31. const char *p;
  32. const char *ss;
  33. const char *sp;
  34. unsigned int len;
  35. unsigned int slen;
  36. short int ccc;
  37. short int nccc;
  38. unsigned char hangul[UTF8HANGULLEAF];
  39. };
  40. /*
  41. * Initialize a utf8cursor to normalize a string.
  42. * Returns 0 on success.
  43. * Returns -1 on failure.
  44. */
  45. int utf8ncursor(struct utf8cursor *u8c, const struct unicode_map *um,
  46. enum utf8_normalization n, const char *s, size_t len);
  47. /*
  48. * Get the next byte in the normalization.
  49. * Returns a value > 0 && < 256 on success.
  50. * Returns 0 when the end of the normalization is reached.
  51. * Returns -1 if the string being normalized is not valid UTF-8.
  52. */
  53. extern int utf8byte(struct utf8cursor *u8c);
  54. struct utf8data {
  55. unsigned int maxage;
  56. unsigned int offset;
  57. };
  58. struct utf8data_table {
  59. const unsigned int *utf8agetab;
  60. int utf8agetab_size;
  61. const struct utf8data *utf8nfdicfdata;
  62. int utf8nfdicfdata_size;
  63. const struct utf8data *utf8nfdidata;
  64. int utf8nfdidata_size;
  65. const unsigned char *utf8data;
  66. };
  67. extern struct utf8data_table utf8_data_table;
  68. #endif /* UTF8NORM_H */