charset.c 2.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101
  1. /*
  2. * charset conversion utils
  3. *
  4. * Copyright (c) 2017 Rob Clark
  5. *
  6. * SPDX-License-Identifier: GPL-2.0+
  7. */
  8. #include <common.h>
  9. #include <charset.h>
  10. #include <malloc.h>
  11. /*
  12. * utf8/utf16 conversion mostly lifted from grub
  13. */
  14. size_t utf16_strlen(const uint16_t *in)
  15. {
  16. size_t i;
  17. for (i = 0; in[i]; i++);
  18. return i;
  19. }
  20. size_t utf16_strnlen(const uint16_t *in, size_t count)
  21. {
  22. size_t i;
  23. for (i = 0; count-- && in[i]; i++);
  24. return i;
  25. }
  26. uint16_t *utf16_strcpy(uint16_t *dest, const uint16_t *src)
  27. {
  28. uint16_t *tmp = dest;
  29. while ((*dest++ = *src++) != '\0')
  30. /* nothing */;
  31. return tmp;
  32. }
  33. uint16_t *utf16_strdup(const uint16_t *s)
  34. {
  35. uint16_t *new;
  36. if (!s || !(new = malloc((utf16_strlen(s) + 1) * 2)))
  37. return NULL;
  38. utf16_strcpy(new, s);
  39. return new;
  40. }
  41. /* Convert UTF-16 to UTF-8. */
  42. uint8_t *utf16_to_utf8(uint8_t *dest, const uint16_t *src, size_t size)
  43. {
  44. uint32_t code_high = 0;
  45. while (size--) {
  46. uint32_t code = *src++;
  47. if (code_high) {
  48. if (code >= 0xDC00 && code <= 0xDFFF) {
  49. /* Surrogate pair. */
  50. code = ((code_high - 0xD800) << 10) + (code - 0xDC00) + 0x10000;
  51. *dest++ = (code >> 18) | 0xF0;
  52. *dest++ = ((code >> 12) & 0x3F) | 0x80;
  53. *dest++ = ((code >> 6) & 0x3F) | 0x80;
  54. *dest++ = (code & 0x3F) | 0x80;
  55. } else {
  56. /* Error... */
  57. *dest++ = '?';
  58. /* *src may be valid. Don't eat it. */
  59. src--;
  60. }
  61. code_high = 0;
  62. } else {
  63. if (code <= 0x007F) {
  64. *dest++ = code;
  65. } else if (code <= 0x07FF) {
  66. *dest++ = (code >> 6) | 0xC0;
  67. *dest++ = (code & 0x3F) | 0x80;
  68. } else if (code >= 0xD800 && code <= 0xDBFF) {
  69. code_high = code;
  70. continue;
  71. } else if (code >= 0xDC00 && code <= 0xDFFF) {
  72. /* Error... */
  73. *dest++ = '?';
  74. } else if (code < 0x10000) {
  75. *dest++ = (code >> 12) | 0xE0;
  76. *dest++ = ((code >> 6) & 0x3F) | 0x80;
  77. *dest++ = (code & 0x3F) | 0x80;
  78. } else {
  79. *dest++ = (code >> 18) | 0xF0;
  80. *dest++ = ((code >> 12) & 0x3F) | 0x80;
  81. *dest++ = ((code >> 6) & 0x3F) | 0x80;
  82. *dest++ = (code & 0x3F) | 0x80;
  83. }
  84. }
  85. }
  86. return dest;
  87. }