html_furi.c 5.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176
  1. #include <stdio.h>
  2. #include <string.h>
  3. #include <stdbool.h>
  4. #include <html/html_furi.h>
  5. /*
  6. * Helper function: Checks if the substring of the FuriString starting at index `pos`
  7. * matches the given C-string `needle`.
  8. * Returns true if it matches; otherwise false.
  9. */
  10. static bool furi_string_sub_equals(FuriString *str, int pos, const char *needle)
  11. {
  12. size_t needle_len = strlen(needle);
  13. if (pos + needle_len > furi_string_size(str))
  14. {
  15. return false;
  16. }
  17. for (size_t i = 0; i < needle_len; i++)
  18. {
  19. if (furi_string_get_char(str, pos + i) != needle[i])
  20. {
  21. return false;
  22. }
  23. }
  24. return true;
  25. }
  26. /*
  27. * @brief Parse a Furigana string from an HTML tag, handling nested child tags.
  28. *
  29. * This version accepts an HTML tag as a C-string (e.g., "<p>") and searches
  30. * for the content inside the corresponding opening and closing tags within
  31. * the provided HTML string, taking into account nested occurrences of the tag.
  32. *
  33. * For example, given the HTML string:
  34. * "<p><h1><p><h1>Test</h1></p></h1></p>"
  35. * and searching with tag "<p>" the function will return:
  36. * "<h1><p><h1>Test</h1></p></h1>"
  37. *
  38. * @param tag The HTML tag to parse (including the angle brackets).
  39. * @param html The HTML string to parse (as a FuriString).
  40. * @return A newly allocated FuriString containing the parsed content,
  41. * or an empty FuriString if the tag is not found.
  42. */
  43. FuriString *html_furi_find_tag(const char *tag, FuriString *html, size_t index)
  44. {
  45. int tag_len = strlen(tag);
  46. // Ensure the tag is at least 3 characters long (e.g., "<p>")
  47. if (tag_len < 3)
  48. {
  49. FURI_LOG_E("html_furi_parse", "Invalid tag length");
  50. return NULL;
  51. }
  52. // Extract the inner tag name from the provided C-string tag.
  53. // For example, for "<p>" extract "p".
  54. int inner_len = tag_len - 2; // Exclude the '<' and '>'
  55. char inner_tag[inner_len + 1];
  56. for (int i = 0; i < inner_len; i++)
  57. {
  58. inner_tag[i] = tag[i + 1];
  59. }
  60. inner_tag[inner_len] = '\0';
  61. // Build the expected closing tag as a C-string (e.g., "</p>").
  62. int closing_tag_size = inner_len + 4; // "</" + inner tag + ">" + '\0'
  63. char closing_tag[closing_tag_size];
  64. snprintf(closing_tag, closing_tag_size, "</%s>", inner_tag);
  65. // Find the opening tag in the HTML.
  66. // Locate the first occurrence of the opening tag.
  67. int html_len = furi_string_size(html);
  68. int open_tag_index = -1;
  69. for (int i = index; i <= html_len - tag_len; i++)
  70. {
  71. if (furi_string_sub_equals(html, i, tag))
  72. {
  73. open_tag_index = i;
  74. break;
  75. }
  76. }
  77. if (open_tag_index == -1)
  78. {
  79. // Opening tag not found; return an empty FuriString.
  80. FURI_LOG_E("html_furi_parse", "Opening tag not found");
  81. return NULL;
  82. }
  83. // Content starts immediately after the opening tag.
  84. int content_start = open_tag_index + tag_len;
  85. // Skip any leading whitespace.
  86. while (content_start < html_len && furi_string_get_char(html, content_start) == ' ')
  87. {
  88. content_start++;
  89. }
  90. // Now search for the matching closing tag. We use a depth counter to handle nested tags.
  91. int depth = 1;
  92. int i = content_start;
  93. int matching_close_index = -1;
  94. while (i <= html_len - 1)
  95. {
  96. // Check for opening tag first.
  97. if (furi_string_sub_equals(html, i, tag))
  98. {
  99. depth++;
  100. i += tag_len;
  101. continue;
  102. }
  103. // Check for closing tag.
  104. if (furi_string_sub_equals(html, i, closing_tag))
  105. {
  106. depth--;
  107. if (depth == 0)
  108. {
  109. matching_close_index = i;
  110. break;
  111. }
  112. i += strlen(closing_tag);
  113. continue;
  114. }
  115. i++;
  116. }
  117. if (matching_close_index == -1)
  118. {
  119. // Matching closing tag not found; return an empty FuriString.
  120. FURI_LOG_E("html_furi_parse", "Matching closing tag not found");
  121. return NULL;
  122. }
  123. // The content spans from content_start up to matching_close_index.
  124. size_t content_length = matching_close_index - content_start;
  125. if (memmgr_get_free_heap() < (content_length + 1 + 4096)) // 4KB buffer
  126. {
  127. FURI_LOG_E("html_furi_parse", "Not enough heap to allocate result");
  128. return NULL;
  129. }
  130. // Allocate the result string and copy the content.
  131. FuriString *result = furi_string_alloc();
  132. furi_string_reserve(result, content_length);
  133. furi_string_set_n(result, html, content_start, content_length);
  134. return result;
  135. }
  136. /*
  137. * @brief Parse all Furigana strings from an HTML tag, handling nested child tags.
  138. * @param tag The HTML tag to parse (including the angle brackets).
  139. * @param html The HTML string to parse (as a FuriString).
  140. * @return A newly allocated FuriString containing the parsed content,
  141. * or an empty FuriString if the tag is not found.
  142. */
  143. FuriString *html_furi_find_tags(const char *tag, FuriString *html)
  144. {
  145. FuriString *result = furi_string_alloc();
  146. size_t index = 0;
  147. while (true)
  148. {
  149. FuriString *parsed = html_furi_find_tag(tag, html, index);
  150. if (parsed == NULL)
  151. {
  152. break;
  153. }
  154. furi_string_cat(result, parsed);
  155. furi_string_free(parsed);
  156. // start after the strlen(tag)
  157. // this is so we don't miss the inner tags
  158. // I may change this to: index += furi_string_size(parsed)
  159. index += strlen(tag);
  160. }
  161. return result;
  162. }