|
@@ -4,14 +4,13 @@
|
|
|
#include <html/html_furi.h>
|
|
#include <html/html_furi.h>
|
|
|
|
|
|
|
|
/*
|
|
/*
|
|
|
- * Helper function: Checks if the substring of the FuriString starting at index `pos`
|
|
|
|
|
|
|
+ * Checks if the substring of the FuriString starting at index `pos`
|
|
|
* matches the given C-string `needle`.
|
|
* matches the given C-string `needle`.
|
|
|
- * Returns true if it matches; otherwise false.
|
|
|
|
|
*/
|
|
*/
|
|
|
static bool furi_string_sub_equals(FuriString *str, int pos, const char *needle)
|
|
static bool furi_string_sub_equals(FuriString *str, int pos, const char *needle)
|
|
|
{
|
|
{
|
|
|
size_t needle_len = strlen(needle);
|
|
size_t needle_len = strlen(needle);
|
|
|
- if (pos + needle_len > furi_string_size(str))
|
|
|
|
|
|
|
+ if ((size_t)pos + needle_len > furi_string_size(str))
|
|
|
{
|
|
{
|
|
|
return false;
|
|
return false;
|
|
|
}
|
|
}
|
|
@@ -26,36 +25,24 @@ static bool furi_string_sub_equals(FuriString *str, int pos, const char *needle)
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
/*
|
|
|
- * @brief Parse a Furigana string from an HTML tag, handling nested child tags.
|
|
|
|
|
|
|
+ * Parse the content for a given HTML tag <tag> in `html`, handling nested tags.
|
|
|
|
|
+ * Returns a newly allocated FuriString or NULL on error.
|
|
|
*
|
|
*
|
|
|
- * This version accepts an HTML tag as a C-string (e.g., "<p>") and searches
|
|
|
|
|
- * for the content inside the corresponding opening and closing tags within
|
|
|
|
|
- * the provided HTML string, taking into account nested occurrences of the tag.
|
|
|
|
|
- *
|
|
|
|
|
- * For example, given the HTML string:
|
|
|
|
|
- * "<p><h1><p><h1>Test</h1></p></h1></p>"
|
|
|
|
|
- * and searching with tag "<p>" the function will return:
|
|
|
|
|
- * "<h1><p><h1>Test</h1></p></h1>"
|
|
|
|
|
- *
|
|
|
|
|
- * @param tag The HTML tag to parse (including the angle brackets).
|
|
|
|
|
- * @param html The HTML string to parse (as a FuriString).
|
|
|
|
|
- * @return A newly allocated FuriString containing the parsed content,
|
|
|
|
|
- * or an empty FuriString if the tag is not found.
|
|
|
|
|
|
|
+ * @param tag e.g. "<p>"
|
|
|
|
|
+ * @param html The HTML string to parse.
|
|
|
|
|
+ * @param index The position in `html` from where to start searching.
|
|
|
*/
|
|
*/
|
|
|
FuriString *html_furi_find_tag(const char *tag, FuriString *html, size_t index)
|
|
FuriString *html_furi_find_tag(const char *tag, FuriString *html, size_t index)
|
|
|
{
|
|
{
|
|
|
int tag_len = strlen(tag);
|
|
int tag_len = strlen(tag);
|
|
|
-
|
|
|
|
|
- // Ensure the tag is at least 3 characters long (e.g., "<p>")
|
|
|
|
|
if (tag_len < 3)
|
|
if (tag_len < 3)
|
|
|
{
|
|
{
|
|
|
FURI_LOG_E("html_furi_parse", "Invalid tag length");
|
|
FURI_LOG_E("html_furi_parse", "Invalid tag length");
|
|
|
return NULL;
|
|
return NULL;
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
- // Extract the inner tag name from the provided C-string tag.
|
|
|
|
|
- // For example, for "<p>" extract "p".
|
|
|
|
|
- int inner_len = tag_len - 2; // Exclude the '<' and '>'
|
|
|
|
|
|
|
+ // Extract the tag name from <p> => "p"
|
|
|
|
|
+ int inner_len = tag_len - 2; // exclude '<' and '>'
|
|
|
char inner_tag[inner_len + 1];
|
|
char inner_tag[inner_len + 1];
|
|
|
for (int i = 0; i < inner_len; i++)
|
|
for (int i = 0; i < inner_len; i++)
|
|
|
{
|
|
{
|
|
@@ -63,14 +50,13 @@ FuriString *html_furi_find_tag(const char *tag, FuriString *html, size_t index)
|
|
|
}
|
|
}
|
|
|
inner_tag[inner_len] = '\0';
|
|
inner_tag[inner_len] = '\0';
|
|
|
|
|
|
|
|
- // Build the expected closing tag as a C-string (e.g., "</p>").
|
|
|
|
|
- int closing_tag_size = inner_len + 4; // "</" + inner tag + ">" + '\0'
|
|
|
|
|
- char closing_tag[closing_tag_size];
|
|
|
|
|
- snprintf(closing_tag, closing_tag_size, "</%s>", inner_tag);
|
|
|
|
|
|
|
+ // Build closing tag => "</p>"
|
|
|
|
|
+ char closing_tag[inner_len + 4];
|
|
|
|
|
+ snprintf(closing_tag, sizeof(closing_tag), "</%s>", inner_tag);
|
|
|
|
|
|
|
|
- // Find the opening tag in the HTML.
|
|
|
|
|
- // Locate the first occurrence of the opening tag.
|
|
|
|
|
int html_len = furi_string_size(html);
|
|
int html_len = furi_string_size(html);
|
|
|
|
|
+
|
|
|
|
|
+ // Find the first occurrence of the opening tag
|
|
|
int open_tag_index = -1;
|
|
int open_tag_index = -1;
|
|
|
for (int i = index; i <= html_len - tag_len; i++)
|
|
for (int i = index; i <= html_len - tag_len; i++)
|
|
|
{
|
|
{
|
|
@@ -82,34 +68,31 @@ FuriString *html_furi_find_tag(const char *tag, FuriString *html, size_t index)
|
|
|
}
|
|
}
|
|
|
if (open_tag_index == -1)
|
|
if (open_tag_index == -1)
|
|
|
{
|
|
{
|
|
|
- // Opening tag not found; return an empty FuriString.
|
|
|
|
|
- FURI_LOG_E("html_furi_parse", "Opening tag not found");
|
|
|
|
|
|
|
+ // Tag not found
|
|
|
return NULL;
|
|
return NULL;
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
- // Content starts immediately after the opening tag.
|
|
|
|
|
|
|
+ // Content starts after the opening tag
|
|
|
int content_start = open_tag_index + tag_len;
|
|
int content_start = open_tag_index + tag_len;
|
|
|
|
|
|
|
|
- // Skip any leading whitespace.
|
|
|
|
|
|
|
+ // Skip leading whitespace
|
|
|
while (content_start < html_len && furi_string_get_char(html, content_start) == ' ')
|
|
while (content_start < html_len && furi_string_get_char(html, content_start) == ' ')
|
|
|
{
|
|
{
|
|
|
content_start++;
|
|
content_start++;
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
- // Now search for the matching closing tag. We use a depth counter to handle nested tags.
|
|
|
|
|
|
|
+ // Find matching closing tag, accounting for nested tags
|
|
|
int depth = 1;
|
|
int depth = 1;
|
|
|
int i = content_start;
|
|
int i = content_start;
|
|
|
int matching_close_index = -1;
|
|
int matching_close_index = -1;
|
|
|
while (i <= html_len - 1)
|
|
while (i <= html_len - 1)
|
|
|
{
|
|
{
|
|
|
- // Check for opening tag first.
|
|
|
|
|
if (furi_string_sub_equals(html, i, tag))
|
|
if (furi_string_sub_equals(html, i, tag))
|
|
|
{
|
|
{
|
|
|
depth++;
|
|
depth++;
|
|
|
i += tag_len;
|
|
i += tag_len;
|
|
|
continue;
|
|
continue;
|
|
|
}
|
|
}
|
|
|
- // Check for closing tag.
|
|
|
|
|
if (furi_string_sub_equals(html, i, closing_tag))
|
|
if (furi_string_sub_equals(html, i, closing_tag))
|
|
|
{
|
|
{
|
|
|
depth--;
|
|
depth--;
|
|
@@ -126,51 +109,150 @@ FuriString *html_furi_find_tag(const char *tag, FuriString *html, size_t index)
|
|
|
|
|
|
|
|
if (matching_close_index == -1)
|
|
if (matching_close_index == -1)
|
|
|
{
|
|
{
|
|
|
- // Matching closing tag not found; return an empty FuriString.
|
|
|
|
|
- FURI_LOG_E("html_furi_parse", "Matching closing tag not found");
|
|
|
|
|
|
|
+ // No matching close => return NULL or partial content as you choose
|
|
|
return NULL;
|
|
return NULL;
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
- // The content spans from content_start up to matching_close_index.
|
|
|
|
|
|
|
+ // Copy the content between <tag>...</tag>
|
|
|
size_t content_length = matching_close_index - content_start;
|
|
size_t content_length = matching_close_index - content_start;
|
|
|
- if (memmgr_get_free_heap() < (content_length + 1 + 4096)) // 4KB buffer
|
|
|
|
|
|
|
+
|
|
|
|
|
+ if (memmgr_get_free_heap() < (content_length + 1 + 1024))
|
|
|
{
|
|
{
|
|
|
FURI_LOG_E("html_furi_parse", "Not enough heap to allocate result");
|
|
FURI_LOG_E("html_furi_parse", "Not enough heap to allocate result");
|
|
|
return NULL;
|
|
return NULL;
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
- // Allocate the result string and copy the content.
|
|
|
|
|
|
|
+ // Allocate and copy
|
|
|
FuriString *result = furi_string_alloc();
|
|
FuriString *result = furi_string_alloc();
|
|
|
- furi_string_reserve(result, content_length);
|
|
|
|
|
|
|
+ furi_string_reserve(result, content_length + 1);
|
|
|
furi_string_set_n(result, html, content_start, content_length);
|
|
furi_string_set_n(result, html, content_start, content_length);
|
|
|
|
|
|
|
|
return result;
|
|
return result;
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
|
|
+static FuriString *_html_furi_find_tag(const char *tag, FuriString *html, size_t index, int *out_next_index)
|
|
|
|
|
+{
|
|
|
|
|
+ // Clear next index in case of early return
|
|
|
|
|
+ *out_next_index = -1;
|
|
|
|
|
+
|
|
|
|
|
+ int tag_len = strlen(tag);
|
|
|
|
|
+ if (tag_len < 3)
|
|
|
|
|
+ {
|
|
|
|
|
+ FURI_LOG_E("html_furi_parse", "Invalid tag length");
|
|
|
|
|
+ return NULL;
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ // Extract "p" from "<p>"
|
|
|
|
|
+ int inner_len = tag_len - 2;
|
|
|
|
|
+ char inner_tag[inner_len + 1];
|
|
|
|
|
+ for (int i = 0; i < inner_len; i++)
|
|
|
|
|
+ {
|
|
|
|
|
+ inner_tag[i] = tag[i + 1];
|
|
|
|
|
+ }
|
|
|
|
|
+ inner_tag[inner_len] = '\0';
|
|
|
|
|
+
|
|
|
|
|
+ // Create closing tag => "</p>"
|
|
|
|
|
+ char closing_tag[inner_len + 4];
|
|
|
|
|
+ snprintf(closing_tag, sizeof(closing_tag), "</%s>", inner_tag);
|
|
|
|
|
+
|
|
|
|
|
+ int html_len = furi_string_size(html);
|
|
|
|
|
+
|
|
|
|
|
+ // 1) Find opening tag from `index`.
|
|
|
|
|
+ int open_tag_index = -1;
|
|
|
|
|
+ for (int i = index; i <= html_len - tag_len; i++)
|
|
|
|
|
+ {
|
|
|
|
|
+ if (furi_string_sub_equals(html, i, tag))
|
|
|
|
|
+ {
|
|
|
|
|
+ open_tag_index = i;
|
|
|
|
|
+ break;
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+ if (open_tag_index == -1)
|
|
|
|
|
+ {
|
|
|
|
|
+ return NULL; // no more occurrences
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ // The content begins after the opening tag.
|
|
|
|
|
+ int content_start = open_tag_index + tag_len;
|
|
|
|
|
+
|
|
|
|
|
+ // skip leading spaces
|
|
|
|
|
+ while (content_start < html_len && furi_string_get_char(html, content_start) == ' ')
|
|
|
|
|
+ {
|
|
|
|
|
+ content_start++;
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ int depth = 1;
|
|
|
|
|
+ int i = content_start;
|
|
|
|
|
+ int matching_close_index = -1;
|
|
|
|
|
+
|
|
|
|
|
+ while (i < html_len)
|
|
|
|
|
+ {
|
|
|
|
|
+ if (furi_string_sub_equals(html, i, tag))
|
|
|
|
|
+ {
|
|
|
|
|
+ depth++;
|
|
|
|
|
+ i += tag_len;
|
|
|
|
|
+ }
|
|
|
|
|
+ else if (furi_string_sub_equals(html, i, closing_tag))
|
|
|
|
|
+ {
|
|
|
|
|
+ depth--;
|
|
|
|
|
+ i += strlen(closing_tag);
|
|
|
|
|
+ if (depth == 0)
|
|
|
|
|
+ {
|
|
|
|
|
+ matching_close_index = i - strlen(closing_tag);
|
|
|
|
|
+ // i now points just after "</p>"
|
|
|
|
|
+ break;
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+ else
|
|
|
|
|
+ {
|
|
|
|
|
+ i++;
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ if (matching_close_index == -1)
|
|
|
|
|
+ {
|
|
|
|
|
+ // No matching close tag found
|
|
|
|
|
+ return NULL;
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ size_t content_length = matching_close_index - content_start;
|
|
|
|
|
+
|
|
|
|
|
+ // Allocate the result
|
|
|
|
|
+ FuriString *result = furi_string_alloc();
|
|
|
|
|
+ furi_string_reserve(result, content_length + 1); // +1 for safety
|
|
|
|
|
+ furi_string_set_n(result, html, content_start, content_length);
|
|
|
|
|
+
|
|
|
|
|
+ *out_next_index = i;
|
|
|
|
|
+
|
|
|
|
|
+ return result;
|
|
|
|
|
+}
|
|
|
|
|
+
|
|
|
/*
|
|
/*
|
|
|
- * @brief Parse all Furigana strings from an HTML tag, handling nested child tags.
|
|
|
|
|
- * @param tag The HTML tag to parse (including the angle brackets).
|
|
|
|
|
- * @param html The HTML string to parse (as a FuriString).
|
|
|
|
|
- * @return A newly allocated FuriString containing the parsed content,
|
|
|
|
|
- * or an empty FuriString if the tag is not found.
|
|
|
|
|
|
|
+ * Parse *all* occurrences of <tag> in `html`, handling nested tags.
|
|
|
|
|
+ * Returns a FuriString concatenating all parsed contents.
|
|
|
*/
|
|
*/
|
|
|
FuriString *html_furi_find_tags(const char *tag, FuriString *html)
|
|
FuriString *html_furi_find_tags(const char *tag, FuriString *html)
|
|
|
{
|
|
{
|
|
|
FuriString *result = furi_string_alloc();
|
|
FuriString *result = furi_string_alloc();
|
|
|
size_t index = 0;
|
|
size_t index = 0;
|
|
|
|
|
+
|
|
|
while (true)
|
|
while (true)
|
|
|
{
|
|
{
|
|
|
- FuriString *parsed = html_furi_find_tag(tag, html, index);
|
|
|
|
|
|
|
+ int next_index;
|
|
|
|
|
+ FuriString *parsed = _html_furi_find_tag(tag, html, index, &next_index);
|
|
|
if (parsed == NULL)
|
|
if (parsed == NULL)
|
|
|
{
|
|
{
|
|
|
|
|
+ // No more tags from 'index' onward
|
|
|
break;
|
|
break;
|
|
|
}
|
|
}
|
|
|
|
|
+
|
|
|
|
|
+ // Append the found content
|
|
|
furi_string_cat(result, parsed);
|
|
furi_string_cat(result, parsed);
|
|
|
furi_string_free(parsed);
|
|
furi_string_free(parsed);
|
|
|
- // start after the strlen(tag)
|
|
|
|
|
- // this is so we don't miss the inner tags
|
|
|
|
|
- // I may change this to: index += furi_string_size(parsed)
|
|
|
|
|
- index += strlen(tag);
|
|
|
|
|
|
|
+
|
|
|
|
|
+ // Resume searching at `next_index` (just after `</tag>`).
|
|
|
|
|
+ index = next_index;
|
|
|
}
|
|
}
|
|
|
|
|
+
|
|
|
return result;
|
|
return result;
|
|
|
-}
|
|
|
|
|
|
|
+}
|