1 год назад · b2dbe76513
--- a/callback/web_crawler_callback.c
+++ b/callback/web_crawler_callback.c
@@ -747,44 +747,18 @@ static char *web_crawler_parse(DataLoaderModel *model)
 
															         {
														
 
															             // parse HTML then return response
														
 
															             FuriString *returned_data = flipper_http_load_from_file(model->fhttp->file_path);
														
 
															-            if (returned_data == NULL)
														
 
															+            if (returned_data == NULL || furi_string_size(returned_data) == 0)
														
 
															             {
														
 
															                 return "Failed to load HTML response.\nPress BACK to return.";
														
 
															             }
														
 
															             // parse HTML response
														
 
															-            FuriString *h1_tag = html_furi_find_tag("<h1>", returned_data, 0);
														
 
															-            FuriString *p_tag = html_furi_find_tag("<p>", returned_data, 0);
														
 
															+            FuriString *p_tags = html_furi_find_tags("<p>", returned_data);
														
 
															             furi_string_free(returned_data);
														
 
															-            if (p_tag == NULL && h1_tag == NULL)
														
 
															+            if (p_tags == NULL)
														
 
															             {
														
 
															-                return "Failed to find <h1> or <p> tag.\nPress BACK to return.";
														
 
															-            }
														
 
															-            else if (p_tag && h1_tag)
														
 
															-            {
														
 
															-                FuriString *combined = furi_string_alloc_printf("%s\n%s", furi_string_get_cstr(h1_tag), furi_string_get_cstr(p_tag));
														
 
															-                if (combined)
														
 
															-                {
														
 
															-                    furi_string_free(h1_tag);
														
 
															-                    furi_string_free(p_tag);
														
 
															-                    return (char *)furi_string_get_cstr(combined);
														
 
															-                }
														
 
															-                else
														
 
															-                {
														
 
															-                    furi_string_free(h1_tag);
														
 
															-                    furi_string_free(p_tag);
														
 
															-                    return "Failed to combine <h1> and <p> tags.\nPress BACK to return.";
														
 
															-                }
														
 
															-            }
														
 
															-            else if (h1_tag != NULL)
														
 
															-            {
														
 
															-                furi_string_free(p_tag);
														
 
															-                return (char *)furi_string_get_cstr(h1_tag);
														
 
															-            }
														
 
															-            else if (p_tag != NULL)
														
 
															-            {
														
 
															-                furi_string_free(h1_tag);
														
 
															-                return (char *)furi_string_get_cstr(p_tag);
														
 
															+                return "Failed to find <p> tag.\nPress BACK to return.";
														
 
															             }
														
 
															+            return (char *)furi_string_get_cstr(p_tags);
														
 
															         }
														
 
															     }
														
 
															     return "Data saved to file.\nPress BACK to return.";
														
--- a/html/html_furi.c
+++ b/html/html_furi.c
@@ -4,14 +4,13 @@
 
															 #include <html/html_furi.h>
														
 
															 /*
														
 
															- * Helper function: Checks if the substring of the FuriString starting at index `pos`
														
 
															+ * Checks if the substring of the FuriString starting at index `pos`
														
 
															  * matches the given C-string `needle`.
														
 
															- * Returns true if it matches; otherwise false.
														
 
															  */
														
 
															 static bool furi_string_sub_equals(FuriString *str, int pos, const char *needle)
														
 
															 {
														
 
															     size_t needle_len = strlen(needle);
														
 
															-    if (pos + needle_len > furi_string_size(str))
														
 
															+    if ((size_t)pos + needle_len > furi_string_size(str))
														
 
															     {
														
 
															         return false;
														
 
															     }
														
@@ -26,36 +25,24 @@ static bool furi_string_sub_equals(FuriString *str, int pos, const char *needle)
 
															 }
														
 
															 /*
														
 
															- * @brief Parse a Furigana string from an HTML tag, handling nested child tags.
														
 
															+ * Parse the content for a given HTML tag <tag> in `html`, handling nested tags.
														
 
															+ * Returns a newly allocated FuriString or NULL on error.
														
 
															  *
														
 
															- * This version accepts an HTML tag as a C-string (e.g., "<p>") and searches
														
 
															- * for the content inside the corresponding opening and closing tags within
														
 
															- * the provided HTML string, taking into account nested occurrences of the tag.
														
 
															- *
														
 
															- * For example, given the HTML string:
														
 
															- *     "<p><h1><p><h1>Test</h1></p></h1></p>"
														
 
															- * and searching with tag "<p>" the function will return:
														
 
															- *     "<h1><p><h1>Test</h1></p></h1>"
														
 
															- *
														
 
															- * @param tag The HTML tag to parse (including the angle brackets).
														
 
															- * @param html The HTML string to parse (as a FuriString).
														
 
															- * @return A newly allocated FuriString containing the parsed content,
														
 
															- *         or an empty FuriString if the tag is not found.
														
 
															+ * @param tag    e.g. "<p>"
														
 
															+ * @param html   The HTML string to parse.
														
 
															+ * @param index  The position in `html` from where to start searching.
														
 
															  */
														
 
															 FuriString *html_furi_find_tag(const char *tag, FuriString *html, size_t index)
														
 
															 {
														
 
															     int tag_len = strlen(tag);
														
 
															-
														
 
															-    // Ensure the tag is at least 3 characters long (e.g., "<p>")
														
 
															     if (tag_len < 3)
														
 
															     {
														
 
															         FURI_LOG_E("html_furi_parse", "Invalid tag length");
														
 
															         return NULL;
														
 
															     }
														
 
															-    // Extract the inner tag name from the provided C-string tag.
														
 
															-    // For example, for "<p>" extract "p".
														
 
															-    int inner_len = tag_len - 2; // Exclude the '<' and '>'
														
 
															+    // Extract the tag name from <p> => "p"
														
 
															+    int inner_len = tag_len - 2; // exclude '<' and '>'
														
 
															     char inner_tag[inner_len + 1];
														
 
															     for (int i = 0; i < inner_len; i++)
														
 
															     {
														
@@ -63,14 +50,13 @@ FuriString *html_furi_find_tag(const char *tag, FuriString *html, size_t index)
 
															     }
														
 
															     inner_tag[inner_len] = '\0';
														
 
															-    // Build the expected closing tag as a C-string (e.g., "</p>").
														
 
															-    int closing_tag_size = inner_len + 4; // "</" + inner tag + ">" + '\0'
														
 
															-    char closing_tag[closing_tag_size];
														
 
															-    snprintf(closing_tag, closing_tag_size, "</%s>", inner_tag);
														
 
															+    // Build closing tag => "</p>"
														
 
															+    char closing_tag[inner_len + 4];
														
 
															+    snprintf(closing_tag, sizeof(closing_tag), "</%s>", inner_tag);
														
 
															-    // Find the opening tag in the HTML.
														
 
															-    // Locate the first occurrence of the opening tag.
														
 
															     int html_len = furi_string_size(html);
														
 
															+
														
 
															+    // Find the first occurrence of the opening tag
														
 
															     int open_tag_index = -1;
														
 
															     for (int i = index; i <= html_len - tag_len; i++)
														
 
															     {
														
@@ -82,34 +68,31 @@ FuriString *html_furi_find_tag(const char *tag, FuriString *html, size_t index)
 
															     }
														
 
															     if (open_tag_index == -1)
														
 
															     {
														
 
															-        // Opening tag not found; return an empty FuriString.
														
 
															-        FURI_LOG_E("html_furi_parse", "Opening tag not found");
														
 
															+        // Tag not found
														
 
															         return NULL;
														
 
															     }
														
 
															-    // Content starts immediately after the opening tag.
														
 
															+    // Content starts after the opening tag
														
 
															     int content_start = open_tag_index + tag_len;
														
 
															-    // Skip any leading whitespace.
														
 
															+    // Skip leading whitespace
														
 
															     while (content_start < html_len && furi_string_get_char(html, content_start) == ' ')
														
 
															     {
														
 
															         content_start++;
														
 
															     }
														
 
															-    // Now search for the matching closing tag. We use a depth counter to handle nested tags.
														
 
															+    // Find matching closing tag, accounting for nested tags
														
 
															     int depth = 1;
														
 
															     int i = content_start;
														
 
															     int matching_close_index = -1;
														
 
															     while (i <= html_len - 1)
														
 
															     {
														
 
															-        // Check for opening tag first.
														
 
															         if (furi_string_sub_equals(html, i, tag))
														
 
															         {
														
 
															             depth++;
														
 
															             i += tag_len;
														
 
															             continue;
														
 
															         }
														
 
															-        // Check for closing tag.
														
 
															         if (furi_string_sub_equals(html, i, closing_tag))
														
 
															         {
														
 
															             depth--;
														
@@ -126,51 +109,150 @@ FuriString *html_furi_find_tag(const char *tag, FuriString *html, size_t index)
 
															     if (matching_close_index == -1)
														
 
															     {
														
 
															-        // Matching closing tag not found; return an empty FuriString.
														
 
															-        FURI_LOG_E("html_furi_parse", "Matching closing tag not found");
														
 
															+        // No matching close => return NULL or partial content as you choose
														
 
															         return NULL;
														
 
															     }
														
 
															-    // The content spans from content_start up to matching_close_index.
														
 
															+    // Copy the content between <tag>...</tag>
														
 
															     size_t content_length = matching_close_index - content_start;
														
 
															-    if (memmgr_get_free_heap() < (content_length + 1 + 4096)) // 4KB buffer
														
 
															+
														
 
															+    if (memmgr_get_free_heap() < (content_length + 1 + 1024))
														
 
															     {
														
 
															         FURI_LOG_E("html_furi_parse", "Not enough heap to allocate result");
														
 
															         return NULL;
														
 
															     }
														
 
															-    // Allocate the result string and copy the content.
														
 
															+    // Allocate and copy
														
 
															     FuriString *result = furi_string_alloc();
														
 
															-    furi_string_reserve(result, content_length);
														
 
															+    furi_string_reserve(result, content_length + 1);
														
 
															     furi_string_set_n(result, html, content_start, content_length);
														
 
															     return result;
														
 
															 }
														
 
															+/*
															+ * Extract the content of the first complete <tag>...</tag> pair found at or
															+ * after `index`, counting nested occurrences of the same tag so that the
															+ * matching (not merely the first) closing tag ends the element.
															+ *
															+ * @param tag             Opening tag including angle brackets, e.g. "<p>".
															+ * @param html            The HTML text to scan.
															+ * @param index           Offset in `html` at which the search starts.
															+ * @param out_next_index  Receives the offset just past the matching closing
															+ *                        tag on success, or -1 whenever NULL is returned.
															+ * @return A newly allocated FuriString holding the text between the tags
															+ *         (leading spaces skipped), or NULL when the tag is shorter than
															+ *         three characters, no opening tag or matching closing tag is
															+ *         found, `index` lies past the end of `html`, or the free heap is
															+ *         too small for the copy.
															+ */
															+static FuriString *_html_furi_find_tag(const char *tag, FuriString *html, size_t index, int *out_next_index)
															+{
															+    // Default for every early return below
															+    *out_next_index = -1;
															+
															+    const int tag_len = strlen(tag);
															+    if (tag_len < 3)
															+    {
															+        FURI_LOG_E("html_furi_parse", "Invalid tag length");
															+        return NULL;
															+    }
															+
															+    // Build the closing tag straight from the opening one: "<p>" => "</p>".
															+    // Buffer size: "</" + name + ">" + terminating NUL.
															+    const int name_len = tag_len - 2;
															+    char closing_tag[name_len + 4];
															+    snprintf(closing_tag, sizeof(closing_tag), "</%.*s>", name_len, tag + 1);
															+    const int closing_len = name_len + 3; // computed once instead of strlen() per match
															+
															+    const int html_len = furi_string_size(html);
															+    if (index > (size_t)html_len)
															+    {
															+        // Nothing to scan; also keeps the int conversion below in range
															+        return NULL;
															+    }
															+
															+    // 1) Locate the first opening tag at or after `index`.
															+    int open_tag_index = -1;
															+    for (int pos = (int)index; pos <= html_len - tag_len; pos++)
															+    {
															+        if (furi_string_sub_equals(html, pos, tag))
															+        {
															+            open_tag_index = pos;
															+            break;
															+        }
															+    }
															+    if (open_tag_index == -1)
															+    {
															+        return NULL; // no more occurrences
															+    }
															+
															+    // 2) Content begins after the opening tag; leading spaces are dropped.
															+    int content_start = open_tag_index + tag_len;
															+    while (content_start < html_len && furi_string_get_char(html, content_start) == ' ')
															+    {
															+        content_start++;
															+    }
															+
															+    // 3) Walk forward tracking nesting depth until the opening tag is balanced.
															+    int depth = 1;
															+    int i = content_start;
															+    int matching_close_index = -1;
															+
															+    while (i < html_len)
															+    {
															+        if (furi_string_sub_equals(html, i, tag))
															+        {
															+            depth++;
															+            i += tag_len;
															+        }
															+        else if (furi_string_sub_equals(html, i, closing_tag))
															+        {
															+            depth--;
															+            if (depth == 0)
															+            {
															+                matching_close_index = i;
															+                i += closing_len; // i now points just after the closing tag
															+                break;
															+            }
															+            i += closing_len;
															+        }
															+        else
															+        {
															+            i++;
															+        }
															+    }
															+
															+    if (matching_close_index == -1)
															+    {
															+        // No matching close tag found
															+        return NULL;
															+    }
															+
															+    size_t content_length = matching_close_index - content_start;
															+
															+    // Same headroom check html_furi_find_tag performs before allocating
															+    if (memmgr_get_free_heap() < (content_length + 1 + 1024))
															+    {
															+        FURI_LOG_E("html_furi_parse", "Not enough heap to allocate result");
															+        return NULL;
															+    }
															+
															+    // Allocate the result and copy the content
															+    FuriString *result = furi_string_alloc();
															+    furi_string_reserve(result, content_length + 1);
															+    furi_string_set_n(result, html, content_start, content_length);
															+
															+    *out_next_index = i;
															+
															+    return result;
															+}
														
 
															+
														
 
															 /*
														
 
															- * @brief Parse all Furigana strings from an HTML tag, handling nested child tags.
														
 
															- * @param tag The HTML tag to parse (including the angle brackets).
														
 
															- * @param html The HTML string to parse (as a FuriString).
														
 
															- * @return A newly allocated FuriString containing the parsed content,
														
 
															- *         or an empty FuriString if the tag is not found.
														
 
															+ * Parse *all* occurrences of <tag> in `html`, handling nested tags.
														
 
															+ * Returns a newly allocated FuriString concatenating all parsed contents,
														
 
															+ * with no separator inserted between them; ownership passes to the caller.
														
 
															+ * This function has no NULL return path: when no complete <tag>...</tag>
														
 
															+ * pair is found the returned string is empty, so callers should test its
														
 
															+ * size rather than compare the pointer against NULL.
														
 
															+ * Occurrences nested inside an already extracted element are not extracted
														
 
															+ * a second time, because scanning resumes after that element's closing tag.
														
 
															+ *
														
 
															+ * @param tag   Opening tag including angle brackets, e.g. "<p>".
														
 
															+ * @param html  The HTML text to scan.
														
 
															  */
														
 
															 FuriString *html_furi_find_tags(const char *tag, FuriString *html)
														
 
															 {
														
 
															     FuriString *result = furi_string_alloc();
														
 
															     size_t index = 0;
														
 
															+
														
 
															     while (true)
														
 
															     {
														
 
															-        FuriString *parsed = html_furi_find_tag(tag, html, index);
														
 
															+        int next_index;
														
 
															+        FuriString *parsed = _html_furi_find_tag(tag, html, index, &next_index);
														
 
															         if (parsed == NULL)
														
 
															         {
														
 
															+            // Helper returned NULL: no further complete element at or after `index` (or `tag` was rejected)
														
 
															             break;
														
 
															         }
														
 
															+
														
 
															+        // Append the found content to the accumulated result, then release the temporary
														
 
															         furi_string_cat(result, parsed);
														
 
															         furi_string_free(parsed);
														
 
															-        // start after the strlen(tag)
														
 
															-        // this is so we don't miss the inner tags
														
 
															-        // I may change this to: index += furi_string_size(parsed)
														
 
															-        index += strlen(tag);
														
 
															+
														
 
															+        // Resume searching at `next_index` (just after `</tag>`), so text already inside `parsed` is not rescanned.
														
 
															+        index = next_index;
														
 
															     }
														
 
															+
														
 
															     return result;
														
 
															-}
														
 
															+}