1 год назад · b2dbe76513
--- a/callback/web_crawler_callback.c
+++ b/callback/web_crawler_callback.c
@@ -747,44 +747,18 @@ static char *web_crawler_parse(DataLoaderModel *model)
 
				         {
			
 
				             // parse HTML then return response
			
 
				             FuriString *returned_data = flipper_http_load_from_file(model->fhttp->file_path);
			
 
				-            if (returned_data == NULL)
			
 
				+            if (returned_data == NULL || furi_string_size(returned_data) == 0)
			
 
				             {
			
 
				                 return "Failed to load HTML response.\nPress BACK to return.";
			
 
				             }
			
 
				             // parse HTML response
			
 
				-            FuriString *h1_tag = html_furi_find_tag("<h1>", returned_data, 0);
			
 
				-            FuriString *p_tag = html_furi_find_tag("<p>", returned_data, 0);
			
 
				+            FuriString *p_tags = html_furi_find_tags("<p>", returned_data);
			
 
				             furi_string_free(returned_data);
			
 
				-            if (p_tag == NULL && h1_tag == NULL)
			
 
				+            if (p_tags == NULL)
			
 
				             {
			
 
				-                return "Failed to find <h1> or <p> tag.\nPress BACK to return.";
			
 
				-            }
			
 
				-            else if (p_tag && h1_tag)
			
 
				-            {
			
 
				-                FuriString *combined = furi_string_alloc_printf("%s\n%s", furi_string_get_cstr(h1_tag), furi_string_get_cstr(p_tag));
			
 
				-                if (combined)
			
 
				-                {
			
 
				-                    furi_string_free(h1_tag);
			
 
				-                    furi_string_free(p_tag);
			
 
				-                    return (char *)furi_string_get_cstr(combined);
			
 
				-                }
			
 
				-                else
			
 
				-                {
			
 
				-                    furi_string_free(h1_tag);
			
 
				-                    furi_string_free(p_tag);
			
 
				-                    return "Failed to combine <h1> and <p> tags.\nPress BACK to return.";
			
 
				-                }
			
 
				-            }
			
 
				-            else if (h1_tag != NULL)
			
 
				-            {
			
 
				-                furi_string_free(p_tag);
			
 
				-                return (char *)furi_string_get_cstr(h1_tag);
			
 
				-            }
			
 
				-            else if (p_tag != NULL)
			
 
				-            {
			
 
				-                furi_string_free(h1_tag);
			
 
				-                return (char *)furi_string_get_cstr(p_tag);
			
 
				+                return "Failed to find <p> tag.\nPress BACK to return.";
			
 
				             }
			
 
				+            return (char *)furi_string_get_cstr(p_tags);
			
 
				         }
			
 
				     }
			
 
				     return "Data saved to file.\nPress BACK to return.";
			
--- a/html/html_furi.c
+++ b/html/html_furi.c
@@ -4,14 +4,13 @@
 
				 #include <html/html_furi.h>
			
 
				 
			
 
				 /*
			
 
				- * Helper function: Checks if the substring of the FuriString starting at index `pos`
			
 
				+ * Checks if the substring of the FuriString starting at index `pos`
			
 
				  * matches the given C-string `needle`.
			
 
				- * Returns true if it matches; otherwise false.
			
 
				  */
			
 
				 static bool furi_string_sub_equals(FuriString *str, int pos, const char *needle)
			
 
				 {
			
 
				     size_t needle_len = strlen(needle);
			
 
				-    if (pos + needle_len > furi_string_size(str))
			
 
				+    if ((size_t)pos + needle_len > furi_string_size(str))
			
 
				     {
			
 
				         return false;
			
 
				     }
			
@@ -26,36 +25,24 @@ static bool furi_string_sub_equals(FuriString *str, int pos, const char *needle)
 
				 }
			
 
				 
			
 
				 /*
			
 
				- * @brief Parse a Furigana string from an HTML tag, handling nested child tags.
			
 
				+ * Parse the content for a given HTML tag <tag> in `html`, handling nested tags.
			
 
				+ * Returns a newly allocated FuriString or NULL on error.
			
 
				  *
			
 
				- * This version accepts an HTML tag as a C-string (e.g., "<p>") and searches
			
 
				- * for the content inside the corresponding opening and closing tags within
			
 
				- * the provided HTML string, taking into account nested occurrences of the tag.
			
 
				- *
			
 
				- * For example, given the HTML string:
			
 
				- *     "<p><h1><p><h1>Test</h1></p></h1></p>"
			
 
				- * and searching with tag "<p>" the function will return:
			
 
				- *     "<h1><p><h1>Test</h1></p></h1>"
			
 
				- *
			
 
				- * @param tag The HTML tag to parse (including the angle brackets).
			
 
				- * @param html The HTML string to parse (as a FuriString).
			
 
				- * @return A newly allocated FuriString containing the parsed content,
			
 
				- *         or an empty FuriString if the tag is not found.
			
 
				+ * @param tag    e.g. "<p>"
			
 
				+ * @param html   The HTML string to parse.
			
 
				+ * @param index  The position in `html` from where to start searching.
			
 
				  */
			
 
				 FuriString *html_furi_find_tag(const char *tag, FuriString *html, size_t index)
			
 
				 {
			
 
				     int tag_len = strlen(tag);
			
 
				-
			
 
				-    // Ensure the tag is at least 3 characters long (e.g., "<p>")
			
 
				     if (tag_len < 3)
			
 
				     {
			
 
				         FURI_LOG_E("html_furi_parse", "Invalid tag length");
			
 
				         return NULL;
			
 
				     }
			
 
				 
			
 
				-    // Extract the inner tag name from the provided C-string tag.
			
 
				-    // For example, for "<p>" extract "p".
			
 
				-    int inner_len = tag_len - 2; // Exclude the '<' and '>'
			
 
				+    // Extract the tag name from <p> => "p"
			
 
				+    int inner_len = tag_len - 2; // exclude '<' and '>'
			
 
				     char inner_tag[inner_len + 1];
			
 
				     for (int i = 0; i < inner_len; i++)
			
 
				     {
			
@@ -63,14 +50,13 @@ FuriString *html_furi_find_tag(const char *tag, FuriString *html, size_t index)
 
				     }
			
 
				     inner_tag[inner_len] = '\0';
			
 
				 
			
 
				-    // Build the expected closing tag as a C-string (e.g., "</p>").
			
 
				-    int closing_tag_size = inner_len + 4; // "</" + inner tag + ">" + '\0'
			
 
				-    char closing_tag[closing_tag_size];
			
 
				-    snprintf(closing_tag, closing_tag_size, "</%s>", inner_tag);
			
 
				+    // Build closing tag => "</p>"
			
 
				+    char closing_tag[inner_len + 4];
			
 
				+    snprintf(closing_tag, sizeof(closing_tag), "</%s>", inner_tag);
			
 
				 
			
 
				-    // Find the opening tag in the HTML.
			
 
				-    // Locate the first occurrence of the opening tag.
			
 
				     int html_len = furi_string_size(html);
			
 
				+
			
 
				+    // Find the first occurrence of the opening tag
			
 
				     int open_tag_index = -1;
			
 
				     for (int i = index; i <= html_len - tag_len; i++)
			
 
				     {
			
@@ -82,34 +68,31 @@ FuriString *html_furi_find_tag(const char *tag, FuriString *html, size_t index)
 
				     }
			
 
				     if (open_tag_index == -1)
			
 
				     {
			
 
				-        // Opening tag not found; return an empty FuriString.
			
 
				-        FURI_LOG_E("html_furi_parse", "Opening tag not found");
			
 
				+        // Tag not found
			
 
				         return NULL;
			
 
				     }
			
 
				 
			
 
				-    // Content starts immediately after the opening tag.
			
 
				+    // Content starts after the opening tag
			
 
				     int content_start = open_tag_index + tag_len;
			
 
				 
			
 
				-    // Skip any leading whitespace.
			
 
				+    // Skip leading whitespace
			
 
				     while (content_start < html_len && furi_string_get_char(html, content_start) == ' ')
			
 
				     {
			
 
				         content_start++;
			
 
				     }
			
 
				 
			
 
				-    // Now search for the matching closing tag. We use a depth counter to handle nested tags.
			
 
				+    // Find matching closing tag, accounting for nested tags
			
 
				     int depth = 1;
			
 
				     int i = content_start;
			
 
				     int matching_close_index = -1;
			
 
				     while (i <= html_len - 1)
			
 
				     {
			
 
				-        // Check for opening tag first.
			
 
				         if (furi_string_sub_equals(html, i, tag))
			
 
				         {
			
 
				             depth++;
			
 
				             i += tag_len;
			
 
				             continue;
			
 
				         }
			
 
				-        // Check for closing tag.
			
 
				         if (furi_string_sub_equals(html, i, closing_tag))
			
 
				         {
			
 
				             depth--;
			
@@ -126,51 +109,150 @@ FuriString *html_furi_find_tag(const char *tag, FuriString *html, size_t index)
 
				 
			
 
				     if (matching_close_index == -1)
			
 
				     {
			
 
				-        // Matching closing tag not found; return an empty FuriString.
			
 
				-        FURI_LOG_E("html_furi_parse", "Matching closing tag not found");
			
 
				+        // No matching close => return NULL or partial content as you choose
			
 
				         return NULL;
			
 
				     }
			
 
				 
			
 
				-    // The content spans from content_start up to matching_close_index.
			
 
				+    // Copy the content between <tag>...</tag>
			
 
				     size_t content_length = matching_close_index - content_start;
			
 
				-    if (memmgr_get_free_heap() < (content_length + 1 + 4096)) // 4KB buffer
			
 
				+
			
 
				+    if (memmgr_get_free_heap() < (content_length + 1 + 1024))
			
 
				     {
			
 
				         FURI_LOG_E("html_furi_parse", "Not enough heap to allocate result");
			
 
				         return NULL;
			
 
				     }
			
 
				 
			
 
				-    // Allocate the result string and copy the content.
			
 
				+    // Allocate and copy
			
 
				     FuriString *result = furi_string_alloc();
			
 
				-    furi_string_reserve(result, content_length);
			
 
				+    furi_string_reserve(result, content_length + 1);
			
 
				     furi_string_set_n(result, html, content_start, content_length);
			
 
				 
			
 
				     return result;
			
 
				 }
			
 
				 
			
 
				+static FuriString *_html_furi_find_tag(const char *tag, FuriString *html, size_t index, int *out_next_index)
			
 
				+{
			
 
				+    // Clear next index in case of early return
			
 
				+    *out_next_index = -1;
			
 
				+
			
 
				+    int tag_len = strlen(tag);
			
 
				+    if (tag_len < 3)
			
 
				+    {
			
 
				+        FURI_LOG_E("html_furi_parse", "Invalid tag length");
			
 
				+        return NULL;
			
 
				+    }
			
 
				+
			
 
				+    // Extract "p" from "<p>"
			
 
				+    int inner_len = tag_len - 2;
			
 
				+    char inner_tag[inner_len + 1];
			
 
				+    for (int i = 0; i < inner_len; i++)
			
 
				+    {
			
 
				+        inner_tag[i] = tag[i + 1];
			
 
				+    }
			
 
				+    inner_tag[inner_len] = '\0';
			
 
				+
			
 
				+    // Create closing tag => "</p>"
			
 
				+    char closing_tag[inner_len + 4];
			
 
				+    snprintf(closing_tag, sizeof(closing_tag), "</%s>", inner_tag);
			
 
				+
			
 
				+    int html_len = furi_string_size(html);
			
 
				+
			
 
				+    // 1) Find opening tag from `index`.
			
 
				+    int open_tag_index = -1;
			
 
				+    for (int i = index; i <= html_len - tag_len; i++)
			
 
				+    {
			
 
				+        if (furi_string_sub_equals(html, i, tag))
			
 
				+        {
			
 
				+            open_tag_index = i;
			
 
				+            break;
			
 
				+        }
			
 
				+    }
			
 
				+    if (open_tag_index == -1)
			
 
				+    {
			
 
				+        return NULL; // no more occurrences
			
 
				+    }
			
 
				+
			
 
				+    // The content begins after the opening tag.
			
 
				+    int content_start = open_tag_index + tag_len;
			
 
				+
			
 
				+    // skip leading spaces
			
 
				+    while (content_start < html_len && furi_string_get_char(html, content_start) == ' ')
			
 
				+    {
			
 
				+        content_start++;
			
 
				+    }
			
 
				+
			
 
				+    int depth = 1;
			
 
				+    int i = content_start;
			
 
				+    int matching_close_index = -1;
			
 
				+
			
 
				+    while (i < html_len)
			
 
				+    {
			
 
				+        if (furi_string_sub_equals(html, i, tag))
			
 
				+        {
			
 
				+            depth++;
			
 
				+            i += tag_len;
			
 
				+        }
			
 
				+        else if (furi_string_sub_equals(html, i, closing_tag))
			
 
				+        {
			
 
				+            depth--;
			
 
				+            i += strlen(closing_tag);
			
 
				+            if (depth == 0)
			
 
				+            {
			
 
				+                matching_close_index = i - strlen(closing_tag);
			
 
				+                // i now points just after "</p>"
			
 
				+                break;
			
 
				+            }
			
 
				+        }
			
 
				+        else
			
 
				+        {
			
 
				+            i++;
			
 
				+        }
			
 
				+    }
			
 
				+
			
 
				+    if (matching_close_index == -1)
			
 
				+    {
			
 
				+        // No matching close tag found
			
 
				+        return NULL;
			
 
				+    }
			
 
				+
			
 
				+    size_t content_length = matching_close_index - content_start;
			
 
				+
			
 
				+    // Allocate the result
			
 
				+    FuriString *result = furi_string_alloc();
			
 
				+    furi_string_reserve(result, content_length + 1); // +1 for safety
			
 
				+    furi_string_set_n(result, html, content_start, content_length);
			
 
				+
			
 
				+    *out_next_index = i;
			
 
				+
			
 
				+    return result;
			
 
				+}
			
 
				+
			
 
				 /*
			
 
				- * @brief Parse all Furigana strings from an HTML tag, handling nested child tags.
			
 
				- * @param tag The HTML tag to parse (including the angle brackets).
			
 
				- * @param html The HTML string to parse (as a FuriString).
			
 
				- * @return A newly allocated FuriString containing the parsed content,
			
 
				- *         or an empty FuriString if the tag is not found.
			
 
				+ * Parse *all* occurrences of <tag> in `html`, handling nested tags.
			
 
				+ * Returns a FuriString concatenating all parsed contents.
			
 
				  */
			
 
				 FuriString *html_furi_find_tags(const char *tag, FuriString *html)
			
 
				 {
			
 
				     FuriString *result = furi_string_alloc();
			
 
				     size_t index = 0;
			
 
				+
			
 
				     while (true)
			
 
				     {
			
 
				-        FuriString *parsed = html_furi_find_tag(tag, html, index);
			
 
				+        int next_index;
			
 
				+        FuriString *parsed = _html_furi_find_tag(tag, html, index, &next_index);
			
 
				         if (parsed == NULL)
			
 
				         {
			
 
				+            // No more tags from 'index' onward
			
 
				             break;
			
 
				         }
			
 
				+
			
 
				+        // Append the found content
			
 
				         furi_string_cat(result, parsed);
			
 
				         furi_string_free(parsed);
			
 
				-        // start after the strlen(tag)
			
 
				-        // this is so we don't miss the inner tags
			
 
				-        // I may change this to: index += furi_string_size(parsed)
			
 
				-        index += strlen(tag);
			
 
				+
			
 
				+        // Resume searching at `next_index` (just after `</tag>`).
			
 
				+        index = next_index;
			
 
				     }
			
 
				+
			
 
				     return result;
			
 
				-}
			
 
				+}