Просмотр исходного кода

add helper for html_furi_find_tags

jblanked 1 год назад
Родитель
Коммит
b2dbe76513
2 изменённых файла: 139 добавлений и 83 удаления
  1. 5 31
      callback/web_crawler_callback.c
  2. 134 52
      html/html_furi.c

+ 5 - 31
callback/web_crawler_callback.c

@@ -747,44 +747,18 @@ static char *web_crawler_parse(DataLoaderModel *model)
         {
             // parse HTML then return response
             FuriString *returned_data = flipper_http_load_from_file(model->fhttp->file_path);
-            if (returned_data == NULL)
+            if (returned_data == NULL || furi_string_size(returned_data) == 0)
             {
                 return "Failed to load HTML response.\nPress BACK to return.";
             }
             // parse HTML response
-            FuriString *h1_tag = html_furi_find_tag("<h1>", returned_data, 0);
-            FuriString *p_tag = html_furi_find_tag("<p>", returned_data, 0);
+            FuriString *p_tags = html_furi_find_tags("<p>", returned_data);
             furi_string_free(returned_data);
-            if (p_tag == NULL && h1_tag == NULL)
+            if (p_tags == NULL)
             {
-                return "Failed to find <h1> or <p> tag.\nPress BACK to return.";
-            }
-            else if (p_tag && h1_tag)
-            {
-                FuriString *combined = furi_string_alloc_printf("%s\n%s", furi_string_get_cstr(h1_tag), furi_string_get_cstr(p_tag));
-                if (combined)
-                {
-                    furi_string_free(h1_tag);
-                    furi_string_free(p_tag);
-                    return (char *)furi_string_get_cstr(combined);
-                }
-                else
-                {
-                    furi_string_free(h1_tag);
-                    furi_string_free(p_tag);
-                    return "Failed to combine <h1> and <p> tags.\nPress BACK to return.";
-                }
-            }
-            else if (h1_tag != NULL)
-            {
-                furi_string_free(p_tag);
-                return (char *)furi_string_get_cstr(h1_tag);
-            }
-            else if (p_tag != NULL)
-            {
-                furi_string_free(h1_tag);
-                return (char *)furi_string_get_cstr(p_tag);
+                return "Failed to find <p> tag.\nPress BACK to return.";
             }
+            return (char *)furi_string_get_cstr(p_tags);
         }
     }
     return "Data saved to file.\nPress BACK to return.";

+ 134 - 52
html/html_furi.c

@@ -4,14 +4,13 @@
 #include <html/html_furi.h>
 
 /*
- * Helper function: Checks if the substring of the FuriString starting at index `pos`
+ * Checks if the substring of the FuriString starting at index `pos`
  * matches the given C-string `needle`.
- * Returns true if it matches; otherwise false.
  */
 static bool furi_string_sub_equals(FuriString *str, int pos, const char *needle)
 {
     size_t needle_len = strlen(needle);
-    if (pos + needle_len > furi_string_size(str))
+    if ((size_t)pos + needle_len > furi_string_size(str))
     {
         return false;
     }
@@ -26,36 +25,24 @@ static bool furi_string_sub_equals(FuriString *str, int pos, const char *needle)
 }
 
 /*
- * @brief Parse a Furigana string from an HTML tag, handling nested child tags.
+ * Parse the content for a given HTML tag <tag> in `html`, handling nested tags.
+ * Returns a newly allocated FuriString or NULL on error.
  *
- * This version accepts an HTML tag as a C-string (e.g., "<p>") and searches
- * for the content inside the corresponding opening and closing tags within
- * the provided HTML string, taking into account nested occurrences of the tag.
- *
- * For example, given the HTML string:
- *     "<p><h1><p><h1>Test</h1></p></h1></p>"
- * and searching with tag "<p>" the function will return:
- *     "<h1><p><h1>Test</h1></p></h1>"
- *
- * @param tag The HTML tag to parse (including the angle brackets).
- * @param html The HTML string to parse (as a FuriString).
- * @return A newly allocated FuriString containing the parsed content,
- *         or an empty FuriString if the tag is not found.
+ * @param tag    e.g. "<p>"
+ * @param html   The HTML string to parse.
+ * @param index  The position in `html` from where to start searching.
  */
 FuriString *html_furi_find_tag(const char *tag, FuriString *html, size_t index)
 {
     int tag_len = strlen(tag);
-
-    // Ensure the tag is at least 3 characters long (e.g., "<p>")
     if (tag_len < 3)
     {
         FURI_LOG_E("html_furi_parse", "Invalid tag length");
         return NULL;
     }
 
-    // Extract the inner tag name from the provided C-string tag.
-    // For example, for "<p>" extract "p".
-    int inner_len = tag_len - 2; // Exclude the '<' and '>'
+    // Extract the tag name from <p> => "p"
+    int inner_len = tag_len - 2; // exclude '<' and '>'
     char inner_tag[inner_len + 1];
     for (int i = 0; i < inner_len; i++)
     {
@@ -63,14 +50,13 @@ FuriString *html_furi_find_tag(const char *tag, FuriString *html, size_t index)
     }
     inner_tag[inner_len] = '\0';
 
-    // Build the expected closing tag as a C-string (e.g., "</p>").
-    int closing_tag_size = inner_len + 4; // "</" + inner tag + ">" + '\0'
-    char closing_tag[closing_tag_size];
-    snprintf(closing_tag, closing_tag_size, "</%s>", inner_tag);
+    // Build closing tag => "</p>"
+    char closing_tag[inner_len + 4];
+    snprintf(closing_tag, sizeof(closing_tag), "</%s>", inner_tag);
 
-    // Find the opening tag in the HTML.
-    // Locate the first occurrence of the opening tag.
     int html_len = furi_string_size(html);
+
+    // Find the first occurrence of the opening tag
     int open_tag_index = -1;
     for (int i = index; i <= html_len - tag_len; i++)
     {
@@ -82,34 +68,31 @@ FuriString *html_furi_find_tag(const char *tag, FuriString *html, size_t index)
     }
     if (open_tag_index == -1)
     {
-        // Opening tag not found; return an empty FuriString.
-        FURI_LOG_E("html_furi_parse", "Opening tag not found");
+        // Tag not found
         return NULL;
     }
 
-    // Content starts immediately after the opening tag.
+    // Content starts after the opening tag
     int content_start = open_tag_index + tag_len;
 
-    // Skip any leading whitespace.
+    // Skip leading whitespace
     while (content_start < html_len && furi_string_get_char(html, content_start) == ' ')
     {
         content_start++;
     }
 
-    // Now search for the matching closing tag. We use a depth counter to handle nested tags.
+    // Find matching closing tag, accounting for nested tags
     int depth = 1;
     int i = content_start;
     int matching_close_index = -1;
     while (i <= html_len - 1)
     {
-        // Check for opening tag first.
         if (furi_string_sub_equals(html, i, tag))
         {
             depth++;
             i += tag_len;
             continue;
         }
-        // Check for closing tag.
         if (furi_string_sub_equals(html, i, closing_tag))
         {
             depth--;
@@ -126,51 +109,150 @@ FuriString *html_furi_find_tag(const char *tag, FuriString *html, size_t index)
 
     if (matching_close_index == -1)
     {
-        // Matching closing tag not found; return an empty FuriString.
-        FURI_LOG_E("html_furi_parse", "Matching closing tag not found");
+        // No matching close => return NULL or partial content as you choose
         return NULL;
     }
 
-    // The content spans from content_start up to matching_close_index.
+    // Copy the content between <tag>...</tag>
     size_t content_length = matching_close_index - content_start;
-    if (memmgr_get_free_heap() < (content_length + 1 + 4096)) // 4KB buffer
+
+    if (memmgr_get_free_heap() < (content_length + 1 + 1024))
     {
         FURI_LOG_E("html_furi_parse", "Not enough heap to allocate result");
         return NULL;
     }
 
-    // Allocate the result string and copy the content.
+    // Allocate and copy
     FuriString *result = furi_string_alloc();
-    furi_string_reserve(result, content_length);
+    furi_string_reserve(result, content_length + 1);
     furi_string_set_n(result, html, content_start, content_length);
 
     return result;
 }
 
+static FuriString *_html_furi_find_tag(const char *tag, FuriString *html, size_t index, int *out_next_index)
+{ /* Single-occurrence search that also reports where the next search should resume.
+   *
+   * Looks for the first `tag` (e.g. "<p>") in `html` at or after `index` and
+   * returns a newly allocated FuriString holding the text between that tag and
+   * its matching closing tag. Nested occurrences of the same tag are tracked
+   * with a depth counter and stay part of the returned content.
+   *
+   * Returns NULL when `tag` is shorter than 3 characters, when no opening tag
+   * is found from `index` onward, or when the opening tag has no matching
+   * closing tag.
+   *
+   * `*out_next_index` is -1 on every NULL return; on success it is the offset
+   * just past the matching closing tag. The returned string is not freed here.
+   * Unlike html_furi_find_tag(), no free-heap check is made before allocating.
+   */
+    // Clear next index in case of early return
+    *out_next_index = -1;
+
+    int tag_len = strlen(tag);
+    if (tag_len < 3)
+    {
+        FURI_LOG_E("html_furi_parse", "Invalid tag length");
+        return NULL;
+    }
+
+    // Extract the tag name between '<' and '>' (e.g. "p" from "<p>")
+    int inner_len = tag_len - 2;
+    char inner_tag[inner_len + 1]; // variable-length array: tag name plus terminating '\0'
+    for (int i = 0; i < inner_len; i++)
+    {
+        inner_tag[i] = tag[i + 1];
+    }
+    inner_tag[inner_len] = '\0';
+
+    // Build the closing tag, e.g. "</p>": "</" + name + ">" + '\0' gives the 4 extra bytes
+    char closing_tag[inner_len + 4];
+    snprintf(closing_tag, sizeof(closing_tag), "</%s>", inner_tag);
+
+    int html_len = furi_string_size(html);
+
+    // 1) Find opening tag from `index`.
+    int open_tag_index = -1;
+    for (int i = index; i <= html_len - tag_len; i++) // `index` (size_t) is narrowed to int here
+    {
+        if (furi_string_sub_equals(html, i, tag))
+        {
+            open_tag_index = i;
+            break;
+        }
+    }
+    if (open_tag_index == -1)
+    {
+        return NULL; // no more occurrences
+    }
+
+    // The content begins after the opening tag.
+    int content_start = open_tag_index + tag_len;
+
+    // Skip leading spaces (only ' ' is skipped; other whitespace is kept)
+    while (content_start < html_len && furi_string_get_char(html, content_start) == ' ')
+    {
+        content_start++;
+    }
+
+    int depth = 1; // the opening tag found above counts as depth 1
+    int i = content_start;
+    int matching_close_index = -1;
+
+    while (i < html_len)
+    {
+        if (furi_string_sub_equals(html, i, tag))
+        {
+            depth++;
+            i += tag_len;
+        }
+        else if (furi_string_sub_equals(html, i, closing_tag))
+        {
+            depth--;
+            i += strlen(closing_tag);
+            if (depth == 0)
+            {
+                matching_close_index = i - strlen(closing_tag);
+                // i now points just after "</p>"
+                break;
+            }
+        }
+        else
+        {
+            i++;
+        }
+    }
+
+    if (matching_close_index == -1)
+    {
+        // No matching close tag found
+        return NULL;
+    }
+
+    size_t content_length = matching_close_index - content_start;
+
+    // Allocate the result
+    FuriString *result = furi_string_alloc();
+    furi_string_reserve(result, content_length + 1); // +1 for safety
+    furi_string_set_n(result, html, content_start, content_length);
+
+    *out_next_index = i; // offset just past the matching closing tag
+
+    return result;
+}
+
 /*
- * @brief Parse all Furigana strings from an HTML tag, handling nested child tags.
- * @param tag The HTML tag to parse (including the angle brackets).
- * @param html The HTML string to parse (as a FuriString).
- * @return A newly allocated FuriString containing the parsed content,
- *         or an empty FuriString if the tag is not found.
+ * Parse *all* occurrences of <tag> in `html`, handling nested tags.
+ * Returns a FuriString concatenating all parsed contents.
+ *
+ * @param tag   Opening tag including the angle brackets, e.g. "<p>".
+ * @param html  The HTML text to scan, starting from offset 0.
+ * @return The FuriString allocated at the top of the function, with the
+ *         content of each match appended back to back (no separator is
+ *         inserted). Because the search resumes after each closing tag,
+ *         tags nested inside a match are not reported separately.
+ *         The same string is returned when nothing matched (nothing has
+ *         been appended to it, so presumably empty); there is no separate
+ *         NULL return for "tag not found".
  */
 FuriString *html_furi_find_tags(const char *tag, FuriString *html)
 {
     FuriString *result = furi_string_alloc();
     size_t index = 0;
+
     while (true)
     {
-        FuriString *parsed = html_furi_find_tag(tag, html, index);
+        int next_index; // written by the helper: -1 on failure, resume offset on success
+        FuriString *parsed = _html_furi_find_tag(tag, html, index, &next_index);
         if (parsed == NULL)
         {
+            // No more tags from 'index' onward (or the next one is unclosed/invalid)
             break;
         }
+
+        // Append the found content, then release the per-match string
         furi_string_cat(result, parsed);
         furi_string_free(parsed);
-        // start after the strlen(tag)
-        // this is so we don't miss the inner tags
-        // I may change this to: index += furi_string_size(parsed)
-        index += strlen(tag);
+
+        // Resume searching at `next_index` (just after `</tag>`).
+        index = next_index;
     }
+
     return result;
-}
+}