Browse Source

feat(#1239): Update Gitea and Forgejo due to API changes from initial cut (#1255)

feat(#1239): first cut at Gitea backups silently failing after 1st run
feat(#1239): Added Token Scope for Forgejo edge case. Also included: test coverage for fixes
BurntOutHylian 2 weeks ago
parent
commit
7afb303ffd

+ 25 - 3
backend/app/services/git_providers/base.py

@@ -12,9 +12,7 @@ class GitProviderBackend(ABC):
     @staticmethod
     def _blob_sha(content_bytes: bytes) -> str:
         """Compute the git blob SHA for content_bytes (sha1("blob {len}\\0" + data))."""
-        return hashlib.sha1(
-            f"blob {len(content_bytes)}\0".encode() + content_bytes, usedforsecurity=False
-        ).hexdigest()
+        return hashlib.sha1(f"blob {len(content_bytes)}\0".encode() + content_bytes, usedforsecurity=False).hexdigest()
 
     @staticmethod
     def _truncated_response_text(response: httpx.Response, max_length: int = 200) -> str:
@@ -24,6 +22,30 @@ class GitProviderBackend(ABC):
             return text
         return f"{text[: max_length - 3]}..."
 
+    @staticmethod
+    def _read_sha(response: httpx.Response, *path: str) -> tuple[str | None, str | None]:
+        """Walk a JSON path to a string SHA value.
+
+        Returns ``(sha, None)`` on success, ``(None, reason)`` if the body is
+        not JSON, the path is missing, or the leaf is not a string. Callers
+        use the reason to build a clear failure message instead of letting
+        ``KeyError``/``JSONDecodeError`` bubble to the outer catch-all (which
+        surfaces cryptic one-word strings like ``"'object'"`` to operators).
+        """
+        try:
+            data = response.json()
+        except ValueError:
+            return None, "non-JSON response body"
+        for key in path:
+            if not isinstance(data, dict):
+                return None, f"unexpected shape at key {key!r}"
+            if key not in data:
+                return None, f"missing key {key!r}"
+            data = data[key]
+        if not isinstance(data, str):
+            return None, f"value at {'.'.join(path)} is not a string"
+        return data, None
+
     def get_headers(self, token: str) -> dict:
         """Return HTTP headers for authenticated API requests."""
         return {

+ 93 - 3
backend/app/services/git_providers/forgejo.py

@@ -1,11 +1,101 @@
-"""Forgejo backend — currently API-compatible with Gitea (/api/v1)."""
+"""Forgejo backend — diverges from Gitea on token-scope validation (v15+)."""
+
+import logging
+
+import httpx
 
 from backend.app.services.git_providers.gitea import GiteaBackend
 
+logger = logging.getLogger(__name__)
+
 
 class ForgejoBackend(GiteaBackend):
     """Backend for Forgejo instances.
 
-    Currently API-compatible with Gitea (/api/v1). Override methods here
-    as the two projects' APIs diverge.
+    Forgejo v15+ returns 404 (not 403) for private repositories when the token
+    lacks repository scope, requiring a /user pre-check to distinguish bad tokens
+    from inaccessible repos. test_connection is overridden to handle this.
+    Other methods are inherited from GiteaBackend unchanged.
     """
+
+    async def test_connection(self, repo_url: str, token: str, client: httpx.AsyncClient) -> dict:
+        try:
+            owner, repo = self.parse_repo_url(repo_url)
+            api_base = self.get_api_base(repo_url)
+            headers = self.get_headers(token)
+
+            # Verify token validity before hitting the repo. On Forgejo v15+,
+            # private repos return 404 (not 403) when the token lacks repo scope,
+            # so we must distinguish "bad token" from "token OK but repo not visible".
+            user_resp = await client.get(f"{api_base}/user", headers=headers)
+            if user_resp.status_code == 401:
+                return {"success": False, "message": "Invalid access token", "repo_name": None, "permissions": None}
+            if user_resp.status_code == 403:
+                return {
+                    "success": False,
+                    "message": "Token has no read:user scope; cannot validate identity",
+                    "repo_name": None,
+                    "permissions": None,
+                }
+            if user_resp.status_code != 200:
+                return {
+                    "success": False,
+                    "message": f"Forgejo API error on /user: {user_resp.status_code}",
+                    "repo_name": None,
+                    "permissions": None,
+                }
+
+            repo_resp = await client.get(f"{api_base}/repos/{owner}/{repo}", headers=headers)
+
+            if repo_resp.status_code == 404:
+                return {
+                    "success": False,
+                    "message": (
+                        "Repository not found or token cannot access it. "
+                        "On Forgejo v15+, private repositories return 404 (not 403) "
+                        "when the token lacks repository scope."
+                    ),
+                    "repo_name": None,
+                    "permissions": None,
+                }
+
+            if repo_resp.status_code != 200:
+                return {
+                    "success": False,
+                    "message": f"API error: {repo_resp.status_code}",
+                    "repo_name": None,
+                    "permissions": None,
+                }
+
+            data = repo_resp.json()
+            permissions = data.get("permissions", {})
+
+            if not permissions.get("push", False):
+                return {
+                    "success": False,
+                    "message": "Token does not have push permission to this repository",
+                    "repo_name": data.get("full_name"),
+                    "permissions": permissions,
+                }
+
+            return {
+                "success": True,
+                "message": "Connection successful",
+                "repo_name": data.get("full_name"),
+                "permissions": permissions,
+            }
+
+        except Exception as e:
+            logger.exception("Forgejo connection test failed")
+            detail = str(e)[:200]
+            message = (
+                f"Connection failed: {type(e).__name__}: {detail}"
+                if detail
+                else f"Connection failed: {type(e).__name__}"
+            )
+            return {
+                "success": False,
+                "message": message,
+                "repo_name": None,
+                "permissions": None,
+            }

+ 115 - 67
backend/app/services/git_providers/gitea.py

@@ -17,8 +17,8 @@ class GiteaBackend(GitHubBackend):
     """Backend for Gitea instances.
 
     Gitea's Git Data API (/api/v1/repos/{owner}/{repo}/git/...) is *mostly*
-    compatible with GitHub's, but diverges on two points that broke real-world
-    backups (#1224, #1225):
+    compatible with GitHub's, but diverges on three points that broke real-world
+    backups (#1224, #1225, #1239):
 
     1. ``GET /git/refs/heads/{branch}`` returns a *list* of matching refs even
        when only one matches; GitHub returns a single object. The push paths
@@ -29,6 +29,12 @@ class GiteaBackend(GitHubBackend):
        empty repository — every blob POST returns 404 until the repo has at
        least one commit. ``_create_initial_commit()`` is overridden to use the
        Contents API, which seeds the branch + initial commit in a single call.
+
+    3. The Git Data API does not support atomic multi-file commits — each file
+       requires a separate blob POST followed by a tree/commit/ref sequence.
+       ``push_files()`` is overridden to use the Contents API
+       (``POST /repos/.../contents`` with a ``files`` array), which commits all
+       changed files in a single round-trip and avoids partial-commit failures.
     """
 
     @staticmethod
@@ -45,10 +51,10 @@ class GiteaBackend(GitHubBackend):
         """Extract the tree SHA from a commit response.
 
         GitHub's ``GET /git/commits/{sha}`` returns the GitCommit schema with
-        ``tree`` at the top level. Gitea's same-named endpoint returns the
+        ``tree`` at the top level. Gitea's same-named endpoint may return the
         wrapped Commit schema where ``tree`` lives under ``commit``. Try the
-        flat shape first (GitHub-compatible deployments / Gitea ≤ 1.23) then
-        fall back to the wrapped shape (Gitea 1.24+, Forgejo).
+        flat shape first (GitHub-compatible deployments and some Gitea/Forgejo
+        versions) then fall back to the wrapped shape.
         """
         tree_node = commit_data.get("tree")
         if not isinstance(tree_node, dict):
@@ -94,6 +100,7 @@ class GiteaBackend(GitHubBackend):
         branch: str,
         files: dict,
         client: httpx.AsyncClient,
+        _allow_branch_create: bool = True,
     ) -> dict:
         """Push files via the Git Data API, normalising Gitea's list-shaped ref response."""
         try:
@@ -104,6 +111,14 @@ class GiteaBackend(GitHubBackend):
             ref_response = await client.get(f"{api_base}/repos/{owner}/{repo}/git/refs/heads/{branch}", headers=headers)
 
             if ref_response.status_code == 404:
+                if not _allow_branch_create:
+                    return {
+                        "status": "failed",
+                        "message": (
+                            f"Branch '{branch}' not found after creation — possible replication lag. "
+                            "The next scheduled backup will retry."
+                        ),
+                    }
                 return await self._create_branch_and_push(
                     client, headers, api_base, owner, repo, branch, files, repo_url, token
                 )
@@ -121,93 +136,110 @@ class GiteaBackend(GitHubBackend):
                 f"{api_base}/repos/{owner}/{repo}/git/commits/{current_commit_sha}", headers=headers
             )
             if commit_response.status_code != 200:
-                return {"status": "failed", "message": "Failed to get current commit"}
+                msg = f"Failed to get current commit (HTTP {commit_response.status_code}): {self._truncated_response_text(commit_response)}"
+                logger.warning("push_files %s/%s: %s", owner, repo, msg)
+                return {"status": "failed", "message": msg}
 
             current_tree_sha = self._commit_tree_sha(commit_response.json())
             if not current_tree_sha:
-                return {"status": "failed", "message": "Failed to extract tree SHA from commit response"}
+                msg = (
+                    f"Failed to extract tree SHA from commit response: {self._truncated_response_text(commit_response)}"
+                )
+                logger.warning("push_files %s/%s: %s", owner, repo, msg)
+                return {"status": "failed", "message": msg}
 
             tree_response = await client.get(
                 f"{api_base}/repos/{owner}/{repo}/git/trees/{current_tree_sha}?recursive=1", headers=headers
             )
+            if tree_response.status_code != 200:
+                msg = f"Failed to list existing tree (HTTP {tree_response.status_code}): {self._truncated_response_text(tree_response)}"
+                logger.warning("push_files %s/%s: %s", owner, repo, msg)
+                return {"status": "failed", "message": msg, "error": self._truncated_response_text(tree_response)}
+            tree_data = tree_response.json()
+            # Gitea's tree API can report ``truncated: true`` for large
+            # listings; if we honour the partial map, the dedup check misses
+            # and every file gets re-uploaded each run.
+            if tree_data.get("truncated"):
+                msg = (
+                    "Repository tree exceeds the Gitea API listing limit (truncated=true). "
+                    "Rotate the backup repository to avoid silent file-by-file churn on every backup."
+                )
+                logger.warning("push_files %s/%s: %s", owner, repo, msg)
+                return {"status": "failed", "message": msg}
             existing_files: dict[str, str] = {}
-            if tree_response.status_code == 200:
-                for item in tree_response.json().get("tree", []):
-                    if item["type"] == "blob":
-                        existing_files[item["path"]] = item["sha"]
+            for item in tree_data.get("tree", []):
+                if item.get("type") != "blob":
+                    continue
+                path, sha = item.get("path"), item.get("sha")
+                if not path or not sha:
+                    logger.warning("push_files: skipping malformed tree entry: %s", item)
+                    continue
+                existing_files[path] = sha
 
-            tree_items = []
+            api_files = []
             files_changed = 0
 
             for path, content in files.items():
                 content_str = json.dumps(content, indent=2, default=str)
                 content_bytes = content_str.encode("utf-8")
+                content_b64 = base64.b64encode(content_bytes).decode()
                 content_sha = self._blob_sha(content_bytes)
 
-                if path in existing_files and existing_files[path] == content_sha:
-                    continue
-
-                blob_response = await client.post(
-                    f"{api_base}/repos/{owner}/{repo}/git/blobs",
-                    headers=headers,
-                    json={"content": base64.b64encode(content_bytes).decode(), "encoding": "base64"},
-                )
-                if blob_response.status_code != 201:
-                    logger.error("Failed to create blob for %s: %s", path, self._truncated_response_text(blob_response))
-                    continue
-
-                tree_items.append({"path": path, "mode": "100644", "type": "blob", "sha": blob_response.json()["sha"]})
+                if path in existing_files:
+                    if existing_files[path] == content_sha:
+                        continue
+                    api_files.append(
+                        {"operation": "update", "path": path, "content": content_b64, "sha": existing_files[path]}
+                    )
+                else:
+                    api_files.append({"operation": "create", "path": path, "content": content_b64})
                 files_changed += 1
 
-            if not tree_items:
+            if not api_files:
                 return {"status": "skipped", "message": "No changes to commit", "commit_sha": None, "files_changed": 0}
 
-            tree_response = await client.post(
-                f"{api_base}/repos/{owner}/{repo}/git/trees",
+            commit_message = f"Bambuddy backup - {datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M:%S UTC')}"
+            response = await client.post(
+                f"{api_base}/repos/{owner}/{repo}/contents",
                 headers=headers,
-                json={"base_tree": current_tree_sha, "tree": tree_items},
+                json={"branch": branch, "message": commit_message, "files": api_files},
             )
-            if tree_response.status_code != 201:
+
+            if response.status_code == 404:
                 return {
                     "status": "failed",
-                    "message": f"Failed to create tree: {self._truncated_response_text(tree_response)}",
+                    "message": "Contents API endpoint not found — your Gitea instance may be older than v1.18 or the API may be disabled by an administrator (POST /contents returned 404)",
                 }
-
-            new_tree_sha = tree_response.json()["sha"]
-            commit_message = f"Bambuddy backup - {datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M:%S UTC')}"
-            commit_response = await client.post(
-                f"{api_base}/repos/{owner}/{repo}/git/commits",
-                headers=headers,
-                json={"message": commit_message, "tree": new_tree_sha, "parents": [current_commit_sha]},
-            )
-            if commit_response.status_code != 201:
+            if response.status_code == 409:
                 return {
                     "status": "failed",
-                    "message": f"Failed to create commit: {self._truncated_response_text(commit_response)}",
+                    "message": (
+                        "Conflict committing files — the branch likely advanced concurrently "
+                        "(web-UI edit, another backup run, or path-vs-tree collision). "
+                        "The next scheduled backup will re-read the current tree and resolve this."
+                    ),
                 }
-
-            new_commit_sha = commit_response.json()["sha"]
-
-            ref_update = await client.patch(
-                f"{api_base}/repos/{owner}/{repo}/git/refs/heads/{branch}",
-                headers=headers,
-                json={"sha": new_commit_sha},
-            )
-            if ref_update.status_code != 200:
+            if response.status_code not in (200, 201):
                 return {
                     "status": "failed",
-                    "message": f"Failed to update branch: {self._truncated_response_text(ref_update)}",
+                    "message": f"Backup commit failed: {self._truncated_response_text(response)}",
                 }
 
+            commit_sha = (response.json().get("commit") or {}).get("sha")
+            message = (
+                f"Backup successful - {files_changed} files updated"
+                if commit_sha
+                else f"Backup successful - {files_changed} files updated (commit SHA not reported by server)"
+            )
             return {
                 "status": "success",
-                "message": f"Backup successful - {files_changed} files updated",
-                "commit_sha": new_commit_sha,
+                "message": message,
+                "commit_sha": commit_sha,
                 "files_changed": files_changed,
             }
 
         except Exception as e:
-            logger.error("Push to Git failed: %s", e)
+            logger.exception("push_files failed for %s branch=%s", repo_url, branch)
             return {"status": "failed", "message": str(e), "error": str(e)}
 
     async def _create_branch_and_push(
@@ -226,33 +258,44 @@ class GiteaBackend(GitHubBackend):
         try:
             repo_response = await client.get(f"{api_base}/repos/{owner}/{repo}", headers=headers)
             if repo_response.status_code != 200:
-                return {"status": "failed", "message": "Failed to get repo info"}
+                msg = f"Failed to get repo info (HTTP {repo_response.status_code}): {self._truncated_response_text(repo_response)}"
+                logger.warning("_create_branch_and_push %s/%s: %s", owner, repo, msg)
+                return {"status": "failed", "message": msg}
 
             default_branch = repo_response.json().get("default_branch", "main")
 
+            # GET the default branch to confirm the repo is non-empty; SHA is intentionally unused —
+            # POST /branches takes a branch name, not a SHA.
             ref_response = await client.get(
                 f"{api_base}/repos/{owner}/{repo}/git/refs/heads/{default_branch}", headers=headers
             )
             if ref_response.status_code != 200:
                 return await self._create_initial_commit(client, headers, api_base, owner, repo, branch, files)
 
-            base_sha = self._ref_sha(ref_response.json())
-
             create_ref = await client.post(
-                f"{api_base}/repos/{owner}/{repo}/git/refs",
+                f"{api_base}/repos/{owner}/{repo}/branches",
                 headers=headers,
-                json={"ref": f"refs/heads/{branch}", "sha": base_sha},
+                json={"new_branch_name": branch, "old_ref_name": default_branch},
             )
+            if create_ref.status_code == 403:
+                msg = f"Permission denied creating branch '{branch}' — token may lack write access to this repository"
+                logger.warning("_create_branch_and_push %s/%s: 403 %s", owner, repo, msg)
+                return {"status": "failed", "message": msg}
+            if create_ref.status_code == 409:
+                msg = f"Branch '{branch}' already exists (possible race condition)"
+                logger.warning("_create_branch_and_push %s/%s: 409 %s", owner, repo, msg)
+                return {"status": "failed", "message": msg}
             if create_ref.status_code != 201:
-                return {
-                    "status": "failed",
-                    "message": f"Failed to create branch: {self._truncated_response_text(create_ref)}",
-                }
+                msg = f"Failed to create branch '{branch}' (HTTP {create_ref.status_code}): {self._truncated_response_text(create_ref)}"
+                logger.warning("_create_branch_and_push %s/%s: %s", owner, repo, msg)
+                return {"status": "failed", "message": msg}
 
-            return await self.push_files(repo_url, token, branch, files, client)
+            logger.info("Re-entering push_files after branch create %s/%s -> %s", owner, repo, branch)
+            return await self.push_files(repo_url, token, branch, files, client, _allow_branch_create=False)
 
         except Exception as e:
-            return {"status": "failed", "message": str(e)}
+            logger.exception("_create_branch_and_push failed for %s/%s branch=%s", owner, repo, branch)
+            return {"status": "failed", "message": str(e), "error": str(e)}
 
     async def _create_initial_commit(
         self,
@@ -305,13 +348,18 @@ class GiteaBackend(GitHubBackend):
 
             data = response.json()
             commit_sha = (data.get("commit") or {}).get("sha")
+            message = (
+                f"Initial backup created - {len(files)} files"
+                if commit_sha
+                else f"Initial backup created - {len(files)} files (commit SHA not reported by server)"
+            )
             return {
                 "status": "success",
-                "message": f"Initial backup created - {len(files)} files",
+                "message": message,
                 "commit_sha": commit_sha,
                 "files_changed": len(files),
             }
 
         except Exception as e:
-            logger.error("Gitea initial commit failed: %s", e)
+            logger.exception("_create_initial_commit failed for %s/%s branch=%s", owner, repo, branch)
             return {"status": "failed", "message": str(e), "error": str(e)}

+ 151 - 55
backend/app/services/git_providers/github.py

@@ -97,10 +97,16 @@ class GitHubBackend(GitProviderBackend):
             }
 
         except Exception as e:
-            logger.error("Git connection test failed: %s", e)
+            logger.exception("Git connection test failed")
+            detail = str(e)[:200]
+            message = (
+                f"Connection failed: {type(e).__name__}: {detail}"
+                if detail
+                else f"Connection failed: {type(e).__name__}"
+            )
             return {
                 "success": False,
-                "message": f"Connection failed: {type(e).__name__}",
+                "message": message,
                 "repo_name": None,
                 "permissions": None,
             }
@@ -112,6 +118,7 @@ class GitHubBackend(GitProviderBackend):
         branch: str,
         files: dict,
         client: httpx.AsyncClient,
+        _allow_branch_create: bool = True,
     ) -> dict:
         """Push files to the repository using the Git Data API."""
         try:
@@ -122,35 +129,72 @@ class GitHubBackend(GitProviderBackend):
             ref_response = await client.get(f"{api_base}/repos/{owner}/{repo}/git/refs/heads/{branch}", headers=headers)
 
             if ref_response.status_code == 404:
+                if not _allow_branch_create:
+                    return {
+                        "status": "failed",
+                        "message": (
+                            f"Branch '{branch}' not found after creation — possible replication lag. "
+                            "The next scheduled backup will retry."
+                        ),
+                    }
                 return await self._create_branch_and_push(
                     client, headers, api_base, owner, repo, branch, files, repo_url, token
                 )
 
             if ref_response.status_code != 200:
-                return {
-                    "status": "failed",
-                    "message": f"Failed to get branch ref: {ref_response.status_code}",
-                    "error": self._truncated_response_text(ref_response),
-                }
+                msg = f"Failed to get branch ref (HTTP {ref_response.status_code}): {self._truncated_response_text(ref_response)}"
+                logger.warning("push_files %s/%s: %s", owner, repo, msg)
+                return {"status": "failed", "message": msg, "error": self._truncated_response_text(ref_response)}
 
-            current_commit_sha = ref_response.json()["object"]["sha"]
+            current_commit_sha, err = self._read_sha(ref_response, "object", "sha")
+            if err:
+                msg = f"Malformed ref response ({err}): {self._truncated_response_text(ref_response)}"
+                logger.warning("push_files %s/%s: %s", owner, repo, msg)
+                return {"status": "failed", "message": msg}
 
             commit_response = await client.get(
                 f"{api_base}/repos/{owner}/{repo}/git/commits/{current_commit_sha}", headers=headers
             )
             if commit_response.status_code != 200:
-                return {"status": "failed", "message": "Failed to get current commit"}
+                msg = f"Failed to get current commit (HTTP {commit_response.status_code}): {self._truncated_response_text(commit_response)}"
+                logger.warning("push_files %s/%s: %s", owner, repo, msg)
+                return {"status": "failed", "message": msg}
 
-            current_tree_sha = commit_response.json()["tree"]["sha"]
+            current_tree_sha, err = self._read_sha(commit_response, "tree", "sha")
+            if err:
+                msg = f"Malformed commit response ({err}): {self._truncated_response_text(commit_response)}"
+                logger.warning("push_files %s/%s: %s", owner, repo, msg)
+                return {"status": "failed", "message": msg}
 
             tree_response = await client.get(
                 f"{api_base}/repos/{owner}/{repo}/git/trees/{current_tree_sha}?recursive=1", headers=headers
             )
+            if tree_response.status_code != 200:
+                msg = f"Failed to list existing tree (HTTP {tree_response.status_code}): {self._truncated_response_text(tree_response)}"
+                logger.warning("push_files %s/%s: %s", owner, repo, msg)
+                return {"status": "failed", "message": msg, "error": self._truncated_response_text(tree_response)}
+            tree_data = tree_response.json()
+            # GitHub's tree API truncates >7MB / >100k entries. A truncated tree
+            # listing makes the SHA-equality dedup miss and every file gets
+            # re-uploaded as a new blob each run — silent churn until someone
+            # notices the bloated history. Fail loudly so the user rotates the
+            # backup repo.
+            if tree_data.get("truncated"):
+                msg = (
+                    "Repository tree exceeds the GitHub API listing limit (truncated=true). "
+                    "Rotate the backup repository to avoid silent file-by-file churn on every backup."
+                )
+                logger.warning("push_files %s/%s: %s", owner, repo, msg)
+                return {"status": "failed", "message": msg}
             existing_files: dict[str, str] = {}
-            if tree_response.status_code == 200:
-                for item in tree_response.json().get("tree", []):
-                    if item["type"] == "blob":
-                        existing_files[item["path"]] = item["sha"]
+            for item in tree_data.get("tree", []):
+                if item.get("type") != "blob":
+                    continue
+                path, sha = item.get("path"), item.get("sha")
+                if not path or not sha:
+                    logger.warning("push_files: skipping malformed tree entry: %s", item)
+                    continue
+                existing_files[path] = sha
 
             tree_items = []
             files_changed = 0
@@ -168,11 +212,21 @@ class GitHubBackend(GitProviderBackend):
                     headers=headers,
                     json={"content": base64.b64encode(content_bytes).decode(), "encoding": "base64"},
                 )
+                if blob_response.status_code == 404:
+                    msg = "GitHub API returned 404 for POST /git/blobs — check repository visibility and token scope"
+                    logger.warning("push_files %s/%s: %s", owner, repo, msg)
+                    return {"status": "failed", "message": msg}
                 if blob_response.status_code != 201:
-                    logger.error("Failed to create blob for %s: %s", path, self._truncated_response_text(blob_response))
-                    continue
-
-                tree_items.append({"path": path, "mode": "100644", "type": "blob", "sha": blob_response.json()["sha"]})
+                    msg = f"Failed to create blob for {path} (HTTP {blob_response.status_code}): {self._truncated_response_text(blob_response)}"
+                    logger.warning("push_files %s/%s: %s", owner, repo, msg)
+                    return {"status": "failed", "message": msg}
+
+                blob_sha, err = self._read_sha(blob_response, "sha")
+                if err:
+                    msg = f"Malformed blob response for {path} ({err}): {self._truncated_response_text(blob_response)}"
+                    logger.warning("push_files %s/%s: %s", owner, repo, msg)
+                    return {"status": "failed", "message": msg}
+                tree_items.append({"path": path, "mode": "100644", "type": "blob", "sha": blob_sha})
                 files_changed += 1
 
             if not tree_items:
@@ -184,12 +238,15 @@ class GitHubBackend(GitProviderBackend):
                 json={"base_tree": current_tree_sha, "tree": tree_items},
             )
             if tree_response.status_code != 201:
-                return {
-                    "status": "failed",
-                    "message": f"Failed to create tree: {self._truncated_response_text(tree_response)}",
-                }
-
-            new_tree_sha = tree_response.json()["sha"]
+                msg = f"Failed to create tree (HTTP {tree_response.status_code}): {self._truncated_response_text(tree_response)}"
+                logger.warning("push_files %s/%s: %s", owner, repo, msg)
+                return {"status": "failed", "message": msg}
+
+            new_tree_sha, err = self._read_sha(tree_response, "sha")
+            if err:
+                msg = f"Malformed tree-create response ({err}): {self._truncated_response_text(tree_response)}"
+                logger.warning("push_files %s/%s: %s", owner, repo, msg)
+                return {"status": "failed", "message": msg}
             commit_message = f"Bambuddy backup - {datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M:%S UTC')}"
             commit_response = await client.post(
                 f"{api_base}/repos/{owner}/{repo}/git/commits",
@@ -197,12 +254,15 @@ class GitHubBackend(GitProviderBackend):
                 json={"message": commit_message, "tree": new_tree_sha, "parents": [current_commit_sha]},
             )
             if commit_response.status_code != 201:
-                return {
-                    "status": "failed",
-                    "message": f"Failed to create commit: {self._truncated_response_text(commit_response)}",
-                }
+                msg = f"Failed to create commit (HTTP {commit_response.status_code}): {self._truncated_response_text(commit_response)}"
+                logger.warning("push_files %s/%s: %s", owner, repo, msg)
+                return {"status": "failed", "message": msg}
 
-            new_commit_sha = commit_response.json()["sha"]
+            new_commit_sha, err = self._read_sha(commit_response, "sha")
+            if err:
+                msg = f"Malformed commit-create response ({err}): {self._truncated_response_text(commit_response)}"
+                logger.warning("push_files %s/%s: %s", owner, repo, msg)
+                return {"status": "failed", "message": msg}
 
             ref_update = await client.patch(
                 f"{api_base}/repos/{owner}/{repo}/git/refs/heads/{branch}",
@@ -210,10 +270,9 @@ class GitHubBackend(GitProviderBackend):
                 json={"sha": new_commit_sha},
             )
             if ref_update.status_code != 200:
-                return {
-                    "status": "failed",
-                    "message": f"Failed to update branch: {self._truncated_response_text(ref_update)}",
-                }
+                msg = f"Failed to update branch (HTTP {ref_update.status_code}): {self._truncated_response_text(ref_update)}"
+                logger.warning("push_files %s/%s: %s", owner, repo, msg)
+                return {"status": "failed", "message": msg}
 
             return {
                 "status": "success",
@@ -223,7 +282,7 @@ class GitHubBackend(GitProviderBackend):
             }
 
         except Exception as e:
-            logger.error("Push to Git failed: %s", e)
+            logger.exception("push_files failed for %s branch=%s", repo_url, branch)
             return {"status": "failed", "message": str(e), "error": str(e)}
 
     async def _create_branch_and_push(
@@ -242,9 +301,16 @@ class GitHubBackend(GitProviderBackend):
         try:
             repo_response = await client.get(f"{api_base}/repos/{owner}/{repo}", headers=headers)
             if repo_response.status_code != 200:
-                return {"status": "failed", "message": "Failed to get repo info"}
+                msg = f"Failed to get repo info (HTTP {repo_response.status_code}): {self._truncated_response_text(repo_response)}"
+                logger.warning("_create_branch_and_push %s/%s: %s", owner, repo, msg)
+                return {"status": "failed", "message": msg}
 
-            default_branch = repo_response.json().get("default_branch", "main")
+            try:
+                default_branch = repo_response.json().get("default_branch", "main")
+            except ValueError:
+                msg = f"Malformed repo-info response (non-JSON body): {self._truncated_response_text(repo_response)}"
+                logger.warning("_create_branch_and_push %s/%s: %s", owner, repo, msg)
+                return {"status": "failed", "message": msg}
 
             ref_response = await client.get(
                 f"{api_base}/repos/{owner}/{repo}/git/refs/heads/{default_branch}", headers=headers
@@ -252,7 +318,11 @@ class GitHubBackend(GitProviderBackend):
             if ref_response.status_code != 200:
                 return await self._create_initial_commit(client, headers, api_base, owner, repo, branch, files)
 
-            base_sha = ref_response.json()["object"]["sha"]
+            base_sha, err = self._read_sha(ref_response, "object", "sha")
+            if err:
+                msg = f"Malformed default-branch ref response ({err}): {self._truncated_response_text(ref_response)}"
+                logger.warning("_create_branch_and_push %s/%s: %s", owner, repo, msg)
+                return {"status": "failed", "message": msg}
 
             create_ref = await client.post(
                 f"{api_base}/repos/{owner}/{repo}/git/refs",
@@ -260,15 +330,16 @@ class GitHubBackend(GitProviderBackend):
                 json={"ref": f"refs/heads/{branch}", "sha": base_sha},
             )
             if create_ref.status_code != 201:
-                return {
-                    "status": "failed",
-                    "message": f"Failed to create branch: {self._truncated_response_text(create_ref)}",
-                }
+                msg = f"Failed to create branch '{branch}' (HTTP {create_ref.status_code}): {self._truncated_response_text(create_ref)}"
+                logger.warning("_create_branch_and_push %s/%s: %s", owner, repo, msg)
+                return {"status": "failed", "message": msg}
 
-            return await self.push_files(repo_url, token, branch, files, client)
+            logger.info("Re-entering push_files after branch create %s/%s -> %s", owner, repo, branch)
+            return await self.push_files(repo_url, token, branch, files, client, _allow_branch_create=False)
 
         except Exception as e:
-            return {"status": "failed", "message": str(e)}
+            logger.exception("_create_branch_and_push failed for %s/%s branch=%s", owner, repo, branch)
+            return {"status": "failed", "message": str(e), "error": str(e)}
 
     async def _create_initial_commit(
         self,
@@ -290,10 +361,20 @@ class GitHubBackend(GitProviderBackend):
                     headers=headers,
                     json={"content": base64.b64encode(content_str.encode()).decode(), "encoding": "base64"},
                 )
-                if blob_response.status_code == 201:
-                    tree_items.append(
-                        {"path": path, "mode": "100644", "type": "blob", "sha": blob_response.json()["sha"]}
-                    )
+                if blob_response.status_code == 404:
+                    msg = "GitHub API returned 404 for POST /git/blobs — check repository visibility and token scope"
+                    logger.warning("_create_initial_commit %s/%s: %s", owner, repo, msg)
+                    return {"status": "failed", "message": msg}
+                if blob_response.status_code != 201:
+                    msg = f"Failed to create blob for {path} (HTTP {blob_response.status_code}): {self._truncated_response_text(blob_response)}"
+                    logger.warning("_create_initial_commit %s/%s: %s", owner, repo, msg)
+                    return {"status": "failed", "message": msg}
+                blob_sha, err = self._read_sha(blob_response, "sha")
+                if err:
+                    msg = f"Malformed blob response for {path} ({err}): {self._truncated_response_text(blob_response)}"
+                    logger.warning("_create_initial_commit %s/%s: %s", owner, repo, msg)
+                    return {"status": "failed", "message": msg}
+                tree_items.append({"path": path, "mode": "100644", "type": "blob", "sha": blob_sha})
 
             tree_response = await client.post(
                 f"{api_base}/repos/{owner}/{repo}/git/trees",
@@ -301,9 +382,15 @@ class GitHubBackend(GitProviderBackend):
                 json={"tree": tree_items},
             )
             if tree_response.status_code != 201:
-                return {"status": "failed", "message": "Failed to create tree"}
-
-            tree_sha = tree_response.json()["sha"]
+                msg = f"Failed to create tree (HTTP {tree_response.status_code}): {self._truncated_response_text(tree_response)}"
+                logger.warning("_create_initial_commit %s/%s: %s", owner, repo, msg)
+                return {"status": "failed", "message": msg}
+
+            tree_sha, err = self._read_sha(tree_response, "sha")
+            if err:
+                msg = f"Malformed tree-create response ({err}): {self._truncated_response_text(tree_response)}"
+                logger.warning("_create_initial_commit %s/%s: %s", owner, repo, msg)
+                return {"status": "failed", "message": msg}
             commit_response = await client.post(
                 f"{api_base}/repos/{owner}/{repo}/git/commits",
                 headers=headers,
@@ -313,16 +400,24 @@ class GitHubBackend(GitProviderBackend):
                 },
             )
             if commit_response.status_code != 201:
-                return {"status": "failed", "message": "Failed to create commit"}
-
-            commit_sha = commit_response.json()["sha"]
+                msg = f"Failed to create commit (HTTP {commit_response.status_code}): {self._truncated_response_text(commit_response)}"
+                logger.warning("_create_initial_commit %s/%s: %s", owner, repo, msg)
+                return {"status": "failed", "message": msg}
+
+            commit_sha, err = self._read_sha(commit_response, "sha")
+            if err:
+                msg = f"Malformed commit-create response ({err}): {self._truncated_response_text(commit_response)}"
+                logger.warning("_create_initial_commit %s/%s: %s", owner, repo, msg)
+                return {"status": "failed", "message": msg}
             ref_response = await client.post(
                 f"{api_base}/repos/{owner}/{repo}/git/refs",
                 headers=headers,
                 json={"ref": f"refs/heads/{branch}", "sha": commit_sha},
             )
             if ref_response.status_code != 201:
-                return {"status": "failed", "message": "Failed to create branch ref"}
+                msg = f"Failed to create branch ref (HTTP {ref_response.status_code}): {self._truncated_response_text(ref_response)}"
+                logger.warning("_create_initial_commit %s/%s: %s", owner, repo, msg)
+                return {"status": "failed", "message": msg}
 
             return {
                 "status": "success",
@@ -332,4 +427,5 @@ class GitHubBackend(GitProviderBackend):
             }
 
         except Exception as e:
-            return {"status": "failed", "message": str(e)}
+            logger.exception("_create_initial_commit failed for %s/%s branch=%s", owner, repo, branch)
+            return {"status": "failed", "message": str(e), "error": str(e)}

+ 9 - 7
backend/app/services/github_backup.py

@@ -76,8 +76,8 @@ class GitHubBackupService:
                 await self._check_scheduled_backups()
             except asyncio.CancelledError:
                 break
-            except Exception as e:
-                logger.error("Error in GitHub backup scheduler: %s", e)
+            except Exception:
+                logger.exception("Error in GitHub backup scheduler")
                 await asyncio.sleep(60)
 
     async def _check_scheduled_backups(self):
@@ -202,7 +202,7 @@ class GitHubBackupService:
                     }
 
                 except Exception as e:
-                    logger.error("Backup failed: %s", e)
+                    logger.exception("Backup failed")
                     log.status = "failed"
                     log.completed_at = datetime.now(timezone.utc)
                     log.error_message = str(e)
@@ -381,12 +381,14 @@ class GitHubBackupService:
                 }
 
             logger.info(
-                f"Collected cloud profiles: {len(filament_settings)} filament, "
-                f"{len(printer_settings)} printer, {len(process_settings)} process"
+                "Collected cloud profiles: %d filament, %d printer, %d process",
+                len(filament_settings),
+                len(printer_settings),
+                len(process_settings),
             )
 
-        except Exception as e:
-            logger.warning("Failed to collect cloud profiles: %s", e)
+        except Exception:
+            logger.warning("Failed to collect cloud profiles", exc_info=True)
         finally:
             await cloud.close()
 

+ 1 - 3
backend/tests/integration/test_github_backup_api.py

@@ -164,9 +164,7 @@ class TestGitHubBackupConfigAPI:
 
     @pytest.mark.asyncio
     @pytest.mark.integration
-    async def test_update_config_rejects_disabling_insecure_http_for_stored_http_url(
-        self, async_client: AsyncClient
-    ):
+    async def test_update_config_rejects_disabling_insecure_http_for_stored_http_url(self, async_client: AsyncClient):
         """Verify PATCH rejects leaving a stored HTTP URL without explicit insecure-HTTP allowance."""
         create_data = {
             "repository_url": "http://git.example.com/test/httprepo",

File diff suppressed because it is too large
+ 833 - 89
backend/tests/unit/test_git_providers.py


Some files were not shown because too many files changed in this diff