gitea.py 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317
  1. """Gitea backend — overrides GitHubBackend where Gitea's API diverges."""
  2. import base64
  3. import json
  4. import logging
  5. import re
  6. from datetime import datetime, timezone
  7. import httpx
  8. from backend.app.services.git_providers.github import GitHubBackend
  9. logger = logging.getLogger(__name__)
  10. class GiteaBackend(GitHubBackend):
  11. """Backend for Gitea instances.
  12. Gitea's Git Data API (/api/v1/repos/{owner}/{repo}/git/...) is *mostly*
  13. compatible with GitHub's, but diverges on two points that broke real-world
  14. backups (#1224, #1225):
  15. 1. ``GET /git/refs/heads/{branch}`` returns a *list* of matching refs even
  16. when only one matches; GitHub returns a single object. The push paths
  17. below extract the SHA via ``_ref_sha()`` instead of the GitHub-style
  18. ``["object"]["sha"]`` chain.
  19. 2. The Git Data API (blobs/trees/commits/refs) refuses writes against an
  20. empty repository — every blob POST returns 404 until the repo has at
  21. least one commit. ``_create_initial_commit()`` is overridden to use the
  22. Contents API, which seeds the branch + initial commit in a single call.
  23. """
  24. @staticmethod
  25. def _ref_sha(ref_data) -> str:
  26. """Extract the commit SHA from Gitea's list-shaped ref response."""
  27. if isinstance(ref_data, list):
  28. if not ref_data:
  29. raise ValueError("Empty refs list returned by Gitea API")
  30. return ref_data[0]["object"]["sha"]
  31. return ref_data["object"]["sha"]
  32. @staticmethod
  33. def _commit_tree_sha(commit_data: dict) -> str | None:
  34. """Extract the tree SHA from a commit response.
  35. GitHub's ``GET /git/commits/{sha}`` returns the GitCommit schema with
  36. ``tree`` at the top level. Gitea's same-named endpoint returns the
  37. wrapped Commit schema where ``tree`` lives under ``commit``. Try the
  38. flat shape first (GitHub-compatible deployments / Gitea ≤ 1.23) then
  39. fall back to the wrapped shape (Gitea 1.24+, Forgejo).
  40. """
  41. tree_node = commit_data.get("tree")
  42. if not isinstance(tree_node, dict):
  43. tree_node = (commit_data.get("commit") or {}).get("tree")
  44. if isinstance(tree_node, dict):
  45. return tree_node.get("sha")
  46. return None
  def parse_repo_url(self, url: str) -> tuple[str, str]:
      """Split a repository URL into ``(owner, repo)``.

      Recognised forms are ``http(s)://host[:port]/owner/repo`` (optionally
      ending in ``.git`` and/or a trailing slash) and the SSH shorthand
      ``git@host:owner/repo[.git]``. A trailing ``.git`` is stripped from
      the repository name.

      Raises:
          ValueError: if ``url`` is empty, longer than 500 characters, or
              matches neither form.
      """
      if not url or len(url) > 500:
          raise ValueError("Invalid Git URL: URL too long or empty")

      # (pattern, owner group index, repo group index) — tried in order.
      url_forms = (
          (r"https?://[\w.\-]+(:\d+)?/([\w.\-]{1,100})/([\w.\-]{1,100})(?:\.git)?/?$", 2, 3),
          (r"git@[\w.\-]+:([\w.\-]{1,100})/([\w.\-]{1,100})(?:\.git)?$", 1, 2),
      )
      for pattern, owner_idx, repo_idx in url_forms:
          found = re.match(pattern, url)
          if found is not None:
              # The repo group is greedy and may have swallowed ".git".
              return found.group(owner_idx), found.group(repo_idx).removesuffix(".git")

      raise ValueError(f"Cannot parse repository URL: {url}")
  def get_api_base(self, repo_url: str) -> str:
      """Build the ``/api/v1`` base URL from the repository URL's origin.

      Only the scheme, host and optional port of an ``http://`` or
      ``https://`` URL are kept; everything from the first path slash on is
      dropped.

      Raises:
          ValueError: if ``repo_url`` does not start with an http(s) origin
              followed by ``/`` (this includes ``git@host:...`` URLs).
      """
      origin = re.match(r"(https?://[\w.\-]+(:\d+)?)/", repo_url)
      if origin is None:
          raise ValueError(f"Cannot derive API base from URL: {repo_url}")
      return f"{origin.group(1)}/api/v1"
  def get_headers(self, token: str) -> dict:
      """Return the parent backend's headers with ``Accept`` set to plain JSON.

      The dict produced by the parent implementation is updated in place
      and returned.
      """
      merged = super().get_headers(token)
      merged.update({"Accept": "application/json"})
      return merged
  async def push_files(
      self,
      repo_url: str,
      token: str,
      branch: str,
      files: dict,
      client: httpx.AsyncClient,
  ) -> dict:
      """Push files via the Git Data API, normalising Gitea's list-shaped ref response.

      Flow: resolve the branch ref, read the current commit and tree,
      upload a blob for every file whose content differs from the existing
      tree, create a new tree and commit on top of the current one, then
      move the branch ref to the new commit.

      Args:
          repo_url: Repository URL; owner/repo and API base are derived from it.
          token: Access token handed to ``get_headers``.
          branch: Target branch name (without the ``refs/heads/`` prefix).
          files: Mapping of repository path -> JSON-serialisable content.
          client: HTTP client used for every request.

      Returns:
          A result dict with at least ``status`` (``"success"``,
          ``"skipped"`` or ``"failed"``) and ``message``. Success and skip
          results also carry ``commit_sha`` and ``files_changed``. Errors
          are never raised to the caller; they are reported as ``"failed"``.
      """
      try:
          owner, repo = self.parse_repo_url(repo_url)
          api_base = self.get_api_base(repo_url)
          headers = self.get_headers(token)

          ref_response = await client.get(f"{api_base}/repos/{owner}/{repo}/git/refs/heads/{branch}", headers=headers)
          if ref_response.status_code == 404:
              return await self._create_branch_and_push(
                  client, headers, api_base, owner, repo, branch, files, repo_url, token
              )
          if ref_response.status_code != 200:
              return {
                  "status": "failed",
                  "message": f"Failed to get branch ref: {ref_response.status_code}",
                  "error": self._truncated_response_text(ref_response),
              }

          ref_data = ref_response.json()
          if isinstance(ref_data, list):
              # The ref lookup acts as a filter, so the list can hold other
              # branches that merely share the prefix (e.g. "backup-old" when
              # asking for "backup"). Only the exact ref is acceptable; if the
              # entries are named and none matches, the branch does not exist.
              wanted_ref = f"refs/heads/{branch}"
              named_refs = [r for r in ref_data if isinstance(r, dict) and "ref" in r]
              if named_refs:
                  exact_refs = [r for r in named_refs if r["ref"] == wanted_ref]
                  if not exact_refs:
                      return await self._create_branch_and_push(
                          client, headers, api_base, owner, repo, branch, files, repo_url, token
                      )
                  ref_data = exact_refs
          current_commit_sha = self._ref_sha(ref_data)

          commit_response = await client.get(
              f"{api_base}/repos/{owner}/{repo}/git/commits/{current_commit_sha}", headers=headers
          )
          if commit_response.status_code != 200:
              return {"status": "failed", "message": "Failed to get current commit"}
          current_tree_sha = self._commit_tree_sha(commit_response.json())
          if not current_tree_sha:
              return {"status": "failed", "message": "Failed to extract tree SHA from commit response"}

          # Map existing blob paths to their SHAs so unchanged files can be
          # skipped. A failed tree read just means everything is re-uploaded.
          tree_response = await client.get(
              f"{api_base}/repos/{owner}/{repo}/git/trees/{current_tree_sha}?recursive=1", headers=headers
          )
          existing_files: dict[str, str] = {}
          if tree_response.status_code == 200:
              for item in tree_response.json().get("tree", []):
                  if item["type"] == "blob":
                      existing_files[item["path"]] = item["sha"]

          tree_items = []
          failed_paths = []
          files_changed = 0
          for path, content in files.items():
              content_str = json.dumps(content, indent=2, default=str)
              content_bytes = content_str.encode("utf-8")
              content_sha = self._blob_sha(content_bytes)
              if path in existing_files and existing_files[path] == content_sha:
                  continue
              blob_response = await client.post(
                  f"{api_base}/repos/{owner}/{repo}/git/blobs",
                  headers=headers,
                  json={"content": base64.b64encode(content_bytes).decode(), "encoding": "base64"},
              )
              if blob_response.status_code != 201:
                  # Best effort: one bad blob does not abort the whole backup.
                  logger.error("Failed to create blob for %s: %s", path, self._truncated_response_text(blob_response))
                  failed_paths.append(path)
                  continue
              tree_items.append({"path": path, "mode": "100644", "type": "blob", "sha": blob_response.json()["sha"]})
              files_changed += 1

          if not tree_items:
              if failed_paths:
                  # Nothing could be uploaded: that is a failure, not "no changes".
                  return {
                      "status": "failed",
                      "message": f"Failed to create blobs for {len(failed_paths)} file(s)",
                      "error": ", ".join(failed_paths[:10]),
                  }
              return {"status": "skipped", "message": "No changes to commit", "commit_sha": None, "files_changed": 0}

          tree_response = await client.post(
              f"{api_base}/repos/{owner}/{repo}/git/trees",
              headers=headers,
              json={"base_tree": current_tree_sha, "tree": tree_items},
          )
          if tree_response.status_code != 201:
              return {
                  "status": "failed",
                  "message": f"Failed to create tree: {self._truncated_response_text(tree_response)}",
              }
          new_tree_sha = tree_response.json()["sha"]

          commit_message = f"Bambuddy backup - {datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M:%S UTC')}"
          commit_response = await client.post(
              f"{api_base}/repos/{owner}/{repo}/git/commits",
              headers=headers,
              json={"message": commit_message, "tree": new_tree_sha, "parents": [current_commit_sha]},
          )
          if commit_response.status_code != 201:
              return {
                  "status": "failed",
                  "message": f"Failed to create commit: {self._truncated_response_text(commit_response)}",
              }
          new_commit_sha = commit_response.json()["sha"]

          ref_update = await client.patch(
              f"{api_base}/repos/{owner}/{repo}/git/refs/heads/{branch}",
              headers=headers,
              json={"sha": new_commit_sha},
          )
          if ref_update.status_code != 200:
              return {
                  "status": "failed",
                  "message": f"Failed to update branch: {self._truncated_response_text(ref_update)}",
              }

          return {
              "status": "success",
              "message": f"Backup successful - {files_changed} files updated",
              "commit_sha": new_commit_sha,
              "files_changed": files_changed,
          }
      except Exception as e:
          logger.error("Push to Git failed: %s", e)
          return {"status": "failed", "message": str(e), "error": str(e)}
  async def _create_branch_and_push(
      self,
      client: httpx.AsyncClient,
      headers: dict,
      api_base: str,
      owner: str,
      repo: str,
      branch: str,
      files: dict,
      repo_url: str,
      token: str,
  ) -> dict:
      """Create branch (from default branch or as initial commit) then push.

      Looks up the repository's default branch. If that branch's ref cannot
      be read, the work is handed to ``_create_initial_commit``. Otherwise
      ``branch`` is created at the default branch's head and ``push_files``
      is called again to write the files.

      Returns:
          A result dict with ``status`` and ``message``. Exceptions are
          logged and reported as ``"failed"`` rather than raised.
      """
      try:
          repo_response = await client.get(f"{api_base}/repos/{owner}/{repo}", headers=headers)
          if repo_response.status_code != 200:
              return {"status": "failed", "message": "Failed to get repo info"}
          default_branch = repo_response.json().get("default_branch", "main")

          ref_response = await client.get(
              f"{api_base}/repos/{owner}/{repo}/git/refs/heads/{default_branch}", headers=headers
          )
          if ref_response.status_code != 200:
              # No readable default-branch ref: seed the branch directly.
              return await self._create_initial_commit(client, headers, api_base, owner, repo, branch, files)
          base_sha = self._ref_sha(ref_response.json())

          create_ref = await client.post(
              f"{api_base}/repos/{owner}/{repo}/git/refs",
              headers=headers,
              json={"ref": f"refs/heads/{branch}", "sha": base_sha},
          )
          if create_ref.status_code != 201:
              return {
                  "status": "failed",
                  "message": f"Failed to create branch: {self._truncated_response_text(create_ref)}",
              }

          return await self.push_files(repo_url, token, branch, files, client)
      except Exception as e:
          # Log like the sibling push paths do instead of swallowing silently.
          logger.error("Gitea branch creation failed: %s", e)
          return {"status": "failed", "message": str(e), "error": str(e)}
  async def _create_initial_commit(
      self,
      client: httpx.AsyncClient,
      headers: dict,
      api_base: str,
      owner: str,
      repo: str,
      branch: str,
      files: dict,
  ) -> dict:
      """Seed an empty Gitea repository via the Contents API.

      Gitea's Git Data API only accepts blob/tree/commit writes once the
      repository has a commit; on an empty repository every
      ``POST /git/blobs`` returns 404. A single
      ``POST /repos/{owner}/{repo}/contents`` carrying a ``files`` array
      creates the first commit and the target branch in one request
      (Gitea 1.18+, Forgejo all versions).

      Returns:
          A result dict: ``"skipped"`` when ``files`` is empty,
          ``"success"`` with ``commit_sha`` and ``files_changed`` when the
          request returns 200 or 201, otherwise ``"failed"``. Exceptions are
          logged and reported as ``"failed"``.
      """
      try:
          if not files:
              return {"status": "skipped", "message": "No files to commit", "commit_sha": None, "files_changed": 0}

          # One "create" operation per file; content is base64 of the JSON dump.
          operations = [
              {
                  "operation": "create",
                  "path": file_path,
                  "content": base64.b64encode(
                      json.dumps(payload, indent=2, default=str).encode("utf-8")
                  ).decode(),
              }
              for file_path, payload in files.items()
          ]

          stamp = datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M:%S UTC')
          response = await client.post(
              f"{api_base}/repos/{owner}/{repo}/contents",
              headers=headers,
              json={
                  "branch": branch,
                  "new_branch": branch,
                  "message": f"Initial Bambuddy backup - {stamp}",
                  "files": operations,
              },
          )

          if response.status_code in (200, 201):
              commit_info = response.json().get("commit") or {}
              return {
                  "status": "success",
                  "message": f"Initial backup created - {len(files)} files",
                  "commit_sha": commit_info.get("sha"),
                  "files_changed": len(files),
              }

          return {
              "status": "failed",
              "message": f"Failed to create initial commit: {self._truncated_response_text(response)}",
          }
      except Exception as e:
          logger.error("Gitea initial commit failed: %s", e)
          return {"status": "failed", "message": str(e), "error": str(e)}