gitea.py 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298
  1. """Gitea backend — overrides GitHubBackend where Gitea's API diverges."""
  2. import base64
  3. import json
  4. import logging
  5. import re
  6. from datetime import datetime, timezone
  7. import httpx
  8. from backend.app.services.git_providers.github import GitHubBackend
  9. logger = logging.getLogger(__name__)
  10. class GiteaBackend(GitHubBackend):
  11. """Backend for Gitea instances.
  12. Gitea's Git Data API (/api/v1/repos/{owner}/{repo}/git/...) is *mostly*
  13. compatible with GitHub's, but diverges on two points that broke real-world
  14. backups (#1224, #1225):
  15. 1. ``GET /git/refs/heads/{branch}`` returns a *list* of matching refs even
  16. when only one matches; GitHub returns a single object. The push paths
  17. below extract the SHA via ``_ref_sha()`` instead of the GitHub-style
  18. ``["object"]["sha"]`` chain.
  19. 2. The Git Data API (blobs/trees/commits/refs) refuses writes against an
  20. empty repository — every blob POST returns 404 until the repo has at
  21. least one commit. ``_create_initial_commit()`` is overridden to use the
  22. Contents API, which seeds the branch + initial commit in a single call.
  23. """
  24. @staticmethod
  25. def _ref_sha(ref_data) -> str:
  26. """Extract the commit SHA from Gitea's list-shaped ref response."""
  27. if isinstance(ref_data, list):
  28. if not ref_data:
  29. raise ValueError("Empty refs list returned by Gitea API")
  30. return ref_data[0]["object"]["sha"]
  31. return ref_data["object"]["sha"]
  32. def parse_repo_url(self, url: str) -> tuple[str, str]:
  33. """Return (owner, repo) — accepts both https:// and http:// for self-hosted instances."""
  34. if not url or len(url) > 500:
  35. raise ValueError("Invalid Git URL: URL too long or empty")
  36. match = re.match(
  37. r"https?://[\w.\-]+(:\d+)?/([\w.\-]{1,100})/([\w.\-]{1,100})(?:\.git)?/?$",
  38. url,
  39. )
  40. if match:
  41. return match.group(2), match.group(3).removesuffix(".git")
  42. match = re.match(
  43. r"git@[\w.\-]+:([\w.\-]{1,100})/([\w.\-]{1,100})(?:\.git)?$",
  44. url,
  45. )
  46. if match:
  47. return match.group(1), match.group(2).removesuffix(".git")
  48. raise ValueError(f"Cannot parse repository URL: {url}")
  49. def get_api_base(self, repo_url: str) -> str:
  50. """Derive API base from the repository URL's scheme and host."""
  51. match = re.match(r"(https?://[\w.\-]+(:\d+)?)/", repo_url)
  52. if match:
  53. return f"{match.group(1)}/api/v1"
  54. raise ValueError(f"Cannot derive API base from URL: {repo_url}")
  55. def get_headers(self, token: str) -> dict:
  56. headers = super().get_headers(token)
  57. headers["Accept"] = "application/json"
  58. return headers
  59. async def push_files(
  60. self,
  61. repo_url: str,
  62. token: str,
  63. branch: str,
  64. files: dict,
  65. client: httpx.AsyncClient,
  66. ) -> dict:
  67. """Push files via the Git Data API, normalising Gitea's list-shaped ref response."""
  68. try:
  69. owner, repo = self.parse_repo_url(repo_url)
  70. api_base = self.get_api_base(repo_url)
  71. headers = self.get_headers(token)
  72. ref_response = await client.get(f"{api_base}/repos/{owner}/{repo}/git/refs/heads/{branch}", headers=headers)
  73. if ref_response.status_code == 404:
  74. return await self._create_branch_and_push(
  75. client, headers, api_base, owner, repo, branch, files, repo_url, token
  76. )
  77. if ref_response.status_code != 200:
  78. return {
  79. "status": "failed",
  80. "message": f"Failed to get branch ref: {ref_response.status_code}",
  81. "error": self._truncated_response_text(ref_response),
  82. }
  83. current_commit_sha = self._ref_sha(ref_response.json())
  84. commit_response = await client.get(
  85. f"{api_base}/repos/{owner}/{repo}/git/commits/{current_commit_sha}", headers=headers
  86. )
  87. if commit_response.status_code != 200:
  88. return {"status": "failed", "message": "Failed to get current commit"}
  89. current_tree_sha = commit_response.json()["tree"]["sha"]
  90. tree_response = await client.get(
  91. f"{api_base}/repos/{owner}/{repo}/git/trees/{current_tree_sha}?recursive=1", headers=headers
  92. )
  93. existing_files: dict[str, str] = {}
  94. if tree_response.status_code == 200:
  95. for item in tree_response.json().get("tree", []):
  96. if item["type"] == "blob":
  97. existing_files[item["path"]] = item["sha"]
  98. tree_items = []
  99. files_changed = 0
  100. for path, content in files.items():
  101. content_str = json.dumps(content, indent=2, default=str)
  102. content_bytes = content_str.encode("utf-8")
  103. content_sha = self._blob_sha(content_bytes)
  104. if path in existing_files and existing_files[path] == content_sha:
  105. continue
  106. blob_response = await client.post(
  107. f"{api_base}/repos/{owner}/{repo}/git/blobs",
  108. headers=headers,
  109. json={"content": base64.b64encode(content_bytes).decode(), "encoding": "base64"},
  110. )
  111. if blob_response.status_code != 201:
  112. logger.error("Failed to create blob for %s: %s", path, self._truncated_response_text(blob_response))
  113. continue
  114. tree_items.append({"path": path, "mode": "100644", "type": "blob", "sha": blob_response.json()["sha"]})
  115. files_changed += 1
  116. if not tree_items:
  117. return {"status": "skipped", "message": "No changes to commit", "commit_sha": None, "files_changed": 0}
  118. tree_response = await client.post(
  119. f"{api_base}/repos/{owner}/{repo}/git/trees",
  120. headers=headers,
  121. json={"base_tree": current_tree_sha, "tree": tree_items},
  122. )
  123. if tree_response.status_code != 201:
  124. return {
  125. "status": "failed",
  126. "message": f"Failed to create tree: {self._truncated_response_text(tree_response)}",
  127. }
  128. new_tree_sha = tree_response.json()["sha"]
  129. commit_message = f"Bambuddy backup - {datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M:%S UTC')}"
  130. commit_response = await client.post(
  131. f"{api_base}/repos/{owner}/{repo}/git/commits",
  132. headers=headers,
  133. json={"message": commit_message, "tree": new_tree_sha, "parents": [current_commit_sha]},
  134. )
  135. if commit_response.status_code != 201:
  136. return {
  137. "status": "failed",
  138. "message": f"Failed to create commit: {self._truncated_response_text(commit_response)}",
  139. }
  140. new_commit_sha = commit_response.json()["sha"]
  141. ref_update = await client.patch(
  142. f"{api_base}/repos/{owner}/{repo}/git/refs/heads/{branch}",
  143. headers=headers,
  144. json={"sha": new_commit_sha},
  145. )
  146. if ref_update.status_code != 200:
  147. return {
  148. "status": "failed",
  149. "message": f"Failed to update branch: {self._truncated_response_text(ref_update)}",
  150. }
  151. return {
  152. "status": "success",
  153. "message": f"Backup successful - {files_changed} files updated",
  154. "commit_sha": new_commit_sha,
  155. "files_changed": files_changed,
  156. }
  157. except Exception as e:
  158. logger.error("Push to Git failed: %s", e)
  159. return {"status": "failed", "message": str(e), "error": str(e)}
  160. async def _create_branch_and_push(
  161. self,
  162. client: httpx.AsyncClient,
  163. headers: dict,
  164. api_base: str,
  165. owner: str,
  166. repo: str,
  167. branch: str,
  168. files: dict,
  169. repo_url: str,
  170. token: str,
  171. ) -> dict:
  172. """Create branch (from default branch or as initial commit) then push."""
  173. try:
  174. repo_response = await client.get(f"{api_base}/repos/{owner}/{repo}", headers=headers)
  175. if repo_response.status_code != 200:
  176. return {"status": "failed", "message": "Failed to get repo info"}
  177. default_branch = repo_response.json().get("default_branch", "main")
  178. ref_response = await client.get(
  179. f"{api_base}/repos/{owner}/{repo}/git/refs/heads/{default_branch}", headers=headers
  180. )
  181. if ref_response.status_code != 200:
  182. return await self._create_initial_commit(client, headers, api_base, owner, repo, branch, files)
  183. base_sha = self._ref_sha(ref_response.json())
  184. create_ref = await client.post(
  185. f"{api_base}/repos/{owner}/{repo}/git/refs",
  186. headers=headers,
  187. json={"ref": f"refs/heads/{branch}", "sha": base_sha},
  188. )
  189. if create_ref.status_code != 201:
  190. return {
  191. "status": "failed",
  192. "message": f"Failed to create branch: {self._truncated_response_text(create_ref)}",
  193. }
  194. return await self.push_files(repo_url, token, branch, files, client)
  195. except Exception as e:
  196. return {"status": "failed", "message": str(e)}
  197. async def _create_initial_commit(
  198. self,
  199. client: httpx.AsyncClient,
  200. headers: dict,
  201. api_base: str,
  202. owner: str,
  203. repo: str,
  204. branch: str,
  205. files: dict,
  206. ) -> dict:
  207. """Seed an empty Gitea repository via the Contents API.
  208. Gitea's Git Data API requires the repository to have at least one
  209. commit before it accepts blob/tree/commit writes; on an empty repo
  210. every ``POST /git/blobs`` returns 404. The Contents API is the
  211. documented bootstrap path: a single ``POST /repos/{owner}/{repo}/contents``
  212. with a ``files`` array creates the initial commit and the target
  213. branch in one round-trip (Gitea 1.18+, Forgejo all versions).
  214. """
  215. try:
  216. if not files:
  217. return {"status": "skipped", "message": "No files to commit", "commit_sha": None, "files_changed": 0}
  218. api_files = []
  219. for path, content in files.items():
  220. content_str = json.dumps(content, indent=2, default=str)
  221. content_b64 = base64.b64encode(content_str.encode("utf-8")).decode()
  222. api_files.append({"operation": "create", "path": path, "content": content_b64})
  223. commit_message = f"Initial Bambuddy backup - {datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M:%S UTC')}"
  224. body = {
  225. "branch": branch,
  226. "new_branch": branch,
  227. "message": commit_message,
  228. "files": api_files,
  229. }
  230. response = await client.post(
  231. f"{api_base}/repos/{owner}/{repo}/contents",
  232. headers=headers,
  233. json=body,
  234. )
  235. if response.status_code not in (200, 201):
  236. return {
  237. "status": "failed",
  238. "message": f"Failed to create initial commit: {self._truncated_response_text(response)}",
  239. }
  240. data = response.json()
  241. commit_sha = (data.get("commit") or {}).get("sha")
  242. return {
  243. "status": "success",
  244. "message": f"Initial backup created - {len(files)} files",
  245. "commit_sha": commit_sha,
  246. "files_changed": len(files),
  247. }
  248. except Exception as e:
  249. logger.error("Gitea initial commit failed: %s", e)
  250. return {"status": "failed", "message": str(e), "error": str(e)}