github.py 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335
  1. """GitHub backend — implements GitProviderBackend using the GitHub Git Data API."""
  2. import base64
  3. import json
  4. import logging
  5. import re
  6. from datetime import datetime, timezone
  7. import httpx
  8. from backend.app.services.git_providers.base import GitProviderBackend
  9. logger = logging.getLogger(__name__)
  10. class GitHubBackend(GitProviderBackend):
  11. """Backend for github.com using the GitHub Git Data API."""
  def get_api_base(self, repo_url: str) -> str:
      """Return the REST API base URL for the host named in *repo_url*.

      Public GitHub (``github.com``, optionally prefixed with ``www.``) maps
      to ``https://api.github.com``. Any other host is treated as a GitHub
      Enterprise Server instance and maps to ``https://<host>/api/v3``; a
      port given in an HTTP(S) URL is kept as part of ``<host>``. A URL that
      matches neither the HTTP(S) form nor the ``git@<host>:`` form falls
      back to the public API.

      Host names are compared case-insensitively, so a URL such as
      ``https://GitHub.com/owner/repo`` resolves to the public API instead
      of a ``/api/v3`` path on github.com.
      """
      public_api = "https://api.github.com"
      # HTTP(S) form first (host may carry ":port"), then scp-like SSH form.
      for pattern in (r"https?://([\w.\-]+(:\d+)?)/", r"git@([\w.\-]+):"):
          m = re.match(pattern, repo_url)
          if not m:
              continue
          host = m.group(1)
          if host.lower() in ("github.com", "www.github.com"):
              return public_api
          # Keep the host exactly as written for Enterprise installations.
          return f"https://{host}/api/v3"
      return public_api
  def parse_repo_url(self, url: str) -> tuple[str, str]:
      """Return ``(owner, repo)`` parsed from a Git HTTPS or SSH URL.

      Accepted forms::

          https://<host>[:<port>]/<owner>/<repo>[.git][/]
          git@<host>:<owner>/<repo>[.git]

      A trailing ``.git`` is stripped from the repository name.

      Raises:
          ValueError: If *url* is empty or longer than 500 characters, matches
              neither form, or yields an unusable segment: a repository name
              that is empty once ``.git`` is stripped, or an owner/repo equal
              to ``.`` or ``..`` (these would be interpolated into API paths).
      """
      if not url or len(url) > 500:
          raise ValueError("Invalid Git URL: URL too long or empty")

      # HTTPS: https://<host>[:<port>]/<owner>/<repo>[.git][/]
      https_match = re.match(
          r"https://[\w.\-]+(:\d+)?/([\w.\-]{1,100})/([\w.\-]{1,100})(?:\.git)?/?$",
          url,
      )
      if https_match:
          owner, repo = https_match.group(2), https_match.group(3)
      else:
          # SSH: git@<host>:<owner>/<repo>[.git]
          ssh_match = re.match(
              r"git@[\w.\-]+:([\w.\-]{1,100})/([\w.\-]{1,100})(?:\.git)?$",
              url,
          )
          if not ssh_match:
              raise ValueError(f"Cannot parse repository URL: {url}")
          owner, repo = ssh_match.group(1), ssh_match.group(2)

      # The repo character class also admits ".", so the capture may already
      # include the ".git" suffix; strip it here.
      repo = repo.removesuffix(".git")

      # "<owner>/.git" would leave an empty name, and dot segments are path
      # navigation rather than identifiers; reject both.
      dot_segments = (".", "..")
      if not repo or owner in dot_segments or repo in dot_segments:
          raise ValueError(f"Cannot parse repository URL: {url}")
      return owner, repo
  async def test_connection(self, repo_url: str, token: str, client: httpx.AsyncClient) -> dict:
      """Check that *token* can reach the repository and is allowed to push.

      Issues a single ``GET {api_base}/repos/{owner}/{repo}`` request and
      inspects the ``permissions.push`` flag of the response body.

      Returns:
          A dict with the keys ``success``, ``message``, ``repo_name`` and
          ``permissions``. ``repo_name`` and ``permissions`` are ``None``
          unless the repository could be read. Any exception (including an
          unparsable URL) is logged and reported as a failed result rather
          than raised.
      """

      def outcome(ok: bool, text: str, name=None, perms=None) -> dict:
          # Single place that fixes the shape (and key order) of the result.
          return {"success": ok, "message": text, "repo_name": name, "permissions": perms}

      try:
          owner, repo = self.parse_repo_url(repo_url)
          endpoint = f"{self.get_api_base(repo_url)}/repos/{owner}/{repo}"
          reply = await client.get(endpoint, headers=self.get_headers(token))

          status = reply.status_code
          if status == 401:
              return outcome(False, "Invalid access token")
          if status == 404:
              return outcome(False, "Repository not found. Check URL and token permissions.")
          if status != 200:
              return outcome(False, f"API error: {status}")

          payload = reply.json()
          perms = payload.get("permissions", {})
          if perms.get("push", False):
              return outcome(True, "Connection successful", payload.get("full_name"), perms)
          return outcome(
              False,
              "Token does not have push permission to this repository",
              payload.get("full_name"),
              perms,
          )
      except Exception as e:
          logger.error("Git connection test failed: %s", e)
          return outcome(False, f"Connection failed: {type(e).__name__}")
  async def push_files(
      self,
      repo_url: str,
      token: str,
      branch: str,
      files: dict,
      client: httpx.AsyncClient,
  ) -> dict:
      """Commit *files* to *branch* through the GitHub Git Data API.

      Each value in *files* is serialised with
      ``json.dumps(value, indent=2, default=str)`` and written to the path
      given by its key. A file is left out of the commit when the SHA
      returned by ``self._blob_sha`` for the new content equals the SHA the
      branch's current tree records for that path.

      Steps: read the branch ref, read its commit and tree, upload one blob
      per changed file, create a tree based on the current tree, create a
      commit whose parent is the current commit, then move the branch ref.
      When the branch ref request returns 404 the work is handed to
      ``_create_branch_and_push``.

      Blob uploads are best-effort: a failed upload is logged and the file is
      omitted from the commit. If every upload fails, the result is
      ``"failed"`` (not ``"skipped"``), and a partial failure is mentioned in
      the success message.

      Returns:
          A dict with ``status`` (``"success"``, ``"skipped"`` or
          ``"failed"``) and ``message``. Success and skip results also carry
          ``commit_sha`` and ``files_changed``; some failures carry ``error``.
          Exceptions are logged and returned as a failed result, not raised.
      """
      try:
          owner, repo = self.parse_repo_url(repo_url)
          api_base = self.get_api_base(repo_url)
          headers = self.get_headers(token)
          repo_api = f"{api_base}/repos/{owner}/{repo}"

          ref_response = await client.get(f"{repo_api}/git/refs/heads/{branch}", headers=headers)
          if ref_response.status_code == 404:
              # Branch does not exist yet: create it, then push onto it.
              return await self._create_branch_and_push(
                  client, headers, api_base, owner, repo, branch, files, repo_url, token
              )
          if ref_response.status_code != 200:
              return {
                  "status": "failed",
                  "message": f"Failed to get branch ref: {ref_response.status_code}",
                  "error": self._truncated_response_text(ref_response),
              }
          current_commit_sha = ref_response.json()["object"]["sha"]

          commit_response = await client.get(f"{repo_api}/git/commits/{current_commit_sha}", headers=headers)
          if commit_response.status_code != 200:
              return {"status": "failed", "message": "Failed to get current commit"}
          current_tree_sha = commit_response.json()["tree"]["sha"]

          # Map of path -> blob SHA for the current tree. If the listing
          # cannot be fetched the map stays empty and every file is uploaded.
          tree_response = await client.get(
              f"{repo_api}/git/trees/{current_tree_sha}?recursive=1", headers=headers
          )
          existing_files: dict[str, str] = {}
          if tree_response.status_code == 200:
              existing_files = {
                  item["path"]: item["sha"]
                  for item in tree_response.json().get("tree", [])
                  if item["type"] == "blob"
              }

          tree_items = []
          failed_paths = []
          for path, content in files.items():
              content_bytes = json.dumps(content, indent=2, default=str).encode("utf-8")
              content_sha = self._blob_sha(content_bytes)
              if path in existing_files and existing_files[path] == content_sha:
                  continue  # unchanged since the last commit
              blob_response = await client.post(
                  f"{repo_api}/git/blobs",
                  headers=headers,
                  json={"content": base64.b64encode(content_bytes).decode(), "encoding": "base64"},
              )
              if blob_response.status_code != 201:
                  logger.error("Failed to create blob for %s: %s", path, self._truncated_response_text(blob_response))
                  failed_paths.append(path)
                  continue
              tree_items.append({"path": path, "mode": "100644", "type": "blob", "sha": blob_response.json()["sha"]})

          files_changed = len(tree_items)
          if not tree_items:
              if failed_paths:
                  # There were changes, but none could be uploaded; reporting
                  # "No changes" here would hide a broken backup.
                  return {
                      "status": "failed",
                      "message": f"Failed to upload {len(failed_paths)} file(s); nothing was committed",
                      "commit_sha": None,
                      "files_changed": 0,
                  }
              return {"status": "skipped", "message": "No changes to commit", "commit_sha": None, "files_changed": 0}

          # base_tree keeps every path that is not listed in tree_items.
          tree_response = await client.post(
              f"{repo_api}/git/trees",
              headers=headers,
              json={"base_tree": current_tree_sha, "tree": tree_items},
          )
          if tree_response.status_code != 201:
              return {
                  "status": "failed",
                  "message": f"Failed to create tree: {self._truncated_response_text(tree_response)}",
              }
          new_tree_sha = tree_response.json()["sha"]

          commit_message = f"Bambuddy backup - {datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M:%S UTC')}"
          commit_response = await client.post(
              f"{repo_api}/git/commits",
              headers=headers,
              json={"message": commit_message, "tree": new_tree_sha, "parents": [current_commit_sha]},
          )
          if commit_response.status_code != 201:
              return {
                  "status": "failed",
                  "message": f"Failed to create commit: {self._truncated_response_text(commit_response)}",
              }
          new_commit_sha = commit_response.json()["sha"]

          ref_update = await client.patch(
              f"{repo_api}/git/refs/heads/{branch}",
              headers=headers,
              json={"sha": new_commit_sha},
          )
          if ref_update.status_code != 200:
              return {
                  "status": "failed",
                  "message": f"Failed to update branch: {self._truncated_response_text(ref_update)}",
              }

          message = f"Backup successful - {files_changed} files updated"
          if failed_paths:
              message += f", {len(failed_paths)} failed to upload"
          return {
              "status": "success",
              "message": message,
              "commit_sha": new_commit_sha,
              "files_changed": files_changed,
          }
      except Exception as e:
          logger.error("Push to Git failed: %s", e)
          return {"status": "failed", "message": str(e), "error": str(e)}
  async def _create_branch_and_push(
      self,
      client: httpx.AsyncClient,
      headers: dict,
      api_base: str,
      owner: str,
      repo: str,
      branch: str,
      files: dict,
      repo_url: str,
      token: str,
  ) -> dict:
      """Create *branch* and push *files* onto it.

      The repository's default branch is looked up (falling back to
      ``"main"`` when the repo payload has no usable ``default_branch``). If
      that branch's ref can be read, *branch* is created at the same commit
      and ``push_files`` is called again. If the ref cannot be read (any
      non-200 response), the repository is treated as having no usable base
      and ``_create_initial_commit`` is used instead.

      Returns:
          The result dict of the delegated call, or a ``"failed"`` dict when
          a step of branch creation fails. Exceptions are logged and returned
          as a failed result, not raised.
      """
      try:
          repo_api = f"{api_base}/repos/{owner}/{repo}"

          repo_response = await client.get(repo_api, headers=headers)
          if repo_response.status_code != 200:
              return {"status": "failed", "message": "Failed to get repo info"}
          # "or" also covers an explicit null/empty value, which would
          # otherwise be formatted into the ref path below.
          default_branch = repo_response.json().get("default_branch") or "main"

          ref_response = await client.get(f"{repo_api}/git/refs/heads/{default_branch}", headers=headers)
          if ref_response.status_code != 200:
              return await self._create_initial_commit(client, headers, api_base, owner, repo, branch, files)
          base_sha = ref_response.json()["object"]["sha"]

          create_ref = await client.post(
              f"{repo_api}/git/refs",
              headers=headers,
              json={"ref": f"refs/heads/{branch}", "sha": base_sha},
          )
          if create_ref.status_code != 201:
              return {
                  "status": "failed",
                  "message": f"Failed to create branch: {self._truncated_response_text(create_ref)}",
              }

          # The branch now exists, so the regular push path can run.
          return await self.push_files(repo_url, token, branch, files, client)
      except Exception as e:
          logger.error("Creating branch %s failed: %s", branch, e)
          return {"status": "failed", "message": str(e)}
  async def _create_initial_commit(
      self,
      client: httpx.AsyncClient,
      headers: dict,
      api_base: str,
      owner: str,
      repo: str,
      branch: str,
      files: dict,
  ) -> dict:
      """Create a parentless commit containing *files* and point *branch* at it.

      Each value in *files* is serialised with
      ``json.dumps(value, indent=2, default=str)`` and uploaded as a blob. The
      uploaded blobs form a new tree (no ``base_tree``), a commit without
      parents is created for it, and ``refs/heads/<branch>`` is created at
      that commit.

      Blob uploads are best-effort: a failed upload is logged and the file is
      left out. ``files_changed`` and the message count only the files that
      were actually uploaded.

      Returns:
          ``"success"`` with ``commit_sha`` and ``files_changed``;
          ``"skipped"`` when *files* is empty; ``"failed"`` when no file could
          be uploaded or a later step is rejected. Exceptions are logged and
          returned as a failed result, not raised.
      """
      try:
          repo_api = f"{api_base}/repos/{owner}/{repo}"

          tree_items = []
          failed_paths = []
          for path, content in files.items():
              content_str = json.dumps(content, indent=2, default=str)
              blob_response = await client.post(
                  f"{repo_api}/git/blobs",
                  headers=headers,
                  json={"content": base64.b64encode(content_str.encode()).decode(), "encoding": "base64"},
              )
              if blob_response.status_code != 201:
                  logger.error("Failed to create blob for %s: %s", path, self._truncated_response_text(blob_response))
                  failed_paths.append(path)
                  continue
              tree_items.append({"path": path, "mode": "100644", "type": "blob", "sha": blob_response.json()["sha"]})

          if not tree_items:
              # Do not post an empty tree; report why there is nothing to commit.
              if failed_paths:
                  return {
                      "status": "failed",
                      "message": f"Failed to upload {len(failed_paths)} file(s); nothing was committed",
                  }
              return {"status": "skipped", "message": "No changes to commit", "commit_sha": None, "files_changed": 0}

          tree_response = await client.post(
              f"{repo_api}/git/trees",
              headers=headers,
              json={"tree": tree_items},
          )
          if tree_response.status_code != 201:
              logger.error("Failed to create tree: %s", self._truncated_response_text(tree_response))
              return {"status": "failed", "message": "Failed to create tree"}
          tree_sha = tree_response.json()["sha"]

          # No "parents" key: this commit has no parent.
          commit_response = await client.post(
              f"{repo_api}/git/commits",
              headers=headers,
              json={
                  "message": f"Initial Bambuddy backup - {datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M:%S UTC')}",
                  "tree": tree_sha,
              },
          )
          if commit_response.status_code != 201:
              logger.error("Failed to create commit: %s", self._truncated_response_text(commit_response))
              return {"status": "failed", "message": "Failed to create commit"}
          commit_sha = commit_response.json()["sha"]

          ref_response = await client.post(
              f"{repo_api}/git/refs",
              headers=headers,
              json={"ref": f"refs/heads/{branch}", "sha": commit_sha},
          )
          if ref_response.status_code != 201:
              logger.error("Failed to create branch ref: %s", self._truncated_response_text(ref_response))
              return {"status": "failed", "message": "Failed to create branch ref"}

          files_changed = len(tree_items)
          return {
              "status": "success",
              "message": f"Initial backup created - {files_changed} files",
              "commit_sha": commit_sha,
              "files_changed": files_changed,
          }
      except Exception as e:
          logger.error("Initial commit on branch %s failed: %s", branch, e)
          return {"status": "failed", "message": str(e)}