gitlab.py 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266
  1. """GitLab backend — implements GitProviderBackend using the GitLab REST API v4."""
  2. import base64
  3. import json
  4. import logging
  5. import re
  6. import urllib.parse
  7. from datetime import datetime, timezone
  8. import httpx
  9. from backend.app.services.git_providers.base import GitProviderBackend
  10. logger = logging.getLogger(__name__)
  11. class GitLabBackend(GitProviderBackend):
  12. """Backend for gitlab.com and self-hosted GitLab instances."""
  13. def get_api_base(self, repo_url: str) -> str:
  14. match = re.match(r"(https?://[\w.\-]+(:\d+)?)/", repo_url)
  15. if not match:
  16. raise ValueError(f"Cannot derive API base from URL: {repo_url}")
  17. return f"{match.group(1)}/api/v4"
  18. def get_headers(self, token: str) -> dict:
  19. return {"Authorization": f"Bearer {token}", "Content-Type": "application/json"}
  20. def parse_repo_url(self, url: str) -> tuple[str, str]:
  21. """Return (namespace, repo) from HTTPS or SSH URL.
  22. namespace may include subgroups, e.g. 'group/subgroup' for
  23. gitlab.com/group/subgroup/project. Callers join them with '/' and
  24. URL-encode the result for /api/v4/projects/{encoded_path}.
  25. """
  26. if not url or len(url) > 500:
  27. raise ValueError("Invalid Git URL: URL too long or empty")
  28. match = re.match(r"https?://[\w.\-]+(:\d+)?/(.+?)(?:\.git)?/?$", url)
  29. if match:
  30. full_path = match.group(2)
  31. if "/" not in full_path:
  32. raise ValueError(f"Cannot parse repository URL: {url}")
  33. namespace, _, repo = full_path.rpartition("/")
  34. return namespace, repo
  35. match = re.match(r"git@[\w.\-]+:(.+?)(?:\.git)?$", url)
  36. if match:
  37. full_path = match.group(1)
  38. if "/" not in full_path:
  39. raise ValueError(f"Cannot parse repository URL: {url}")
  40. namespace, _, repo = full_path.rpartition("/")
  41. return namespace, repo
  42. raise ValueError(f"Cannot parse repository URL: {url}")
  43. async def test_connection(self, repo_url: str, token: str, client: httpx.AsyncClient) -> dict:
  44. try:
  45. owner, repo = self.parse_repo_url(repo_url)
  46. api_base = self.get_api_base(repo_url)
  47. headers = self.get_headers(token)
  48. encoded_path = urllib.parse.quote(f"{owner}/{repo}", safe="")
  49. response = await client.get(f"{api_base}/projects/{encoded_path}", headers=headers)
  50. if response.status_code == 401:
  51. return {"success": False, "message": "Invalid access token", "repo_name": None, "permissions": None}
  52. if response.status_code == 404:
  53. return {
  54. "success": False,
  55. "message": "Repository not found. Check URL and token permissions.",
  56. "repo_name": None,
  57. "permissions": None,
  58. }
  59. if response.status_code != 200:
  60. return {
  61. "success": False,
  62. "message": f"API error: {response.status_code}",
  63. "repo_name": None,
  64. "permissions": None,
  65. }
  66. data = response.json()
  67. perms = data.get("permissions") or {}
  68. project_level = (perms.get("project_access") or {}).get("access_level", 0)
  69. group_level = (perms.get("group_access") or {}).get("access_level", 0)
  70. effective = max(project_level, group_level)
  71. # GitLab uses visibility="private" / "internal" / "public". Both
  72. # "internal" (signed-in users) and "public" are non-private for
  73. # the purposes of this safety check.
  74. visibility = (data.get("visibility") or "").lower()
  75. is_private = visibility == "private"
  76. if effective < 30: # Developer = 30, Maintainer = 40, Owner = 50
  77. return {
  78. "success": False,
  79. "message": "Token requires Developer access or higher to push",
  80. "repo_name": data.get("name_with_namespace"),
  81. "permissions": perms,
  82. "is_private": is_private,
  83. }
  84. return {
  85. "success": True,
  86. "message": "Connection successful",
  87. "repo_name": data.get("name_with_namespace"),
  88. "permissions": perms,
  89. "is_private": is_private,
  90. }
  91. except Exception as e:
  92. logger.error("GitLab connection test failed: %s", e)
  93. return {
  94. "success": False,
  95. "message": f"Connection failed: {type(e).__name__}",
  96. "repo_name": None,
  97. "permissions": None,
  98. "is_private": None,
  99. }
  100. async def push_files(
  101. self,
  102. repo_url: str,
  103. token: str,
  104. branch: str,
  105. files: dict,
  106. client: httpx.AsyncClient,
  107. ) -> dict:
  108. try:
  109. owner, repo = self.parse_repo_url(repo_url)
  110. api_base = self.get_api_base(repo_url)
  111. headers = self.get_headers(token)
  112. encoded_path = urllib.parse.quote(f"{owner}/{repo}", safe="")
  113. encoded_branch = urllib.parse.quote(branch, safe="")
  114. branch_response = await client.get(
  115. f"{api_base}/projects/{encoded_path}/repository/branches/{encoded_branch}",
  116. headers=headers,
  117. )
  118. if branch_response.status_code == 404:
  119. proj_response = await client.get(f"{api_base}/projects/{encoded_path}", headers=headers)
  120. if proj_response.status_code != 200:
  121. return {"status": "failed", "message": "Failed to get project info"}
  122. default_branch = proj_response.json().get("default_branch", "main")
  123. default_encoded = urllib.parse.quote(default_branch, safe="")
  124. default_response = await client.get(
  125. f"{api_base}/projects/{encoded_path}/repository/branches/{default_encoded}",
  126. headers=headers,
  127. )
  128. if default_response.status_code != 200:
  129. return await self._create_initial_commit(client, headers, api_base, encoded_path, branch, files)
  130. create_response = await client.post(
  131. f"{api_base}/projects/{encoded_path}/repository/branches",
  132. headers=headers,
  133. json={"branch": branch, "ref": default_branch},
  134. )
  135. if create_response.status_code not in (200, 201):
  136. return {"status": "failed", "message": f"Failed to create branch: {create_response.status_code}"}
  137. elif branch_response.status_code != 200:
  138. return {"status": "failed", "message": f"Failed to check branch: {branch_response.status_code}"}
  139. existing_blobs: dict[str, str] = {}
  140. page = 1
  141. while True:
  142. tree_response = await client.get(
  143. f"{api_base}/projects/{encoded_path}/repository/tree",
  144. headers=headers,
  145. params={"recursive": "true", "ref": branch, "per_page": 100, "page": page},
  146. )
  147. if tree_response.status_code != 200:
  148. break
  149. items = tree_response.json()
  150. if not items:
  151. break
  152. for item in items:
  153. if item.get("type") == "blob":
  154. existing_blobs[item["path"]] = item["id"]
  155. page += 1
  156. actions = []
  157. for path, content in files.items():
  158. content_str = json.dumps(content, indent=2, default=str)
  159. content_bytes = content_str.encode("utf-8")
  160. content_sha = self._blob_sha(content_bytes)
  161. if path in existing_blobs and existing_blobs[path] == content_sha:
  162. continue
  163. actions.append(
  164. {
  165. "action": "update" if path in existing_blobs else "create",
  166. "file_path": path,
  167. "content": base64.b64encode(content_bytes).decode(),
  168. "encoding": "base64",
  169. }
  170. )
  171. if not actions:
  172. return {"status": "skipped", "message": "No changes to commit", "commit_sha": None, "files_changed": 0}
  173. commit_message = f"Bambuddy backup - {datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M:%S UTC')}"
  174. commit_response = await client.post(
  175. f"{api_base}/projects/{encoded_path}/repository/commits",
  176. headers=headers,
  177. json={"branch": branch, "commit_message": commit_message, "actions": actions},
  178. )
  179. if commit_response.status_code not in (200, 201):
  180. return {
  181. "status": "failed",
  182. "message": f"Failed to create commit: {self._truncated_response_text(commit_response)}",
  183. }
  184. return {
  185. "status": "success",
  186. "message": f"Backup successful - {len(actions)} files updated",
  187. "commit_sha": commit_response.json().get("id"),
  188. "files_changed": len(actions),
  189. }
  190. except Exception as e:
  191. logger.error("Push to GitLab failed: %s", e)
  192. return {"status": "failed", "message": str(e), "error": str(e)}
  193. async def _create_initial_commit(
  194. self,
  195. client: httpx.AsyncClient,
  196. headers: dict,
  197. api_base: str,
  198. encoded_path: str,
  199. branch: str,
  200. files: dict,
  201. ) -> dict:
  202. """Create the first commit in an empty repository."""
  203. try:
  204. actions = []
  205. for path, content in files.items():
  206. content_str = json.dumps(content, indent=2, default=str)
  207. actions.append(
  208. {
  209. "action": "create",
  210. "file_path": path,
  211. "content": base64.b64encode(content_str.encode()).decode(),
  212. "encoding": "base64",
  213. }
  214. )
  215. commit_message = f"Initial Bambuddy backup - {datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M:%S UTC')}"
  216. commit_response = await client.post(
  217. f"{api_base}/projects/{encoded_path}/repository/commits",
  218. headers=headers,
  219. json={"branch": branch, "commit_message": commit_message, "actions": actions, "start_branch": branch},
  220. )
  221. if commit_response.status_code not in (200, 201):
  222. return {
  223. "status": "failed",
  224. "message": f"Failed to create initial commit: {self._truncated_response_text(commit_response)}",
  225. }
  226. return {
  227. "status": "success",
  228. "message": f"Initial backup created - {len(files)} files",
  229. "commit_sha": commit_response.json().get("id"),
  230. "files_changed": len(files),
  231. }
  232. except Exception as e:
  233. return {"status": "failed", "message": str(e)}