test_git_providers.py 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460
  1. """Unit tests for the git_providers abstraction package."""
  2. import hashlib
  3. import json
  4. from unittest.mock import AsyncMock, MagicMock
  5. import pytest
  6. from backend.app.services.git_providers.factory import get_provider_backend
  7. from backend.app.services.git_providers.forgejo import ForgejoBackend
  8. from backend.app.services.git_providers.gitea import GiteaBackend
  9. from backend.app.services.git_providers.github import GitHubBackend
  10. from backend.app.services.git_providers.gitlab import GitLabBackend
  11. class TestFactory:
  12. def test_known_providers_return_correct_class(self):
  13. assert isinstance(get_provider_backend("github"), GitHubBackend)
  14. assert isinstance(get_provider_backend("gitea"), GiteaBackend)
  15. assert isinstance(get_provider_backend("forgejo"), ForgejoBackend)
  16. assert isinstance(get_provider_backend("gitlab"), GitLabBackend)
  17. def test_unknown_provider_raises_value_error(self):
  18. with pytest.raises(ValueError, match="Unknown Git provider"):
  19. get_provider_backend("bitbucket")
  20. class TestGitHubBackendParseUrl:
  21. def setup_method(self):
  22. self.backend = GitHubBackend()
  23. def test_https_url(self):
  24. owner, repo = self.backend.parse_repo_url("https://github.com/owner/repo")
  25. assert owner == "owner"
  26. assert repo == "repo"
  27. def test_https_url_with_git_suffix(self):
  28. owner, repo = self.backend.parse_repo_url("https://github.com/owner/repo.git")
  29. assert owner == "owner"
  30. assert repo == "repo"
  31. def test_ssh_url(self):
  32. owner, repo = self.backend.parse_repo_url("git@github.com:owner/repo")
  33. assert owner == "owner"
  34. assert repo == "repo"
  35. def test_ssh_url_with_git_suffix(self):
  36. owner, repo = self.backend.parse_repo_url("git@github.com:owner/repo.git")
  37. assert owner == "owner"
  38. assert repo == "repo"
  39. def test_invalid_url_raises_value_error(self):
  40. with pytest.raises(ValueError, match="Cannot parse repository URL"):
  41. self.backend.parse_repo_url("https://example.com/not-a-repo")
  42. def test_empty_url_raises_value_error(self):
  43. with pytest.raises(ValueError):
  44. self.backend.parse_repo_url("")
  45. class TestGitHubBackendApiBase:
  46. def setup_method(self):
  47. self.backend = GitHubBackend()
  48. def test_github_com_returns_api_github_com(self):
  49. assert self.backend.get_api_base("https://github.com/owner/repo") == "https://api.github.com"
  50. def test_ghe_host_returns_v3_endpoint(self):
  51. assert self.backend.get_api_base("https://github.example.com/owner/repo") == "https://github.example.com/api/v3"
  52. def test_ghe_host_with_port(self):
  53. assert self.backend.get_api_base("https://github.example.com:8443/owner/repo") == "https://github.example.com:8443/api/v3"
  54. def test_ssh_github_com_returns_api_github_com(self):
  55. assert self.backend.get_api_base("git@github.com:owner/repo.git") == "https://api.github.com"
  56. def test_ssh_ghe_host_returns_v3_endpoint(self):
  57. assert self.backend.get_api_base("git@github.example.com:owner/repo.git") == "https://github.example.com/api/v3"
  58. class TestGiteaBackendApiBase:
  59. def setup_method(self):
  60. self.backend = GiteaBackend()
  61. def test_derives_api_base_from_repo_url(self):
  62. result = self.backend.get_api_base("https://git.example.com/owner/repo")
  63. assert result == "https://git.example.com/api/v1"
  64. def test_derives_api_base_with_port(self):
  65. result = self.backend.get_api_base("https://git.example.com:3000/owner/repo")
  66. assert result == "https://git.example.com:3000/api/v1"
  67. def test_invalid_url_raises_value_error(self):
  68. with pytest.raises(ValueError, match="Cannot derive API base"):
  69. self.backend.get_api_base("not-a-url")
  70. def test_parse_url_uses_instance_host(self):
  71. owner, repo = self.backend.parse_repo_url("https://git.example.com/owner/repo")
  72. assert owner == "owner"
  73. assert repo == "repo"
  74. class TestGiteaBackendPushFiles:
  75. def setup_method(self):
  76. self.backend = GiteaBackend()
  77. self.repo_url = "https://git.example.com/owner/repo"
  78. self.token = "gitea-token"
  79. self.branch = "bambuddy-backup"
  80. @pytest.mark.asyncio
  81. async def test_n_files_produce_single_commit(self):
  82. """All changed files are bundled into one commit via the Git Data API."""
  83. files = {"a.json": {"k": "v1"}, "b.json": {"k": "v2"}}
  84. client = AsyncMock()
  85. client.get = AsyncMock(
  86. side_effect=[
  87. _make_mock_response(200, {"object": {"sha": "base-commit"}}),
  88. _make_mock_response(200, {"tree": {"sha": "base-tree"}}),
  89. _make_mock_response(200, {"tree": []}),
  90. ]
  91. )
  92. client.post = AsyncMock(
  93. side_effect=[
  94. _make_mock_response(201, {"sha": "blob1"}),
  95. _make_mock_response(201, {"sha": "blob2"}),
  96. _make_mock_response(201, {"sha": "new-tree"}),
  97. _make_mock_response(201, {"sha": "new-commit"}),
  98. ]
  99. )
  100. client.patch = AsyncMock(return_value=_make_mock_response(200, {}))
  101. result = await self.backend.push_files(self.repo_url, self.token, self.branch, files, client)
  102. assert result["status"] == "success"
  103. assert result["files_changed"] == 2
  104. commit_calls = [c for c in client.post.call_args_list if "/git/commits" in c.args[0]]
  105. assert len(commit_calls) == 1
  106. @pytest.mark.asyncio
  107. async def test_uses_gitea_api_v1_base_not_github(self):
  108. """Git Data API calls target the instance's /api/v1, not api.github.com."""
  109. client = AsyncMock()
  110. client.get = AsyncMock(
  111. side_effect=[
  112. _make_mock_response(200, {"object": {"sha": "base-commit"}}),
  113. _make_mock_response(200, {"tree": {"sha": "base-tree"}}),
  114. _make_mock_response(200, {"tree": []}),
  115. ]
  116. )
  117. client.post = AsyncMock(
  118. side_effect=[
  119. _make_mock_response(201, {"sha": "blob1"}),
  120. _make_mock_response(201, {"sha": "new-tree"}),
  121. _make_mock_response(201, {"sha": "new-commit"}),
  122. ]
  123. )
  124. client.patch = AsyncMock(return_value=_make_mock_response(200, {}))
  125. await self.backend.push_files(self.repo_url, self.token, self.branch, {"a.json": {"k": "v"}}, client)
  126. first_get_url = client.get.call_args_list[0].args[0]
  127. assert "git.example.com/api/v1" in first_get_url
  128. assert "api.github.com" not in first_get_url
  129. @pytest.mark.asyncio
  130. async def test_skips_unchanged_files(self):
  131. """Files whose blob SHA matches the existing tree entry are excluded from the commit."""
  132. content = {"name": "my-printer"}
  133. sha = _blob_sha(content)
  134. client = AsyncMock()
  135. client.get = AsyncMock(
  136. side_effect=[
  137. _make_mock_response(200, {"object": {"sha": "base-commit"}}),
  138. _make_mock_response(200, {"tree": {"sha": "base-tree"}}),
  139. _make_mock_response(200, {"tree": [{"type": "blob", "path": "config/printers.json", "sha": sha}]}),
  140. ]
  141. )
  142. result = await self.backend.push_files(
  143. self.repo_url, self.token, self.branch, {"config/printers.json": content}, client
  144. )
  145. assert result["status"] == "skipped"
  146. client.post.assert_not_called()
  147. @pytest.mark.asyncio
  148. async def test_creates_missing_branch_via_git_refs_api(self):
  149. """A missing backup branch is created via the Git Data API refs endpoint."""
  150. client = AsyncMock()
  151. client.get = AsyncMock(
  152. side_effect=[
  153. # branch ref missing
  154. _make_mock_response(404, {}),
  155. # repo info for default branch
  156. _make_mock_response(200, {"default_branch": "main"}),
  157. # default branch ref
  158. _make_mock_response(200, {"object": {"sha": "base-sha"}}),
  159. # second push_files call: branch now exists
  160. _make_mock_response(200, {"object": {"sha": "base-sha"}}),
  161. _make_mock_response(200, {"tree": {"sha": "base-tree"}}),
  162. _make_mock_response(200, {"tree": []}),
  163. ]
  164. )
  165. client.post = AsyncMock(
  166. side_effect=[
  167. _make_mock_response(201, {}), # create ref
  168. _make_mock_response(201, {"sha": "blob1"}),
  169. _make_mock_response(201, {"sha": "new-tree"}),
  170. _make_mock_response(201, {"sha": "new-commit"}),
  171. ]
  172. )
  173. client.patch = AsyncMock(return_value=_make_mock_response(200, {}))
  174. result = await self.backend.push_files(
  175. self.repo_url, self.token, self.branch, {"a.json": {"k": "v"}}, client
  176. )
  177. assert result["status"] == "success"
  178. ref_create_call = client.post.call_args_list[0]
  179. assert "/git/refs" in ref_create_call.args[0]
  180. assert ref_create_call.kwargs["json"]["ref"] == f"refs/heads/{self.branch}"
  181. @pytest.mark.asyncio
  182. async def test_truncates_upstream_error_body_in_failure_message(self):
  183. client = AsyncMock()
  184. client.get = AsyncMock(
  185. side_effect=[
  186. _make_mock_response(200, {"object": {"sha": "base-commit"}}),
  187. _make_mock_response(200, {"tree": {"sha": "base-tree"}}),
  188. _make_mock_response(200, {"tree": []}),
  189. ]
  190. )
  191. client.post = AsyncMock(
  192. side_effect=[
  193. _make_mock_response(201, {"sha": "blob1"}),
  194. _make_mock_response(500, {}, text="x" * 500),
  195. ]
  196. )
  197. result = await self.backend.push_files(
  198. self.repo_url, self.token, self.branch, {"a.json": {"k": "v"}}, client
  199. )
  200. assert result["status"] == "failed"
  201. assert result["message"] == f"Failed to create tree: {'x' * 197}..."
  202. class TestForgejoBackendApiBase:
  203. def setup_method(self):
  204. self.backend = ForgejoBackend()
  205. def test_derives_api_base_from_repo_url(self):
  206. result = self.backend.get_api_base("https://forgejo.example.com/owner/repo")
  207. assert result == "https://forgejo.example.com/api/v1"
  208. def test_derives_api_base_with_port(self):
  209. result = self.backend.get_api_base("https://forgejo.example.com:3000/owner/repo")
  210. assert result == "https://forgejo.example.com:3000/api/v1"
  211. def test_invalid_url_raises_value_error(self):
  212. with pytest.raises(ValueError, match="Cannot derive API base"):
  213. self.backend.get_api_base("not-a-url")
  214. def test_parse_url_uses_instance_host(self):
  215. owner, repo = self.backend.parse_repo_url("https://forgejo.example.com/owner/repo")
  216. assert owner == "owner"
  217. assert repo == "repo"
  218. class TestGitLabBackend:
  219. def setup_method(self):
  220. self.backend = GitLabBackend()
  221. def test_parse_url_https(self):
  222. owner, repo = self.backend.parse_repo_url("https://gitlab.com/owner/repo")
  223. assert owner == "owner"
  224. assert repo == "repo"
  225. def test_parse_url_ssh(self):
  226. owner, repo = self.backend.parse_repo_url("git@gitlab.com:owner/repo.git")
  227. assert owner == "owner"
  228. assert repo == "repo"
  229. def test_parse_url_invalid_raises(self):
  230. with pytest.raises(ValueError):
  231. self.backend.parse_repo_url("not-a-url")
  232. def test_get_api_base_derives_from_repo_url(self):
  233. result = self.backend.get_api_base("https://gitlab.com/owner/repo")
  234. assert result == "https://gitlab.com/api/v4"
  235. def test_get_api_base_derives_from_self_hosted_url(self):
  236. result = self.backend.get_api_base("https://my-gitlab.example.com/owner/repo")
  237. assert result == "https://my-gitlab.example.com/api/v4"
  238. def test_get_api_base_invalid_url_raises(self):
  239. with pytest.raises(ValueError, match="Cannot derive API base"):
  240. self.backend.get_api_base("not-a-url")
  241. def test_get_headers_uses_bearer_token(self):
  242. headers = self.backend.get_headers("mytoken")
  243. assert headers["Authorization"] == "Bearer mytoken"
  244. assert "Content-Type" in headers
  245. def test_parse_url_subgroup_https(self):
  246. namespace, repo = self.backend.parse_repo_url("https://gitlab.com/group/subgroup/project")
  247. assert namespace == "group/subgroup"
  248. assert repo == "project"
  249. def test_parse_url_deep_namespace_https(self):
  250. namespace, repo = self.backend.parse_repo_url("https://gitlab.com/myorg/team/api/backend")
  251. assert namespace == "myorg/team/api"
  252. assert repo == "backend"
  253. def test_parse_url_subgroup_ssh(self):
  254. namespace, repo = self.backend.parse_repo_url("git@gitlab.com:group/subgroup/project.git")
  255. assert namespace == "group/subgroup"
  256. assert repo == "project"
  257. @pytest.mark.asyncio
  258. async def test_push_files_encodes_subgroup_namespace_in_api_url(self):
  259. backend = GitLabBackend()
  260. repo_url = "https://gitlab.com/group/subgroup/project"
  261. client = AsyncMock()
  262. client.get = AsyncMock(
  263. side_effect=[
  264. _make_mock_response(200, {"name": "bambuddy-backup"}),
  265. _make_mock_response(200, []),
  266. ]
  267. )
  268. client.post = AsyncMock(return_value=_make_mock_response(201, {"id": "abc123"}))
  269. await backend.push_files(repo_url, "token", "bambuddy-backup", {"f.json": {}}, client)
  270. called_url = client.get.call_args_list[0].args[0]
  271. assert "group%2Fsubgroup%2Fproject" in called_url
  272. def _blob_sha(content: dict) -> str:
  273. content_bytes = json.dumps(content, indent=2, default=str).encode("utf-8")
  274. return hashlib.sha1(f"blob {len(content_bytes)}\0".encode() + content_bytes, usedforsecurity=False).hexdigest()
  275. def _make_mock_response(status_code: int, body=None, text: str = ""):
  276. resp = MagicMock()
  277. resp.status_code = status_code
  278. resp.text = text
  279. resp.json = MagicMock(return_value=body or {})
  280. return resp
  281. class TestGitLabBackendPushFiles:
  282. def setup_method(self):
  283. self.backend = GitLabBackend()
  284. self.repo_url = "https://gitlab.com/owner/repo"
  285. self.token = "glpat-test"
  286. self.branch = "bambuddy-backup"
  287. self.files = {"config/printers.json": {"name": "my-printer"}}
  288. @pytest.mark.asyncio
  289. async def test_skips_commit_when_content_unchanged(self):
  290. sha = _blob_sha(self.files["config/printers.json"])
  291. client = AsyncMock()
  292. client.get = AsyncMock(
  293. side_effect=[
  294. # branch check → branch exists
  295. _make_mock_response(200, {"name": self.branch}),
  296. # tree page 1 → one blob whose sha matches current content
  297. _make_mock_response(200, [{"type": "blob", "path": "config/printers.json", "id": sha}]),
  298. # tree page 2 → empty, stop pagination
  299. _make_mock_response(200, []),
  300. ]
  301. )
  302. result = await self.backend.push_files(self.repo_url, self.token, self.branch, self.files, client)
  303. assert result["status"] == "skipped"
  304. assert result["files_changed"] == 0
  305. client.post.assert_not_called()
  306. @pytest.mark.asyncio
  307. async def test_commits_when_content_changed(self):
  308. stale_sha = "0000000000000000000000000000000000000000"
  309. client = AsyncMock()
  310. client.get = AsyncMock(
  311. side_effect=[
  312. _make_mock_response(200, {"name": self.branch}),
  313. _make_mock_response(200, [{"type": "blob", "path": "config/printers.json", "id": stale_sha}]),
  314. _make_mock_response(200, []), # page 2 empty, stop pagination
  315. ]
  316. )
  317. client.post = AsyncMock(return_value=_make_mock_response(201, {"id": "abc123"}))
  318. result = await self.backend.push_files(self.repo_url, self.token, self.branch, self.files, client)
  319. assert result["status"] == "success"
  320. assert result["files_changed"] == 1
  321. client.post.assert_called_once()
  322. @pytest.mark.asyncio
  323. async def test_truncates_upstream_error_body_in_failure_message(self):
  324. client = AsyncMock()
  325. client.get = AsyncMock(
  326. side_effect=[
  327. _make_mock_response(200, {"name": self.branch}),
  328. _make_mock_response(200, []),
  329. ]
  330. )
  331. client.post = AsyncMock(return_value=_make_mock_response(500, {}, text="x" * 500))
  332. result = await self.backend.push_files(self.repo_url, self.token, self.branch, self.files, client)
  333. assert result["status"] == "failed"
  334. assert result["message"] == f"Failed to create commit: {'x' * 197}..."
  335. @pytest.mark.asyncio
  336. async def test_creates_new_file_not_in_existing_tree(self):
  337. client = AsyncMock()
  338. client.get = AsyncMock(
  339. side_effect=[
  340. _make_mock_response(200, {"name": self.branch}),
  341. # tree is empty
  342. _make_mock_response(200, []),
  343. ]
  344. )
  345. client.post = AsyncMock(return_value=_make_mock_response(201, {"id": "def456"}))
  346. result = await self.backend.push_files(self.repo_url, self.token, self.branch, self.files, client)
  347. assert result["status"] == "success"
  348. call_kwargs = client.post.call_args.kwargs["json"]
  349. assert call_kwargs["actions"][0]["action"] == "create"
  350. @pytest.mark.asyncio
  351. async def test_paginates_tree_to_find_unchanged_file_on_page_2(self):
  352. """Files beyond the first 100 are fetched; a file on page 2 is correctly skipped if unchanged."""
  353. sha = _blob_sha(self.files["config/printers.json"])
  354. page1_items = [{"type": "blob", "path": f"other{i}.json", "id": "aaa"} for i in range(100)]
  355. page2_items = (
  356. [{"type": "blob", "path": f"more{i}.json", "id": "bbb"} for i in range(19)]
  357. + [{"type": "blob", "path": "config/printers.json", "id": sha}]
  358. ) # 120 total blobs across two pages
  359. client = AsyncMock()
  360. client.get = AsyncMock(
  361. side_effect=[
  362. _make_mock_response(200, {"name": self.branch}), # branch check
  363. _make_mock_response(200, page1_items), # tree page 1
  364. _make_mock_response(200, page2_items), # tree page 2
  365. _make_mock_response(200, []), # tree page 3 empty, stop
  366. ]
  367. )
  368. result = await self.backend.push_files(self.repo_url, self.token, self.branch, self.files, client)
  369. assert result["status"] == "skipped"
  370. client.post.assert_not_called()