From 82900239580290e06225411d1029f43e41bf5eed Mon Sep 17 00:00:00 2001 From: Joel Robotham Date: Wed, 11 Mar 2026 17:07:28 +1100 Subject: [PATCH] fix: convert restored snapshots to bare mirrors with correct upstream URL Snapshots are non-bare clones with remote.origin.url pointing to the cachew server URL (so end-user git pulls route through the proxy). When tryRestoreSnapshot restores these into the mirror path, backgroundFetch would attempt to fetch from the cachew URL, creating a self-referential loop that times out after 600 seconds. After restoring a snapshot, convert it to a bare repository layout and reset remote.origin.url to the actual upstream GitHub URL. Amp-Thread-ID: https://ampcode.com/threads/T-019cdb7b-f7b3-7028-a6d6-80369d19965a Co-authored-by: Amp --- internal/strategy/git/export_test.go | 4 ++ internal/strategy/git/git.go | 72 ++++++++++++++++++++++++++ internal/strategy/git/snapshot_test.go | 71 +++++++++++++++++++++++++ 3 files changed, 147 insertions(+) diff --git a/internal/strategy/git/export_test.go b/internal/strategy/git/export_test.go index 3fb1a17..7a968c2 100644 --- a/internal/strategy/git/export_test.go +++ b/internal/strategy/git/export_test.go @@ -9,3 +9,7 @@ import ( func (s *Strategy) GenerateAndUploadSnapshot(ctx context.Context, repo *gitclone.Repository) error { return s.generateAndUploadSnapshot(ctx, repo) } + +func ConvertSnapshotToMirror(ctx context.Context, repoPath, upstreamURL string) error { + return convertSnapshotToMirror(ctx, repoPath, upstreamURL) +} diff --git a/internal/strategy/git/git.go b/internal/strategy/git/git.go index 575b930..1976e56 100644 --- a/internal/strategy/git/git.go +++ b/internal/strategy/git/git.go @@ -493,6 +493,11 @@ func (s *Strategy) startClone(ctx context.Context, repo *gitclone.Repository) { // tryRestoreSnapshot attempts to restore a mirror from an S3 snapshot. // On failure the repo path is cleaned up so the caller can fall back to clone. +// +// Snapshots are non-bare clones with remote.origin.url pointing to the cachew +// server (so end-user git pulls go through the proxy). When restoring into the +// mirror path we must fix the remote URL back to the real upstream and convert +// the repo to bare, otherwise backgroundFetch would loop back to cachew itself. func (s *Strategy) tryRestoreSnapshot(ctx context.Context, repo *gitclone.Repository) error { cacheKey := snapshotCacheKey(repo.UpstreamURL()) @@ -505,6 +510,11 @@ func (s *Strategy) tryRestoreSnapshot(ctx context.Context, repo *gitclone.Reposi return errors.Wrap(err, "restore snapshot") } + if err := convertSnapshotToMirror(ctx, repo.Path(), repo.UpstreamURL()); err != nil { + _ = os.RemoveAll(repo.Path()) + return errors.Wrap(err, "convert snapshot to mirror") + } + if err := repo.MarkRestored(ctx); err != nil { _ = os.RemoveAll(repo.Path()) return errors.Wrap(err, "mark restored") @@ -513,6 +523,68 @@ func (s *Strategy) tryRestoreSnapshot(ctx context.Context, repo *gitclone.Reposi return nil } +// convertSnapshotToMirror converts a restored non-bare snapshot into a bare +// mirror suitable for serving upload-pack. It resets remote.origin.url to the +// real upstream and sets core.bare = true. +func convertSnapshotToMirror(ctx context.Context, repoPath, upstreamURL string) error { + // The snapshot is a non-bare clone (.git subdir). Detect this by checking + // for the .git directory and, if present, relocate it to a bare layout. + dotGit := filepath.Join(repoPath, ".git") + if info, err := os.Stat(dotGit); err == nil && info.IsDir() { + if err := convertToBare(repoPath, dotGit); err != nil { + return err + } + } + + // Reset the remote URL to the actual upstream. + // #nosec G204 - repoPath and upstreamURL are controlled by us + cmd := exec.CommandContext(ctx, "git", "-C", repoPath, "remote", "set-url", "origin", upstreamURL) + if output, err := cmd.CombinedOutput(); err != nil { + return errors.Wrapf(err, "set remote URL: %s", string(output)) + } + + // Mark the repo as bare so git treats it as a mirror. + // #nosec G204 - repoPath is controlled by us + cmd = exec.CommandContext(ctx, "git", "-C", repoPath, "config", "core.bare", "true") + if output, err := cmd.CombinedOutput(); err != nil { + return errors.Wrapf(err, "set core.bare: %s", string(output)) + } + + return nil +} + +// convertToBare moves the contents of .git/ up into repoPath, removing the +// working tree, so the directory becomes a bare repository. +func convertToBare(repoPath, dotGit string) error { + entries, err := os.ReadDir(dotGit) + if err != nil { + return errors.Wrap(err, "read .git directory") + } + + // Remove working tree files (everything except .git). + topEntries, err := os.ReadDir(repoPath) + if err != nil { + return errors.Wrap(err, "read repo directory") + } + for _, e := range topEntries { + if e.Name() == ".git" { + continue + } + _ = os.RemoveAll(filepath.Join(repoPath, e.Name())) + } + + // Move .git/* contents up to repoPath. + for _, e := range entries { + src := filepath.Join(dotGit, e.Name()) + dst := filepath.Join(repoPath, e.Name()) + if err := os.Rename(src, dst); err != nil { + return errors.Wrapf(err, "move %s", e.Name()) + } + } + + return errors.Wrap(os.Remove(dotGit), "remove .git directory") +} + func (s *Strategy) maybeBackgroundFetch(repo *gitclone.Repository) { if !repo.NeedsFetch(s.cloneManager.Config().FetchInterval) { return diff --git a/internal/strategy/git/snapshot_test.go b/internal/strategy/git/snapshot_test.go index 2ef67db..d053306 100644 --- a/internal/strategy/git/snapshot_test.go +++ b/internal/strategy/git/snapshot_test.go @@ -213,6 +213,77 @@ func TestSnapshotGenerationViaLocalClone(t *testing.T) { assert.True(t, os.IsNotExist(err)) } +func TestSnapshotRestoreConvertsMirror(t *testing.T) { + if _, err := exec.LookPath("git"); err != nil { + t.Skip("git not found in PATH") + } + + _, ctx := logging.Configure(context.Background(), logging.Config{}) + tmpDir := t.TempDir() + mirrorRoot := filepath.Join(tmpDir, "mirrors") + upstreamURL := "https://github.com/org/repo" + serverURL := "http://cachew.example.com" + + // Create a mirror repo and generate a snapshot with the cachew server URL. + mirrorPath := filepath.Join(mirrorRoot, "github.com", "org", "repo") + createTestMirrorRepo(t, mirrorPath) + + memCache, err := cache.NewMemory(ctx, cache.MemoryConfig{}) + assert.NoError(t, err) + mux := newTestMux() + + cm := gitclone.NewManagerProvider(ctx, gitclone.Config{MirrorRoot: mirrorRoot}, nil) + s, err := git.New(ctx, git.Config{ServerURL: serverURL}, newTestScheduler(ctx, t), memCache, mux, cm, func() (*githubapp.TokenManager, error) { return nil, nil }) //nolint:nilnil + assert.NoError(t, err) + + manager, err := cm() + assert.NoError(t, err) + repo, err := manager.GetOrCreate(ctx, upstreamURL) + assert.NoError(t, err) + + err = s.GenerateAndUploadSnapshot(ctx, repo) + assert.NoError(t, err) + + // Restore the snapshot into a new directory (simulating mirror restore). + restoreDir := filepath.Join(tmpDir, "restored-mirror") + cacheKey := cache.NewKey(upstreamURL + ".snapshot") + err = snapshot.Restore(ctx, memCache, cacheKey, restoreDir, 0) + assert.NoError(t, err) + + // Before conversion: should be non-bare with cachew remote URL. + cmd := exec.Command("git", "-C", restoreDir, "remote", "get-url", "origin") + output, err := cmd.CombinedOutput() + assert.NoError(t, err, string(output)) + assert.Equal(t, serverURL+"/git/github.com/org/repo\n", string(output)) + + _, err = os.Stat(filepath.Join(restoreDir, ".git")) + assert.NoError(t, err, "should have .git directory before conversion") + + // Convert snapshot to mirror. + err = git.ConvertSnapshotToMirror(ctx, restoreDir, upstreamURL) + assert.NoError(t, err) + + // After conversion: remote should point to upstream. + cmd = exec.Command("git", "-C", restoreDir, "remote", "get-url", "origin") + output, err = cmd.CombinedOutput() + assert.NoError(t, err, string(output)) + assert.Equal(t, upstreamURL+"\n", string(output)) + + // After conversion: should be bare (no .git subdir, config at top level). + _, err = os.Stat(filepath.Join(restoreDir, ".git")) + assert.True(t, os.IsNotExist(err), "should not have .git directory after conversion") + + cmd = exec.Command("git", "-C", restoreDir, "config", "core.bare") + output, err = cmd.CombinedOutput() + assert.NoError(t, err, string(output)) + assert.Equal(t, "true\n", string(output)) + + // Verify the repo is functional: git branch should work. + cmd = exec.Command("git", "-C", restoreDir, "branch") + output, err = cmd.CombinedOutput() + assert.NoError(t, err, string(output)) +} + func TestSnapshotRemoteURLUsesServerURL(t *testing.T) { if _, err := exec.LookPath("git"); err != nil { t.Skip("git not found in PATH")