Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions internal/strategy/git/export_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,7 @@ import (
func (s *Strategy) GenerateAndUploadSnapshot(ctx context.Context, repo *gitclone.Repository) error {
return s.generateAndUploadSnapshot(ctx, repo)
}

func ConvertSnapshotToMirror(ctx context.Context, repoPath, upstreamURL string) error {
return convertSnapshotToMirror(ctx, repoPath, upstreamURL)
}
72 changes: 72 additions & 0 deletions internal/strategy/git/git.go
Original file line number Diff line number Diff line change
Expand Up @@ -493,6 +493,11 @@ func (s *Strategy) startClone(ctx context.Context, repo *gitclone.Repository) {

// tryRestoreSnapshot attempts to restore a mirror from an S3 snapshot.
// On failure the repo path is cleaned up so the caller can fall back to clone.
//
// Snapshots are non-bare clones with remote.origin.url pointing to the cachew
// server (so end-user git pulls go through the proxy). When restoring into the
// mirror path we must fix the remote URL back to the real upstream and convert
// the repo to bare, otherwise backgroundFetch would loop back to cachew itself.
func (s *Strategy) tryRestoreSnapshot(ctx context.Context, repo *gitclone.Repository) error {
cacheKey := snapshotCacheKey(repo.UpstreamURL())

Expand All @@ -505,6 +510,11 @@ func (s *Strategy) tryRestoreSnapshot(ctx context.Context, repo *gitclone.Reposi
return errors.Wrap(err, "restore snapshot")
}

if err := convertSnapshotToMirror(ctx, repo.Path(), repo.UpstreamURL()); err != nil {
_ = os.RemoveAll(repo.Path())
return errors.Wrap(err, "convert snapshot to mirror")
}

if err := repo.MarkRestored(ctx); err != nil {
_ = os.RemoveAll(repo.Path())
return errors.Wrap(err, "mark restored")
Expand All @@ -513,6 +523,68 @@ func (s *Strategy) tryRestoreSnapshot(ctx context.Context, repo *gitclone.Reposi
return nil
}

// convertSnapshotToMirror converts a restored non-bare snapshot into a bare
// mirror suitable for serving upload-pack. It resets remote.origin.url to the
// real upstream and sets core.bare = true.
func convertSnapshotToMirror(ctx context.Context, repoPath, upstreamURL string) error {
// The snapshot is a non-bare clone (.git subdir). Detect this by checking
// for the .git directory and, if present, relocate it to a bare layout.
dotGit := filepath.Join(repoPath, ".git")
if info, err := os.Stat(dotGit); err == nil && info.IsDir() {
if err := convertToBare(repoPath, dotGit); err != nil {
return err
}
}

// Reset the remote URL to the actual upstream.
// #nosec G204 - repoPath and upstreamURL are controlled by us
cmd := exec.CommandContext(ctx, "git", "-C", repoPath, "remote", "set-url", "origin", upstreamURL)
if output, err := cmd.CombinedOutput(); err != nil {
return errors.Wrapf(err, "set remote URL: %s", string(output))
}

// Mark the repo as bare so git treats it as a mirror.
// #nosec G204 - repoPath is controlled by us
cmd = exec.CommandContext(ctx, "git", "-C", repoPath, "config", "core.bare", "true")
if output, err := cmd.CombinedOutput(); err != nil {
return errors.Wrapf(err, "set core.bare: %s", string(output))
}

return nil
}

// convertToBare moves the contents of .git/ up into repoPath, removing the
// working tree, so the directory becomes a bare repository.
func convertToBare(repoPath, dotGit string) error {
entries, err := os.ReadDir(dotGit)
if err != nil {
return errors.Wrap(err, "read .git directory")
}

// Remove working tree files (everything except .git).
topEntries, err := os.ReadDir(repoPath)
if err != nil {
return errors.Wrap(err, "read repo directory")
}
for _, e := range topEntries {
if e.Name() == ".git" {
continue
}
_ = os.RemoveAll(filepath.Join(repoPath, e.Name()))
}

// Move .git/* contents up to repoPath.
for _, e := range entries {
src := filepath.Join(dotGit, e.Name())
dst := filepath.Join(repoPath, e.Name())
if err := os.Rename(src, dst); err != nil {
return errors.Wrapf(err, "move %s", e.Name())
}
}

return errors.Wrap(os.Remove(dotGit), "remove .git directory")
}

func (s *Strategy) maybeBackgroundFetch(repo *gitclone.Repository) {
if !repo.NeedsFetch(s.cloneManager.Config().FetchInterval) {
return
Expand Down
71 changes: 71 additions & 0 deletions internal/strategy/git/snapshot_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -213,6 +213,77 @@ func TestSnapshotGenerationViaLocalClone(t *testing.T) {
assert.True(t, os.IsNotExist(err))
}

func TestSnapshotRestoreConvertsMirror(t *testing.T) {
if _, err := exec.LookPath("git"); err != nil {
t.Skip("git not found in PATH")
}

_, ctx := logging.Configure(context.Background(), logging.Config{})
tmpDir := t.TempDir()
mirrorRoot := filepath.Join(tmpDir, "mirrors")
upstreamURL := "https://github.com/org/repo"
serverURL := "http://cachew.example.com"

// Create a mirror repo and generate a snapshot with the cachew server URL.
mirrorPath := filepath.Join(mirrorRoot, "github.com", "org", "repo")
createTestMirrorRepo(t, mirrorPath)

memCache, err := cache.NewMemory(ctx, cache.MemoryConfig{})
assert.NoError(t, err)
mux := newTestMux()

cm := gitclone.NewManagerProvider(ctx, gitclone.Config{MirrorRoot: mirrorRoot}, nil)
s, err := git.New(ctx, git.Config{ServerURL: serverURL}, newTestScheduler(ctx, t), memCache, mux, cm, func() (*githubapp.TokenManager, error) { return nil, nil }) //nolint:nilnil
assert.NoError(t, err)

manager, err := cm()
assert.NoError(t, err)
repo, err := manager.GetOrCreate(ctx, upstreamURL)
assert.NoError(t, err)

err = s.GenerateAndUploadSnapshot(ctx, repo)
assert.NoError(t, err)

// Restore the snapshot into a new directory (simulating mirror restore).
restoreDir := filepath.Join(tmpDir, "restored-mirror")
cacheKey := cache.NewKey(upstreamURL + ".snapshot")
err = snapshot.Restore(ctx, memCache, cacheKey, restoreDir, 0)
assert.NoError(t, err)

// Before conversion: should be non-bare with cachew remote URL.
cmd := exec.Command("git", "-C", restoreDir, "remote", "get-url", "origin")
output, err := cmd.CombinedOutput()
assert.NoError(t, err, string(output))
assert.Equal(t, serverURL+"/git/github.com/org/repo\n", string(output))

_, err = os.Stat(filepath.Join(restoreDir, ".git"))
assert.NoError(t, err, "should have .git directory before conversion")

// Convert snapshot to mirror.
err = git.ConvertSnapshotToMirror(ctx, restoreDir, upstreamURL)
assert.NoError(t, err)

// After conversion: remote should point to upstream.
cmd = exec.Command("git", "-C", restoreDir, "remote", "get-url", "origin")
output, err = cmd.CombinedOutput()
assert.NoError(t, err, string(output))
assert.Equal(t, upstreamURL+"\n", string(output))

// After conversion: should be bare (no .git subdir, config at top level).
_, err = os.Stat(filepath.Join(restoreDir, ".git"))
assert.True(t, os.IsNotExist(err), "should not have .git directory after conversion")

cmd = exec.Command("git", "-C", restoreDir, "config", "core.bare")
output, err = cmd.CombinedOutput()
assert.NoError(t, err, string(output))
assert.Equal(t, "true\n", string(output))

// Verify the repo is functional: git branch should work.
cmd = exec.Command("git", "-C", restoreDir, "branch")
output, err = cmd.CombinedOutput()
assert.NoError(t, err, string(output))
}

func TestSnapshotRemoteURLUsesServerURL(t *testing.T) {
if _, err := exec.LookPath("git"); err != nil {
t.Skip("git not found in PATH")
Expand Down