Skip to content

Commit 80ec945

Browse files
authored
fix: clean should preserve and remove files from sourceRoots (#1950)
Fixes: #1860
1 parent 89524de commit 80ec945

File tree

5 files changed

+509
-230
lines changed

5 files changed

+509
-230
lines changed

internal/librarian/command.go

Lines changed: 111 additions & 65 deletions
Original file line numberDiff line numberDiff line change
@@ -258,10 +258,9 @@ func cleanAndCopyLibrary(state *config.LibrarianState, repoDir, libraryID, outpu
258258
}
259259
}
260260

261-
preservePatterns := append(library.PreserveRegex,
262-
globalPreservePatterns...)
261+
preservePatterns := append(library.PreserveRegex, globalPreservePatterns...)
263262

264-
if err := clean(repoDir, removePatterns, preservePatterns); err != nil {
263+
if err := clean(repoDir, library.SourceRoots, removePatterns, preservePatterns); err != nil {
265264
return fmt.Errorf("failed to clean library, %s: %w", library.ID, err)
266265
}
267266

@@ -455,38 +454,84 @@ func copyFile(dst, src string) (err error) {
455454
return err
456455
}
457456

458-
// clean removes files and directories from a root directory based on remove and preserve patterns.
457+
// clean removes files and directories from source roots based on remove and preserve patterns.
458+
// Limit the possible files when cleaning to those in source roots (not rootDir) as regex patterns
459+
// for preserve and remove should ONLY impact source root files.
459460
//
460461
// It first determines the paths to remove by applying the removePatterns and then excluding any paths
461462
// that match the preservePatterns. It then separates the remaining paths into files and directories and
462463
// removes them, ensuring that directories are removed last.
463464
//
464465
// This logic is ported from owlbot logic: https://github.com/googleapis/repo-automation-bots/blob/12dad68640960290910b660e4325630c9ace494b/packages/owl-bot/src/copy-code.ts#L1027
465-
func clean(rootDir string, removePatterns, preservePatterns []string) error {
466-
slog.Info("cleaning directory", "path", rootDir)
467-
finalPathsToRemove, err := deriveFinalPathsToRemove(rootDir, removePatterns, preservePatterns)
466+
func clean(rootDir string, sourceRoots, removePatterns, preservePatterns []string) error {
467+
slog.Info("cleaning directories", "source roots", sourceRoots)
468+
469+
// relPaths contains a list of files in source root's relative paths from rootDir. The
470+
// regex patterns for preserve and remove apply to a source root's relative path
471+
var relPaths []string
472+
for _, sourceRoot := range sourceRoots {
473+
sourceRootPath := filepath.Join(rootDir, sourceRoot)
474+
// If a source root does not exist, log a warning and searching the other source roots.
475+
// TODO: Consider not calling clean if it's a first time generation
476+
if _, err := os.Lstat(sourceRootPath); err != nil {
477+
if os.IsNotExist(err) {
478+
slog.Warn("Unable to find source root. It may be an initial generation request", "source root", sourceRoot)
479+
continue
480+
}
481+
// For any other error (permissions, I/O, etc.)
482+
slog.Error("Error trying to clean source root", "source root", sourceRoot, "error", err)
483+
return err
484+
}
485+
sourceRootPaths, err := findSubDirRelPaths(rootDir, sourceRootPath)
486+
if err != nil {
487+
// Log a warning and continue processing other source roots. There may be other files
488+
// that can be cleaned up.
489+
slog.Warn("unable to search for files in a source root", "source root", sourceRoot, "error", err)
490+
continue
491+
}
492+
if len(sourceRootPaths) == 0 {
493+
slog.Info("source root does not contain any files", "source root", sourceRoot)
494+
}
495+
relPaths = append(relPaths, sourceRootPaths...)
496+
}
497+
498+
if len(relPaths) == 0 {
499+
slog.Info("There are no files to be cleaned in source roots", "source roots", sourceRoots)
500+
return nil
501+
}
502+
503+
pathsToRemove, err := filterPathsForRemoval(relPaths, removePatterns, preservePatterns)
468504
if err != nil {
469505
return err
470506
}
471507

472-
filesToRemove, dirsToRemove, err := separateFilesAndDirs(rootDir, finalPathsToRemove)
508+
// prepend the rootDir to each path to ensure that os.Remove can find the file
509+
var paths []string
510+
for _, path := range pathsToRemove {
511+
paths = append(paths, filepath.Join(rootDir, path))
512+
}
513+
514+
filesToRemove, dirsToRemove, err := separateFilesAndDirs(paths)
473515
if err != nil {
474516
return err
475517
}
476518

477519
// Remove files first, then directories.
478520
for _, file := range filesToRemove {
479521
slog.Info("removing file", "path", file)
480-
if err := os.Remove(filepath.Join(rootDir, file)); err != nil {
522+
if err := os.Remove(file); err != nil {
481523
return err
482524
}
483525
}
484526

485-
sortDirsByDepth(dirsToRemove)
527+
// Sort to remove the child directories first
528+
slices.SortFunc(dirsToRemove, func(a, b string) int {
529+
return strings.Count(b, string(filepath.Separator)) - strings.Count(a, string(filepath.Separator))
530+
})
486531

487532
for _, dir := range dirsToRemove {
488533
slog.Info("removing directory", "path", dir)
489-
if err := os.Remove(filepath.Join(rootDir, dir)); err != nil {
534+
if err := os.Remove(dir); err != nil {
490535
// It's possible the directory is not empty due to preserved files.
491536
slog.Warn("failed to remove directory, it may not be empty", "dir", dir, "err", err)
492537
}
@@ -495,34 +540,38 @@ func clean(rootDir string, removePatterns, preservePatterns []string) error {
495540
return nil
496541
}
497542

498-
// sortDirsByDepth sorts directories by depth (descending) to remove children first.
499-
func sortDirsByDepth(dirs []string) {
500-
slices.SortFunc(dirs, func(a, b string) int {
501-
return strings.Count(b, string(filepath.Separator)) - strings.Count(a, string(filepath.Separator))
502-
})
503-
}
543+
// findSubDirRelPaths walks the subDir tree returns a slice of all file and directory paths
544+
// relative to the dir. This is repeated for all nested directories. subDir must be under
545+
// or the same as dir.
546+
func findSubDirRelPaths(dir, subDir string) ([]string, error) {
547+
dirRelPath, err := filepath.Rel(dir, subDir)
548+
if err != nil {
549+
return nil, fmt.Errorf("cannot establish the relationship between %s and %s: %w", dir, subDir, err)
550+
}
551+
// '..' signifies that the subDir exists outside of dir
552+
if strings.HasPrefix(dirRelPath, "..") {
553+
return nil, fmt.Errorf("subDir is not nested within the dir: %s, %s", subDir, dir)
554+
}
504555

505-
// allPaths walks the directory tree rooted at rootDir and returns a slice of all
506-
// file and directory paths, relative to rootDir.
507-
func allPaths(rootDir string) ([]string, error) {
508-
var paths []string
509-
err := filepath.WalkDir(rootDir, func(path string, d fs.DirEntry, err error) error {
556+
paths := []string{}
557+
err = filepath.WalkDir(subDir, func(path string, d fs.DirEntry, err error) error {
510558
if err != nil {
511559
return err
512560
}
513-
relPath, err := filepath.Rel(rootDir, path)
514-
if err != nil {
515-
return err
561+
// error is ignored as we have confirmed that subDir is child or equal to rootDir
562+
relPath, _ := filepath.Rel(dir, path)
563+
// Special case when subDir is equal to dir. Drop the "." as it references itself
564+
if relPath != "." {
565+
paths = append(paths, relPath)
516566
}
517-
paths = append(paths, relPath)
518567
return nil
519568
})
520569
return paths, err
521570
}
522571

523-
// filterPaths returns a new slice containing only the paths from the input slice
572+
// filterPathsByRegex returns a new slice containing only the paths from the input slice
524573
// that match at least one of the provided regular expressions.
525-
func filterPaths(paths []string, regexps []*regexp.Regexp) []string {
574+
func filterPathsByRegex(paths []string, regexps []*regexp.Regexp) []string {
526575
var filtered []string
527576
for _, path := range paths {
528577
for _, re := range regexps {
@@ -535,10 +584,26 @@ func filterPaths(paths []string, regexps []*regexp.Regexp) []string {
535584
return filtered
536585
}
537586

538-
// deriveFinalPathsToRemove determines the final set of paths to be removed. It
539-
// starts with all paths under rootDir, filters them based on removePatterns,
540-
// and then excludes any paths that match preservePatterns.
541-
func deriveFinalPathsToRemove(rootDir string, removePatterns, preservePatterns []string) ([]string, error) {
587+
// compileRegexps takes a slice of string patterns and compiles each one into a
588+
// regular expression. It returns a slice of compiled regexps or an error if any
589+
// pattern is invalid.
590+
func compileRegexps(patterns []string) ([]*regexp.Regexp, error) {
591+
var regexps []*regexp.Regexp
592+
for _, pattern := range patterns {
593+
re, err := regexp.Compile(pattern)
594+
if err != nil {
595+
return nil, fmt.Errorf("invalid regex %q: %w", pattern, err)
596+
}
597+
regexps = append(regexps, re)
598+
}
599+
return regexps, nil
600+
}
601+
602+
// filterPathsForRemoval determines the list of paths to be removed. The logic runs as follows:
603+
// 1. paths that match any removePatterns are marked for removal
604+
// 2. paths that match the preservePatterns are kept (even if they match removePatterns)
605+
// Paths that match both are kept as preserve has overrides.
606+
func filterPathsForRemoval(paths, removePatterns, preservePatterns []string) ([]string, error) {
542607
removeRegexps, err := compileRegexps(removePatterns)
543608
if err != nil {
544609
return nil, err
@@ -548,61 +613,42 @@ func deriveFinalPathsToRemove(rootDir string, removePatterns, preservePatterns [
548613
return nil, err
549614
}
550615

551-
allPaths, err := allPaths(rootDir)
552-
if err != nil {
553-
return nil, err
554-
}
555-
556-
pathsToRemove := filterPaths(allPaths, removeRegexps)
557-
pathsToPreserve := filterPaths(pathsToRemove, preserveRegexps)
616+
pathsToRemove := filterPathsByRegex(paths, removeRegexps)
617+
pathsToPreserve := filterPathsByRegex(pathsToRemove, preserveRegexps)
558618

559-
// delete pathsToPreserve from pathsToRemove.
560-
pathsToDelete := make(map[string]bool)
619+
// map for a quick lookup for any preserve paths
620+
preserveMap := make(map[string]bool)
561621
for _, p := range pathsToPreserve {
562-
pathsToDelete[p] = true
622+
preserveMap[p] = true
563623
}
564624
finalPathsToRemove := slices.DeleteFunc(pathsToRemove, func(path string) bool {
565-
return pathsToDelete[path]
625+
return preserveMap[path]
566626
})
567627
return finalPathsToRemove, nil
568628
}
569629

570630
// separateFilesAndDirs takes a list of paths and categorizes them into files
571631
// and directories. It uses os.Lstat to avoid following symlinks, treating them
572632
// as files. Paths that do not exist are silently ignored.
573-
func separateFilesAndDirs(rootDir string, paths []string) ([]string, []string, error) {
574-
var files, dirs []string
633+
func separateFilesAndDirs(paths []string) ([]string, []string, error) {
634+
var filePaths, dirPaths []string
575635
for _, path := range paths {
576-
info, err := os.Lstat(filepath.Join(rootDir, path))
636+
info, err := os.Lstat(path)
577637
if err != nil {
638+
// The file or directory may have already been removed.
578639
if errors.Is(err, os.ErrNotExist) {
579-
// The file or directory may have already been removed.
640+
slog.Warn("unable to find path", "path", path)
580641
continue
581642
}
582643
// For any other error (permissions, I/O, etc.)
583644
return nil, nil, fmt.Errorf("failed to stat path %q: %w", path, err)
584645

585646
}
586647
if info.IsDir() {
587-
dirs = append(dirs, path)
648+
dirPaths = append(dirPaths, path)
588649
} else {
589-
files = append(files, path)
650+
filePaths = append(filePaths, path)
590651
}
591652
}
592-
return files, dirs, nil
593-
}
594-
595-
// compileRegexps takes a slice of string patterns and compiles each one into a
596-
// regular expression. It returns a slice of compiled regexps or an error if any
597-
// pattern is invalid.
598-
func compileRegexps(patterns []string) ([]*regexp.Regexp, error) {
599-
var regexps []*regexp.Regexp
600-
for _, pattern := range patterns {
601-
re, err := regexp.Compile(pattern)
602-
if err != nil {
603-
return nil, fmt.Errorf("invalid regex %q: %w", pattern, err)
604-
}
605-
regexps = append(regexps, re)
606-
}
607-
return regexps, nil
653+
return filePaths, dirPaths, nil
608654
}

0 commit comments

Comments
 (0)