@@ -258,10 +258,9 @@ func cleanAndCopyLibrary(state *config.LibrarianState, repoDir, libraryID, outpu
258258 }
259259 }
260260
261- preservePatterns := append (library .PreserveRegex ,
262- globalPreservePatterns ... )
261+ preservePatterns := append (library .PreserveRegex , globalPreservePatterns ... )
263262
264- if err := clean (repoDir , removePatterns , preservePatterns ); err != nil {
263+ if err := clean (repoDir , library . SourceRoots , removePatterns , preservePatterns ); err != nil {
265264 return fmt .Errorf ("failed to clean library, %s: %w" , library .ID , err )
266265 }
267266
@@ -455,38 +454,84 @@ func copyFile(dst, src string) (err error) {
455454 return err
456455}
457456
458- // clean removes files and directories from a root directory based on remove and preserve patterns.
457+ // clean removes files and directories from source roots based on remove and preserve patterns.
458+ // Limit the possible files when cleaning to those in source roots (not rootDir) as regex patterns
459+ // for preserve and remove should ONLY impact source root files.
459460//
460461// It first determines the paths to remove by applying the removePatterns and then excluding any paths
461462// that match the preservePatterns. It then separates the remaining paths into files and directories and
462463// removes them, ensuring that directories are removed last.
463464//
464465// This logic is ported from owlbot logic: https://github.com/googleapis/repo-automation-bots/blob/12dad68640960290910b660e4325630c9ace494b/packages/owl-bot/src/copy-code.ts#L1027
465- func clean (rootDir string , removePatterns , preservePatterns []string ) error {
466- slog .Info ("cleaning directory" , "path" , rootDir )
467- finalPathsToRemove , err := deriveFinalPathsToRemove (rootDir , removePatterns , preservePatterns )
466+ func clean (rootDir string , sourceRoots , removePatterns , preservePatterns []string ) error {
467+ slog .Info ("cleaning directories" , "source roots" , sourceRoots )
468+
469+ // relPaths contains a list of files in source root's relative paths from rootDir. The
470+ // regex patterns for preserve and remove apply to a source root's relative path
471+ var relPaths []string
472+ for _ , sourceRoot := range sourceRoots {
473+ sourceRootPath := filepath .Join (rootDir , sourceRoot )
474+ // If a source root does not exist, log a warning and searching the other source roots.
475+ // TODO: Consider not calling clean if it's a first time generation
476+ if _ , err := os .Lstat (sourceRootPath ); err != nil {
477+ if os .IsNotExist (err ) {
478+ slog .Warn ("Unable to find source root. It may be an initial generation request" , "source root" , sourceRoot )
479+ continue
480+ }
481+ // For any other error (permissions, I/O, etc.)
482+ slog .Error ("Error trying to clean source root" , "source root" , sourceRoot , "error" , err )
483+ return err
484+ }
485+ sourceRootPaths , err := findSubDirRelPaths (rootDir , sourceRootPath )
486+ if err != nil {
487+ // Log a warning and continue processing other source roots. There may be other files
488+ // that can be cleaned up.
489+ slog .Warn ("unable to search for files in a source root" , "source root" , sourceRoot , "error" , err )
490+ continue
491+ }
492+ if len (sourceRootPaths ) == 0 {
493+ slog .Info ("source root does not contain any files" , "source root" , sourceRoot )
494+ }
495+ relPaths = append (relPaths , sourceRootPaths ... )
496+ }
497+
498+ if len (relPaths ) == 0 {
499+ slog .Info ("There are no files to be cleaned in source roots" , "source roots" , sourceRoots )
500+ return nil
501+ }
502+
503+ pathsToRemove , err := filterPathsForRemoval (relPaths , removePatterns , preservePatterns )
468504 if err != nil {
469505 return err
470506 }
471507
472- filesToRemove , dirsToRemove , err := separateFilesAndDirs (rootDir , finalPathsToRemove )
508+ // prepend the rootDir to each path to ensure that os.Remove can find the file
509+ var paths []string
510+ for _ , path := range pathsToRemove {
511+ paths = append (paths , filepath .Join (rootDir , path ))
512+ }
513+
514+ filesToRemove , dirsToRemove , err := separateFilesAndDirs (paths )
473515 if err != nil {
474516 return err
475517 }
476518
477519 // Remove files first, then directories.
478520 for _ , file := range filesToRemove {
479521 slog .Info ("removing file" , "path" , file )
480- if err := os .Remove (filepath . Join ( rootDir , file ) ); err != nil {
522+ if err := os .Remove (file ); err != nil {
481523 return err
482524 }
483525 }
484526
485- sortDirsByDepth (dirsToRemove )
527+ // Sort to remove the child directories first
528+ slices .SortFunc (dirsToRemove , func (a , b string ) int {
529+ return strings .Count (b , string (filepath .Separator )) - strings .Count (a , string (filepath .Separator ))
530+ })
486531
487532 for _ , dir := range dirsToRemove {
488533 slog .Info ("removing directory" , "path" , dir )
489- if err := os .Remove (filepath . Join ( rootDir , dir ) ); err != nil {
534+ if err := os .Remove (dir ); err != nil {
490535 // It's possible the directory is not empty due to preserved files.
491536 slog .Warn ("failed to remove directory, it may not be empty" , "dir" , dir , "err" , err )
492537 }
@@ -495,34 +540,38 @@ func clean(rootDir string, removePatterns, preservePatterns []string) error {
495540 return nil
496541}
497542
498- // sortDirsByDepth sorts directories by depth (descending) to remove children first.
499- func sortDirsByDepth (dirs []string ) {
500- slices .SortFunc (dirs , func (a , b string ) int {
501- return strings .Count (b , string (filepath .Separator )) - strings .Count (a , string (filepath .Separator ))
502- })
503- }
543+ // findSubDirRelPaths walks the subDir tree returns a slice of all file and directory paths
544+ // relative to the dir. This is repeated for all nested directories. subDir must be under
545+ // or the same as dir.
546+ func findSubDirRelPaths (dir , subDir string ) ([]string , error ) {
547+ dirRelPath , err := filepath .Rel (dir , subDir )
548+ if err != nil {
549+ return nil , fmt .Errorf ("cannot establish the relationship between %s and %s: %w" , dir , subDir , err )
550+ }
551+ // '..' signifies that the subDir exists outside of dir
552+ if strings .HasPrefix (dirRelPath , ".." ) {
553+ return nil , fmt .Errorf ("subDir is not nested within the dir: %s, %s" , subDir , dir )
554+ }
504555
505- // allPaths walks the directory tree rooted at rootDir and returns a slice of all
506- // file and directory paths, relative to rootDir.
507- func allPaths (rootDir string ) ([]string , error ) {
508- var paths []string
509- err := filepath .WalkDir (rootDir , func (path string , d fs.DirEntry , err error ) error {
556+ paths := []string {}
557+ err = filepath .WalkDir (subDir , func (path string , d fs.DirEntry , err error ) error {
510558 if err != nil {
511559 return err
512560 }
513- relPath , err := filepath .Rel (rootDir , path )
514- if err != nil {
515- return err
561+ // error is ignored as we have confirmed that subDir is child or equal to rootDir
562+ relPath , _ := filepath .Rel (dir , path )
563+ // Special case when subDir is equal to dir. Drop the "." as it references itself
564+ if relPath != "." {
565+ paths = append (paths , relPath )
516566 }
517- paths = append (paths , relPath )
518567 return nil
519568 })
520569 return paths , err
521570}
522571
523- // filterPaths returns a new slice containing only the paths from the input slice
572+ // filterPathsByRegex returns a new slice containing only the paths from the input slice
524573// that match at least one of the provided regular expressions.
525- func filterPaths (paths []string , regexps []* regexp.Regexp ) []string {
574+ func filterPathsByRegex (paths []string , regexps []* regexp.Regexp ) []string {
526575 var filtered []string
527576 for _ , path := range paths {
528577 for _ , re := range regexps {
@@ -535,10 +584,26 @@ func filterPaths(paths []string, regexps []*regexp.Regexp) []string {
535584 return filtered
536585}
537586
538- // deriveFinalPathsToRemove determines the final set of paths to be removed. It
539- // starts with all paths under rootDir, filters them based on removePatterns,
540- // and then excludes any paths that match preservePatterns.
541- func deriveFinalPathsToRemove (rootDir string , removePatterns , preservePatterns []string ) ([]string , error ) {
587+ // compileRegexps takes a slice of string patterns and compiles each one into a
588+ // regular expression. It returns a slice of compiled regexps or an error if any
589+ // pattern is invalid.
590+ func compileRegexps (patterns []string ) ([]* regexp.Regexp , error ) {
591+ var regexps []* regexp.Regexp
592+ for _ , pattern := range patterns {
593+ re , err := regexp .Compile (pattern )
594+ if err != nil {
595+ return nil , fmt .Errorf ("invalid regex %q: %w" , pattern , err )
596+ }
597+ regexps = append (regexps , re )
598+ }
599+ return regexps , nil
600+ }
601+
602+ // filterPathsForRemoval determines the list of paths to be removed. The logic runs as follows:
603+ // 1. paths that match any removePatterns are marked for removal
604+ // 2. paths that match the preservePatterns are kept (even if they match removePatterns)
605+ // Paths that match both are kept as preserve has overrides.
606+ func filterPathsForRemoval (paths , removePatterns , preservePatterns []string ) ([]string , error ) {
542607 removeRegexps , err := compileRegexps (removePatterns )
543608 if err != nil {
544609 return nil , err
@@ -548,61 +613,42 @@ func deriveFinalPathsToRemove(rootDir string, removePatterns, preservePatterns [
548613 return nil , err
549614 }
550615
551- allPaths , err := allPaths (rootDir )
552- if err != nil {
553- return nil , err
554- }
555-
556- pathsToRemove := filterPaths (allPaths , removeRegexps )
557- pathsToPreserve := filterPaths (pathsToRemove , preserveRegexps )
616+ pathsToRemove := filterPathsByRegex (paths , removeRegexps )
617+ pathsToPreserve := filterPathsByRegex (pathsToRemove , preserveRegexps )
558618
559- // delete pathsToPreserve from pathsToRemove.
560- pathsToDelete := make (map [string ]bool )
619+ // map for a quick lookup for any preserve paths
620+ preserveMap := make (map [string ]bool )
561621 for _ , p := range pathsToPreserve {
562- pathsToDelete [p ] = true
622+ preserveMap [p ] = true
563623 }
564624 finalPathsToRemove := slices .DeleteFunc (pathsToRemove , func (path string ) bool {
565- return pathsToDelete [path ]
625+ return preserveMap [path ]
566626 })
567627 return finalPathsToRemove , nil
568628}
569629
570630// separateFilesAndDirs takes a list of paths and categorizes them into files
571631// and directories. It uses os.Lstat to avoid following symlinks, treating them
572632// as files. Paths that do not exist are silently ignored.
573- func separateFilesAndDirs (rootDir string , paths []string ) ([]string , []string , error ) {
574- var files , dirs []string
633+ func separateFilesAndDirs (paths []string ) ([]string , []string , error ) {
634+ var filePaths , dirPaths []string
575635 for _ , path := range paths {
576- info , err := os .Lstat (filepath . Join ( rootDir , path ) )
636+ info , err := os .Lstat (path )
577637 if err != nil {
638+ // The file or directory may have already been removed.
578639 if errors .Is (err , os .ErrNotExist ) {
579- // The file or directory may have already been removed.
640+ slog . Warn ( "unable to find path" , "path" , path )
580641 continue
581642 }
582643 // For any other error (permissions, I/O, etc.)
583644 return nil , nil , fmt .Errorf ("failed to stat path %q: %w" , path , err )
584645
585646 }
586647 if info .IsDir () {
587- dirs = append (dirs , path )
648+ dirPaths = append (dirPaths , path )
588649 } else {
589- files = append (files , path )
650+ filePaths = append (filePaths , path )
590651 }
591652 }
592- return files , dirs , nil
593- }
594-
595- // compileRegexps takes a slice of string patterns and compiles each one into a
596- // regular expression. It returns a slice of compiled regexps or an error if any
597- // pattern is invalid.
598- func compileRegexps (patterns []string ) ([]* regexp.Regexp , error ) {
599- var regexps []* regexp.Regexp
600- for _ , pattern := range patterns {
601- re , err := regexp .Compile (pattern )
602- if err != nil {
603- return nil , fmt .Errorf ("invalid regex %q: %w" , pattern , err )
604- }
605- regexps = append (regexps , re )
606- }
607- return regexps , nil
653+ return filePaths , dirPaths , nil
608654}
0 commit comments