Skip to content

Commit 3ee39a6

Browse files
zhumin8 and JoeWang1127 authored
feat(internal/librarian): add clean logic to generate (#871)
Port over owl-bot clean logic. - Port over the clean logic from the owl-bot [code](https://github.com/googleapis/repo-automation-bots/blob/12dad68640960290910b660e4325630c9ace494b/packages/owl-bot/src/copy-code.ts#L1027). - Configuration: use regular expressions for the preserve and remove settings in [state.yaml](https://github.com/googleapis/librarian/blob/a5695eb2bff6ef813cac3c5ed005c3448bfb5da2/internal/config/state.go#L96-L101). This is consistent with the old owl-bot logic ([ref](https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/src/config-files.ts#L166-L176)). - The clean logic can handle symlinks (see the added tests [here](https://github.com/googleapis/librarian/blob/644f67303089394716911b78d57069344fc45ade/internal/librarian/generate_test.go#L510-L532)). However, os.CopyFS() in Go 1.24 does not support symlinks and returns an error (support is expected in the upcoming Go 1.25 release); a comment has been added to note this. (Python confirmed this is non-blocking for onboarding.) Will try to fix test coverage while in review. Fixes #775 --------- Signed-off-by: Min Zhu <[email protected]> Co-authored-by: Joe Wang <[email protected]>
1 parent 07e9327 commit 3ee39a6

File tree

3 files changed

+873
-20
lines changed

3 files changed

+873
-20
lines changed

internal/librarian/generate.go

Lines changed: 183 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -18,9 +18,13 @@ import (
1818
"context"
1919
"errors"
2020
"fmt"
21+
"io/fs"
2122
"log/slog"
2223
"os"
2324
"path/filepath"
25+
"regexp"
26+
"slices"
27+
"strings"
2428
"time"
2529

2630
"github.com/googleapis/librarian/internal/cli"
@@ -171,13 +175,14 @@ func (r *generateRunner) run(ctx context.Context) error {
171175
return err
172176
}
173177

174-
if err := r.runBuildCommand(ctx, outputDir, libraryID); err != nil {
178+
if err := r.runBuildCommand(ctx, libraryID); err != nil {
175179
return err
176180
}
177181
return nil
178182
}
179183

180-
// runGenerateCommand attempts to perform generation for an API.
184+
// runGenerateCommand attempts to perform generation for an API. It then cleans the
185+
// destination directory and copies the newly generated files into it.
181186
//
182187
// If successful, it returns the ID of the generated library; otherwise, it
183188
// returns an empty string and an error.
@@ -204,18 +209,43 @@ func (r *generateRunner) runGenerateCommand(ctx context.Context, outputDir strin
204209
RepoDir: r.repo.Dir,
205210
}
206211
slog.Info("Performing refined generation for library", "id", libraryID)
207-
return libraryID, r.containerClient.Generate(ctx, generateRequest)
212+
if err := r.containerClient.Generate(ctx, generateRequest); err != nil {
213+
return "", err
214+
}
215+
216+
if err := r.cleanAndCopyLibrary(libraryID, outputDir); err != nil {
217+
return "", err
218+
}
219+
return libraryID, nil
208220
}
221+
209222
slog.Info("No matching library found (or no repo specified)", "path", r.cfg.API)
210223
return "", fmt.Errorf("library not found")
211224
}
212225

226+
func (r *generateRunner) cleanAndCopyLibrary(libraryID, outputDir string) error {
227+
library := findLibraryByID(r.state, libraryID)
228+
if library == nil {
229+
return fmt.Errorf("library %q not found during clean and copy, despite being found in earlier steps", libraryID)
230+
}
231+
if err := clean(r.repo.Dir, library.RemoveRegex, library.PreserveRegex); err != nil {
232+
return err
233+
}
234+
// os.CopyFS in Go1.24 returns error when copying from a symbolic link
235+
// https://github.com/golang/go/blob/9d828e80fa1f3cc52de60428cae446b35b576de8/src/os/dir.go#L143-L144
236+
if err := os.CopyFS(r.repo.Dir, os.DirFS(outputDir)); err != nil {
237+
return err
238+
}
239+
slog.Info("Library updated", "id", libraryID)
240+
return nil
241+
}
242+
213243
// runBuildCommand orchestrates the building of an API library using a containerized
214244
// environment.
215245
//
216246
// The build is requested for the library identified by `libraryID` and runs against
217247
// the local language repository (`r.repo.Dir`); it is skipped when the build flag is not set.
218-
func (r *generateRunner) runBuildCommand(ctx context.Context, outputDir, libraryID string) error {
248+
func (r *generateRunner) runBuildCommand(ctx context.Context, libraryID string) error {
219249
if !r.cfg.Build {
220250
slog.Info("Build flag not specified, skipping")
221251
return nil
@@ -231,14 +261,159 @@ func (r *generateRunner) runBuildCommand(ctx context.Context, outputDir, library
231261
LibraryID: libraryID,
232262
RepoDir: r.repo.Dir,
233263
}
234-
235264
slog.Info("Build requested in the context of refined generation; cleaning and copying code to the local language repo before building.")
236-
// TODO(https://github.com/googleapis/librarian/issues/775)
237-
if err := os.CopyFS(r.repo.Dir, os.DirFS(outputDir)); err != nil {
265+
return r.containerClient.Build(ctx, buildRequest)
266+
}
267+
268+
// clean removes files and directories from a root directory based on remove and preserve patterns.
269+
//
270+
// It first determines the paths to remove by applying the removePatterns and then excluding any paths
271+
// that match the preservePatterns. It then separates the remaining paths into files and directories and
272+
// removes them, ensuring that directories are removed last.
273+
//
274+
// This logic is ported from owlbot logic: https://github.com/googleapis/repo-automation-bots/blob/12dad68640960290910b660e4325630c9ace494b/packages/owl-bot/src/copy-code.ts#L1027
275+
func clean(rootDir string, removePatterns, preservePatterns []string) error {
276+
finalPathsToRemove, err := deriveFinalPathsToRemove(rootDir, removePatterns, preservePatterns)
277+
if err != nil {
238278
return err
239279
}
240280

241-
return r.containerClient.Build(ctx, buildRequest)
281+
filesToRemove, dirsToRemove, err := separateFilesAndDirs(rootDir, finalPathsToRemove)
282+
if err != nil {
283+
return err
284+
}
285+
286+
// Remove files first, then directories.
287+
for _, file := range filesToRemove {
288+
slog.Info("Removing file", "path", file)
289+
if err := os.Remove(filepath.Join(rootDir, file)); err != nil {
290+
return err
291+
}
292+
}
293+
294+
sortDirsByDepth(dirsToRemove)
295+
296+
for _, dir := range dirsToRemove {
297+
slog.Info("Removing directory", "path", dir)
298+
if err := os.Remove(filepath.Join(rootDir, dir)); err != nil {
299+
// It's possible the directory is not empty due to preserved files.
300+
slog.Warn("failed to remove directory, it may not be empty", "dir", dir, "err", err)
301+
}
302+
}
303+
304+
return nil
305+
}
306+
307+
// sortDirsByDepth reorders dirs in place so that paths containing more path
// separators come before paths containing fewer, which lets a caller remove
// nested directories before their parents.
func sortDirsByDepth(dirs []string) {
	sep := string(filepath.Separator)
	depth := func(p string) int {
		return strings.Count(p, sep)
	}
	slices.SortFunc(dirs, func(x, y string) int {
		return depth(y) - depth(x)
	})
}
313+
314+
// allPaths walks the directory tree rooted at rootDir and returns every file
// and directory found beneath it, as paths relative to rootDir, in the order
// filepath.WalkDir visits them.
//
// rootDir itself (which would be reported as ".") is deliberately left out of
// the result: the returned paths feed the removal logic, and a broad pattern
// matching "." must never cause the root directory to be scheduled for
// deletion.
//
// If the walk reports an error, or a relative path cannot be computed, the
// walk stops and that error is returned together with a nil slice.
func allPaths(rootDir string) ([]string, error) {
	var paths []string
	err := filepath.WalkDir(rootDir, func(path string, _ fs.DirEntry, walkErr error) error {
		if walkErr != nil {
			return walkErr
		}
		relPath, err := filepath.Rel(rootDir, path)
		if err != nil {
			return err
		}
		if relPath == "." {
			// The root is the container being cleaned, not a candidate entry.
			return nil
		}
		paths = append(paths, relPath)
		return nil
	})
	if err != nil {
		return nil, err
	}
	return paths, nil
}
331+
332+
// filterPaths selects, in their original order, the entries of paths that are
// matched by at least one expression in regexps. Each matching path appears
// once in the result; when nothing matches (or regexps is empty) the result is
// nil.
func filterPaths(paths []string, regexps []*regexp.Regexp) []string {
	// matchesAny reports whether any of the expressions matches p.
	matchesAny := func(p string) bool {
		for i := range regexps {
			if regexps[i].MatchString(p) {
				return true
			}
		}
		return false
	}

	var matched []string
	for _, candidate := range paths {
		if !matchesAny(candidate) {
			continue
		}
		matched = append(matched, candidate)
	}
	return matched
}
346+
347+
// deriveFinalPathsToRemove determines the final set of paths to be removed. It
348+
// starts with all paths under rootDir, filters them based on removePatterns,
349+
// and then excludes any paths that match preservePatterns.
350+
func deriveFinalPathsToRemove(rootDir string, removePatterns, preservePatterns []string) ([]string, error) {
351+
removeRegexps, err := compileRegexps(removePatterns)
352+
if err != nil {
353+
return nil, err
354+
}
355+
preserveRegexps, err := compileRegexps(preservePatterns)
356+
if err != nil {
357+
return nil, err
358+
}
359+
360+
allPaths, err := allPaths(rootDir)
361+
if err != nil {
362+
return nil, err
363+
}
364+
365+
pathsToRemove := filterPaths(allPaths, removeRegexps)
366+
pathsToPreserve := filterPaths(pathsToRemove, preserveRegexps)
367+
368+
// delete pathsToPreserve from pathsToRemove.
369+
pathsToDelete := make(map[string]bool)
370+
for _, p := range pathsToPreserve {
371+
pathsToDelete[p] = true
372+
}
373+
finalPathsToRemove := slices.DeleteFunc(pathsToRemove, func(path string) bool {
374+
return pathsToDelete[path]
375+
})
376+
return finalPathsToRemove, nil
377+
}
378+
379+
// separateFilesAndDirs splits paths (each relative to rootDir) into two lists:
// non-directories first, directories second, both in input order.
//
// Entries are inspected with os.Lstat, so a symlink is not followed and is
// reported as a file. A path that no longer exists is skipped without error;
// any other stat failure aborts the call and is returned wrapped, with both
// result slices nil.
func separateFilesAndDirs(rootDir string, paths []string) ([]string, []string, error) {
	var (
		files []string
		dirs  []string
	)
	for _, rel := range paths {
		info, statErr := os.Lstat(filepath.Join(rootDir, rel))
		switch {
		case statErr == nil:
			if info.IsDir() {
				dirs = append(dirs, rel)
			} else {
				files = append(files, rel)
			}
		case errors.Is(statErr, os.ErrNotExist):
			// The file or directory may have already been removed.
		default:
			// For any other error (permissions, I/O, etc.)
			return nil, nil, fmt.Errorf("failed to stat path %q: %w", rel, statErr)
		}
	}
	return files, dirs, nil
}
403+
404+
// compileRegexps compiles every entry of patterns with regexp.Compile and
// returns the compiled expressions in the same order. Compilation stops at the
// first invalid pattern, which is reported in the returned error alongside a
// nil slice. An empty input yields a nil slice and no error.
func compileRegexps(patterns []string) ([]*regexp.Regexp, error) {
	var compiled []*regexp.Regexp
	for i := range patterns {
		expr := patterns[i]
		re, compileErr := regexp.Compile(expr)
		if compileErr != nil {
			return nil, fmt.Errorf("invalid regex %q: %w", expr, compileErr)
		}
		compiled = append(compiled, re)
	}
	return compiled, nil
}
243418

244419
// detectIfLibraryConfigured returns whether a library has been configured for

0 commit comments

Comments
 (0)