Skip to content

Commit 15ea2bc

Browse files
committed
Fix config file loading and add option to output current configuration
1 parent d182a6e commit 15ea2bc

File tree

5 files changed

+115
-44
lines changed

5 files changed

+115
-44
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,3 +28,4 @@ vendor
2828
/dist
2929

3030
dist/
31+
/grawler.yaml

cmd/grawl.go

Lines changed: 108 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,12 @@
11
package cmd
22

33
import (
4-
"github.com/robole-dev/grawler/internal/configs"
4+
"fmt"
55
"github.com/robole-dev/grawler/internal/grawl"
6+
"github.com/spf13/cast"
67
"github.com/spf13/cobra"
8+
"github.com/spf13/viper"
9+
"log"
710
)
811

912
var (
@@ -21,59 +24,129 @@ var (
2124
}
2225
)
2326

24-
func init() {
25-
keyPrefix := "grawl"
27+
// viperGrawlPrefix is the first segment of every viper key that belongs to the
// grawl command; keys are built as viperGrawlPrefix + "." + <flag name>.
const viperGrawlPrefix = "grawl"

// Long names of the grawl command's command-line flags. The same names are
// used as the last segment of the corresponding viper keys.
const (
	// Request pacing and limits.
	flagNameDelay          = "delay"
	flagNameRandomDelay    = "random-delay"
	flagNameMaxDepth       = "max-depth"
	flagNameOutputFilepath = "output-filepath"
	flagNameParallel       = "parallel"

	// HTTP Basic Authentication and request identity.
	flagNameUsername  = "username"
	flagNamePassword  = "password"
	flagNameUserAgent = "user-agent"

	// Crawl scope and behaviour.
	flagNameSitemap              = "sitemap"
	flagNameAllowedDomains       = "allowed-domains"
	flagNameRespectRobotsTxt     = "respect-robots-txt"
	flagNamePath                 = "path"
	flagNameCheckAll             = "check-all"
	flagNameRequestTimeout       = "request-timeout"
	flagNameUrlFilters           = "url-filters"
	flagNameDisallowedURLFilters = "disallowed-url-filters"
)
2646

27-
grawlCmd.Flags().Int64VarP(&grawlFlags.FlagDelay, "delay", "d", 0, "Delay between requests in milliseconds. (default 0)")
28-
configs.BindViperFlag(grawlCmd, keyPrefix, "delay")
47+
// init registers every command-line flag of the grawl command and, right after
// each registration, binds the flag to its viper key through bindViperFlag.
// The flag values are stored in the fields of grawlFlags.
func init() {
	flags := grawlCmd.Flags()

	flags.Int64VarP(&grawlFlags.FlagDelay, flagNameDelay, "d", 0, "Delay between requests in milliseconds. (default 0)")
	bindViperFlag(flagNameDelay)

	flags.Int64Var(&grawlFlags.FlagRandomDelay, flagNameRandomDelay, 0, "Max random delay between requests in milliseconds. (default 0 for no random delay)")
	bindViperFlag(flagNameRandomDelay)

	flags.IntVarP(&grawlFlags.FlagMaxDepth, flagNameMaxDepth, "m", 0, "Set it to 0 for infinite recursion. (default 0)")
	bindViperFlag(flagNameMaxDepth)

	flags.StringVarP(&grawlFlags.FlagOutputFilename, flagNameOutputFilepath, "o", "", "Write statistic data of each request to this file.")
	bindViperFlag(flagNameOutputFilepath)

	flags.IntVarP(&grawlFlags.FlagParallel, flagNameParallel, "l", 1, "Number of parallel requests.")
	bindViperFlag(flagNameParallel)

	flags.StringVarP(&grawlFlags.FlagUsername, flagNameUsername, "u", "", "Use this for HTTP Basic Authentication. If you omit the password-flag a prompt will ask for the password.")
	bindViperFlag(flagNameUsername)

	flags.StringVarP(&grawlFlags.FlagPassword, flagNamePassword, "p", "", "Use this for HTTP Basic Authentication.")
	bindViperFlag(flagNamePassword)

	flags.StringVar(&grawlFlags.FlagUserAgent, flagNameUserAgent, "grawler", "Sets the user agent.")
	bindViperFlag(flagNameUserAgent)

	// Help text fixed: was "If this is flag is set ...".
	flags.BoolVarP(&grawlFlags.FlagSitemap, flagNameSitemap, "s", false, "Checks the sitemap. If this flag is set the url parameter has to be the url to the sitemap.xml.")
	bindViperFlag(flagNameSitemap)

	flags.StringSliceVarP(&grawlFlags.FlagAllowedDomains, flagNameAllowedDomains, "a", nil, "A comma separated list of allowed domains to be crawled. The domain of the given url is always allowed.")
	bindViperFlag(flagNameAllowedDomains)

	flags.BoolVar(&grawlFlags.FlagRespectRobotsTxt, flagNameRespectRobotsTxt, false, "Respect the robots.txt file.")
	bindViperFlag(flagNameRespectRobotsTxt)

	flags.StringVar(&grawlFlags.FlagPath, flagNamePath, "", "Restrict the crawlings on a certain url path.")
	bindViperFlag(flagNamePath)

	// Help text fixed: was "In addtion to ...".
	flags.BoolVarP(&grawlFlags.FlagCheckAll, flagNameCheckAll, "", false, "In addition to html and xml-urls, also check image, js and css-urls, among others.")
	bindViperFlag(flagNameCheckAll)

	flags.Float32Var(&grawlFlags.FlagRequestTimeout, flagNameRequestTimeout, 10, "Timeout in seconds to wait for a response.")
	bindViperFlag(flagNameRequestTimeout)

	flags.StringSliceVar(&grawlFlags.FlagURLFilters, flagNameUrlFilters, nil, "Only visit urls that match the regular expressions given here.")
	bindViperFlag(flagNameUrlFilters)

	flags.StringSliceVar(&grawlFlags.FlagDisallowedURLFilters, flagNameDisallowedURLFilters, nil, "Do not visit urls that match the regular expressions given here.")
	bindViperFlag(flagNameDisallowedURLFilters)
}
7596

7697
func warmItUp(url string) {
98+
99+
// Get values from viper back to flag vars
100+
grawlFlags.FlagDelay = viper.GetInt64(viperGrawlPrefix + "." + flagNameDelay)
101+
grawlFlags.FlagRandomDelay = viper.GetInt64(viperGrawlPrefix + "." + flagNameRandomDelay)
102+
grawlFlags.FlagMaxDepth = viper.GetInt(viperGrawlPrefix + "." + flagNameMaxDepth)
103+
grawlFlags.FlagOutputFilename = viper.GetString(viperGrawlPrefix + "." + flagNameOutputFilepath)
104+
grawlFlags.FlagParallel = viper.GetInt(viperGrawlPrefix + "." + flagNameParallel)
105+
grawlFlags.FlagUsername = viper.GetString(viperGrawlPrefix + "." + flagNameUsername)
106+
grawlFlags.FlagPassword = viper.GetString(viperGrawlPrefix + "." + flagNamePassword)
107+
grawlFlags.FlagUserAgent = viper.GetString(viperGrawlPrefix + "." + flagNameUserAgent)
108+
grawlFlags.FlagSitemap = viper.GetBool(viperGrawlPrefix + "." + flagNameSitemap)
109+
grawlFlags.FlagAllowedDomains = viper.GetStringSlice(viperGrawlPrefix + "." + flagNameAllowedDomains)
110+
grawlFlags.FlagRespectRobotsTxt = viper.GetBool(viperGrawlPrefix + "." + flagNameRespectRobotsTxt)
111+
grawlFlags.FlagPath = viper.GetString(viperGrawlPrefix + "." + flagNamePath)
112+
grawlFlags.FlagCheckAll = viper.GetBool(viperGrawlPrefix + "." + flagNameCheckAll)
113+
grawlFlags.FlagRequestTimeout = cast.ToFloat32(viper.Get(viperGrawlPrefix + "." + flagNameRequestTimeout))
114+
grawlFlags.FlagURLFilters = viper.GetStringSlice(viperGrawlPrefix + "." + flagNameUrlFilters)
115+
grawlFlags.FlagDisallowedURLFilters = viper.GetStringSlice(viperGrawlPrefix + "." + flagNameDisallowedURLFilters)
116+
117+
if flagConfigInfo {
118+
fmt.Println("")
119+
fmt.Println("Grawl configuration values")
120+
fmt.Println("==========================")
121+
fmt.Println("Url:", url)
122+
fmt.Println("Delay:", grawlFlags.FlagDelay)
123+
fmt.Println("RandomDelay:", grawlFlags.FlagRandomDelay)
124+
fmt.Println("MaxDepth:", grawlFlags.FlagMaxDepth)
125+
fmt.Println("OutputFilepath:", grawlFlags.FlagOutputFilename)
126+
fmt.Println("Parallel:", grawlFlags.FlagParallel)
127+
fmt.Println("Username:", grawlFlags.FlagUsername)
128+
fmt.Println("Password:", grawlFlags.FlagPassword)
129+
fmt.Println("UserAgent:", grawlFlags.FlagUserAgent)
130+
fmt.Println("Sitemap:", grawlFlags.FlagSitemap)
131+
fmt.Println("AllowedDomains:", grawlFlags.FlagAllowedDomains)
132+
fmt.Println("RespectRobotsTxt:", grawlFlags.FlagRespectRobotsTxt)
133+
fmt.Println("Path:", grawlFlags.FlagPath)
134+
fmt.Println("CheckAll:", grawlFlags.FlagCheckAll)
135+
fmt.Println("RequestTimeout:", grawlFlags.FlagRequestTimeout)
136+
fmt.Println("URLFilters:", grawlFlags.FlagURLFilters)
137+
fmt.Println("DisallowedURLFilters:", grawlFlags.FlagDisallowedURLFilters)
138+
fmt.Println("")
139+
}
140+
77141
grawler := grawl.NewGrawler(grawlFlags)
78142
grawler.Grawl(url)
79143
}
144+
145+
func bindViperFlag(flagLookup string) {
146+
key := viperGrawlPrefix + "." + flagLookup
147+
err := viper.BindPFlag(key, grawlCmd.Flags().Lookup(flagLookup))
148+
if err != nil {
149+
log.Fatalln(fmt.Errorf("error binding config option to flag: %v", err))
150+
return
151+
}
152+
}

cmd/init.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ var (
2828
)
2929

3030
func init() {
31-
initCmd.Flags().BoolVar(&initFlagUseHome, "home", false, fmt.Sprintf("If enabled it writes the config file to the path \"%s\"", configs.DefaultConfFile()))
31+
initCmd.Flags().BoolVar(&initFlagUseHome, "home", false, fmt.Sprintf("If enabled the config file is written to \"%s\"", configs.DefaultConfFile()))
3232
}
3333

3434
func writeConfigFile(configFilePathParam string) {

cmd/root.go

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ var (
1616
versionInfo *version.Info = nil
1717
flagVersion bool
1818
flagConfigPath string
19+
flagConfigInfo bool
1920
rootCmd = &cobra.Command{
2021
Use: "grawler",
2122
Short: "A simple web crawling application.",
@@ -38,6 +39,7 @@ func init() {
3839
rootCmd.AddCommand(initCmd)
3940
rootCmd.Flags().BoolVarP(&flagVersion, "version", "v", false, "Show version")
4041
rootCmd.PersistentFlags().StringVar(&flagConfigPath, "config", "", "Manually set the path to your config file.")
42+
rootCmd.PersistentFlags().BoolVar(&flagConfigInfo, "config-info", false, "Outputs the current configuration values.")
4143
}
4244

4345
func initConfig() {
@@ -61,10 +63,12 @@ func initConfig() {
6163
if err := viper.ReadInConfig(); err != nil {
6264
var configFileNotFoundError viper.ConfigFileNotFoundError
6365
if errors.As(err, &configFileNotFoundError) {
64-
//fmt.Println("Config file not found.", flagConfigPath)
66+
fmt.Println("No config file found.")
6567
} else {
6668
log.Fatalln(fmt.Sprintf("Something unexpected happened reading configuration file: %s, err: %s", configFilePath, err))
6769
}
70+
} else {
71+
fmt.Printf("Using config file \"%s\".\n", viper.ConfigFileUsed())
6872
}
6973
}
7074

internal/configs/configs.go

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,6 @@
11
package configs
22

33
import (
4-
"github.com/spf13/cobra"
5-
"github.com/spf13/viper"
64
"golang.org/x/text/cases"
75
"golang.org/x/text/language"
86
"log"
@@ -36,11 +34,6 @@ func DefaultConfFileType() string {
3634
return "yaml"
3735
}
3836

39-
func BindViperFlag(cobraCommand *cobra.Command, keyPrefix string, flagLookup string) {
40-
//viper.BindPFlag(keyPrefix+"."+toSnakeCase(flagLookup), cobraCommand.Flags().Lookup(flagLookup))
41-
viper.BindPFlag(keyPrefix+"."+flagLookup, cobraCommand.Flags().Lookup(flagLookup))
42-
}
43-
4437
func toCamelCase(key string) string {
4538
words := strings.Split(key, "-")
4639
caser := cases.Title(language.AmericanEnglish)

0 commit comments

Comments
 (0)