Skip to content

Commit b072a7d

Browse files
Minor improvement for reduxer
1 parent 6089857 commit b072a7d

File tree

4 files changed

+37
-40
lines changed

4 files changed

+37
-40
lines changed

go.mod

+1-1
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ require (
3636
github.com/wabarc/playback v0.0.0-20210718054702-cab6c6004933
3737
github.com/wabarc/rivet v0.0.0-20220207154318-37fc56bcf4e1
3838
github.com/wabarc/screenshot v1.4.1-0.20211226132820-f5eed318376e
39-
github.com/wabarc/telegra.ph v0.0.0-20210822083402-82f95ce60a37
39+
github.com/wabarc/telegra.ph v0.0.0-20220216145835-479d23542bfc
4040
github.com/wabarc/warcraft v0.2.2-0.20211107142816-7beea5a75ab5
4141
go.etcd.io/bbolt v1.3.6
4242
golang.org/x/net v0.0.0-20220121210141-e204ce36a2ba

go.sum

+2-2
Original file line numberDiff line numberDiff line change
@@ -713,8 +713,8 @@ github.com/wabarc/rivet v0.0.0-20220207154318-37fc56bcf4e1/go.mod h1:aWsIBu+Jr99
713713
github.com/wabarc/screenshot v1.3.1/go.mod h1:ei8rqXW5mdztkqcsb81YUVwBZFROgyjAQJrEKEiMWfY=
714714
github.com/wabarc/screenshot v1.4.1-0.20211226132820-f5eed318376e h1:8QUF3oJ/u4doLXiG5gwIu+NUrfTnusGfjzTz4mVZ1p0=
715715
github.com/wabarc/screenshot v1.4.1-0.20211226132820-f5eed318376e/go.mod h1:HhtMtB0tOiUId8zteVvMQDfJJ1Wa/c3Mg5KKrWNYlrs=
716-
github.com/wabarc/telegra.ph v0.0.0-20210822083402-82f95ce60a37 h1:lqVzAnARDEJO+bTtQiCRu2lg55QxboohtP/RZN9Y9mU=
717-
github.com/wabarc/telegra.ph v0.0.0-20210822083402-82f95ce60a37/go.mod h1:532VM0F+WU2TSVvolJN3U5xihKLBt8ubLqyfWExM7As=
716+
github.com/wabarc/telegra.ph v0.0.0-20220216145835-479d23542bfc h1:r2howsA9nt3I1GiPKeG1FO1S2rLTJsICFSX6CcXkxos=
717+
github.com/wabarc/telegra.ph v0.0.0-20220216145835-479d23542bfc/go.mod h1:A8Q31u/3x1+v+oGPtQE9xp1+fN9WDePAI9OS7RCOhyY=
718718
github.com/wabarc/warcraft v0.2.2-0.20211107142816-7beea5a75ab5 h1:jY/jqIy/ddCMWWWuTIeAazE5F4QW8HAIvlI69XMJ1ew=
719719
github.com/wabarc/warcraft v0.2.2-0.20211107142816-7beea5a75ab5/go.mod h1:/BbCwReBjlqHRaw8Yh+7sfAicOesiMYNhiFpuL1x8Rc=
720720
github.com/whyrusleeping/tar-utils v0.0.0-20180509141711-8c6c8ba81d5c/go.mod h1:xxcJeBb7SIUl/Wzkz1eVKJE/CB34YNrqX2TQI6jY9zs=

reduxer/reduxer.go

+28-16
Original file line numberDiff line numberDiff line change
@@ -72,40 +72,53 @@ type Remote struct {
7272
type Bundles map[string]*Bundle
7373

7474
// Get returns a Bundle by given name.
75-
func (b Bundles) Get(name string) *Bundle {
76-
return b[name]
75+
func (bs Bundles) Get(name string) (bundle *Bundle) {
76+
if b := bs[name]; b != nil {
77+
bundle = b
78+
}
79+
return
80+
}
81+
82+
// Shot returns a screenshot.Screenshots from Bundle.
83+
func (b *Bundle) Shot() (s screenshot.Screenshots) {
84+
if b != nil {
85+
return screenshot.Screenshots{
86+
URL: b.URL,
87+
Title: b.Title,
88+
Image: b.Image,
89+
HTML: b.HTML,
90+
PDF: b.PDF,
91+
}
92+
}
93+
return
7794
}
7895

7996
// Do takes a screenshot, prints a PDF and exports the HTML of the given URLs.
8097
// Returns a set of bundles containing screenshot data and file paths.
8198
// nolint:gocyclo
8299
func Do(ctx context.Context, urls ...*url.URL) (Bundles, error) {
100+
bundles := make(Bundles, len(urls))
83101
if !config.Opts.EnabledReduxer() {
84-
return nil, errors.New("Specify directory to environment `WAYBACK_STORAGE_DIR` to enable reduxer")
102+
return bundles, errors.New("Specify directory to environment `WAYBACK_STORAGE_DIR` to enable reduxer")
85103
}
86104

87105
shots, err := capture(ctx, urls...)
88106
if err != nil {
89-
return nil, err
107+
return bundles, err
90108
}
91109

92110
dir, err := createDir(config.Opts.StorageDir())
93111
if err != nil {
94-
return nil, err
112+
return bundles, err
95113
}
96114

97115
var wg sync.WaitGroup
98116
var mu sync.Mutex
99117
var warc = &warcraft.Warcraft{BasePath: dir, UserAgent: config.Opts.WaybackUserAgent()}
100-
var craft = func(in string) string {
101-
u, err := url.Parse(in)
102-
if err != nil {
103-
logger.Debug("create warc for %s failed", u.String())
104-
return ""
105-
}
106-
path, err := warc.Download(ctx, u)
118+
var craft = func(in *url.URL) string {
119+
path, err := warc.Download(ctx, in)
107120
if err != nil {
108-
logger.Debug("create warc for %s failed: %v", u.String(), err)
121+
logger.Debug("create warc for %s failed: %v", in.String(), err)
109122
return ""
110123
}
111124
return path
@@ -116,7 +129,6 @@ func Do(ctx context.Context, urls ...*url.URL) (Bundles, error) {
116129
buf []byte
117130
}
118131

119-
bundles := make(Bundles)
120132
for _, shot := range shots {
121133
wg.Add(1)
122134
go func(shot screenshot.Screenshots) {
@@ -129,6 +141,7 @@ func Do(ctx context.Context, urls ...*url.URL) (Bundles, error) {
129141
{key: &assets.Raw, buf: shot.HTML},
130142
{key: &assets.HAR, buf: shot.HAR},
131143
}
144+
u, _ := url.Parse(shot.URL)
132145
for _, slug := range slugs {
133146
if slug.buf == nil {
134147
logger.Warn("file empty, skipped")
@@ -152,13 +165,12 @@ func Do(ctx context.Context, urls ...*url.URL) (Bundles, error) {
152165
}
153166
}
154167
// Set the path of the WARC file directly to avoid reading the file into a buffer
155-
if err := helper.SetField(&assets.WARC, "Local", craft(shot.URL)); err != nil {
168+
if err := helper.SetField(&assets.WARC, "Local", craft(u)); err != nil {
156169
logger.Error("assign field WARC to path struct failed: %v", err)
157170
}
158171
if err := helper.SetField(&assets.Media, "Local", media(ctx, dir, shot.URL)); err != nil {
159172
logger.Error("assign field Media to path struct failed: %v", err)
160173
}
161-
u, _ := url.Parse(shot.URL)
162174
article, err := readability.FromReader(bytes.NewReader(shot.HTML), u)
163175
if err != nil {
164176
logger.Error("parse html failed: %v", err)

wayback.go

+6-21
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,6 @@ import (
1313
"github.com/wabarc/logger"
1414
"github.com/wabarc/playback"
1515
"github.com/wabarc/rivet/ipfs"
16-
"github.com/wabarc/screenshot"
1716
"github.com/wabarc/wayback/config"
1817
"github.com/wabarc/wayback/errors"
1918
"github.com/wabarc/wayback/reduxer"
@@ -119,16 +118,14 @@ func (i IP) Wayback() string {
119118
secret := config.Opts.IPFSSecret()
120119
opts = append(opts, ipfs.Uses(target), ipfs.Apikey(apikey), ipfs.Secret(secret))
121120
}
122-
123121
arc := &ip.Shaft{Hold: ipfs.Options(opts...)}
122+
ctx := i.ctx
124123

125124
// If there is bundled HTML, it is utilized as the basis for IPFS
126125
// archiving and is sent to obelisk to crawl the rest of the page.
127-
if i.bundle != nil {
128-
i.ctx = arc.WithInput(i.ctx, i.bundle.HTML)
129-
}
126+
ctx = arc.WithInput(i.ctx, i.bundle.HTML)
130127

131-
dst, err := arc.Wayback(i.ctx, i.URL)
128+
dst, err := arc.Wayback(ctx, i.URL)
132129
if err != nil {
133130
logger.Error("wayback %s to IPFS failed: %v", i.URL.String(), err)
134131
return fmt.Sprint(err)
@@ -140,32 +137,20 @@ func (i IP) Wayback() string {
140137
// it reads URL from the PH and returns archived URL as a string.
141138
func (i PH) Wayback() string {
142139
arc := &ph.Archiver{}
143-
arc.SetShot(i.parseShot())
140+
ctx := arc.WithShot(i.ctx, i.bundle.Shot())
144141
if config.Opts.EnabledChromeRemote() {
145142
arc.ByRemote(config.Opts.ChromeRemoteAddr())
146143
}
144+
ctx = arc.WithArticle(ctx, i.bundle.Article)
147145

148-
dst, err := arc.Wayback(i.ctx, i.URL)
146+
dst, err := arc.Wayback(ctx, i.URL)
149147
if err != nil {
150148
logger.Error("wayback %s to telegra.ph failed: %v", i.URL.String(), err)
151149
return fmt.Sprint(err)
152150
}
153151
return dst
154152
}
155153

156-
func (i PH) parseShot() (shot screenshot.Screenshots) {
157-
if i.bundle != nil {
158-
shot = screenshot.Screenshots{
159-
URL: i.bundle.URL,
160-
Title: i.bundle.Title,
161-
Image: i.bundle.Image,
162-
HTML: i.bundle.HTML,
163-
PDF: i.bundle.PDF,
164-
}
165-
}
166-
return
167-
}
168-
169154
func wayback(w Waybacker) string {
170155
return w.Wayback()
171156
}

0 commit comments

Comments
 (0)