Skip to content

Commit 4d2c345

Browse files
committed
fix: enhance filename extraction logic for downloads and add unit tests
1 parent 33a886f commit 4d2c345

File tree

3 files changed

+118
-2
lines changed

3 files changed

+118
-2
lines changed

core/tasks/directlinks/execute.go

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -45,9 +45,17 @@ func (t *Task) Execute(ctx context.Context) error {
4545
fetchedTotalBytes.Add(resp.ContentLength)
4646
file.Size = resp.ContentLength
4747
if name := resp.Header.Get("Content-Disposition"); name != "" {
48-
// Set file name
4948
filename := parseFilename(name)
50-
file.Name = filename
49+
if filename != "" {
50+
file.Name = filename
51+
}
52+
}
53+
// extract filename from URL if Content-Disposition is empty or invalid
54+
if file.Name == "" {
55+
file.Name = parseFilenameFromURL(file.URL)
56+
}
57+
if file.Name == "" {
58+
return fmt.Errorf("failed to determine filename for %s: Content-Disposition header is empty and URL does not contain a valid filename", file.URL)
5159
}
5260

5361
return nil

core/tasks/directlinks/util.go

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -144,6 +144,41 @@ func tryDecodeGBK(s string) string {
144144
return ""
145145
}
146146

147+
// parseFilenameFromURL extracts a file name from the last path segment of
// rawURL. It is the fallback used when Content-Disposition yields no name.
//
// The path is percent-decoded exactly once: url.Parse already stores the
// decoded form in URL.Path, so decoding it a second time would corrupt names
// that legitimately contain an escaped percent sign (for example
// "report%2520final.pdf" must yield "report%20final.pdf", not
// "report final.pdf").
//
// It returns "" when rawURL cannot be parsed, when the path is empty or ends
// in "/", or when the last segment is "." or "..", which refer to directories
// rather than to a usable file name.
func parseFilenameFromURL(rawURL string) string {
	parsed, err := url.Parse(rawURL)
	if err != nil {
		return ""
	}

	// parsed.Path is already unescaped. LastIndex returns -1 when the path has
	// no slash, so the slice below then covers the whole path.
	decodedPath := parsed.Path
	filename := decodedPath[strings.LastIndex(decodedPath, "/")+1:]

	// url.Parse has already split off the real query string, so a '?' here can
	// only originate from an escaped "%3F". The defensive truncation of the
	// previous implementation is kept so such names are still cut at '?'.
	if idx := strings.Index(filename, "?"); idx != -1 {
		filename = filename[:idx]
	}

	// Dot segments are directory references, never file names.
	if filename == "." || filename == ".." {
		return ""
	}
	return filename
}
181+
147182
// parseFilenameFallback manually parses filename= when mime.ParseMediaType fails
148183
func parseFilenameFallback(cd string) string {
149184
// Look for filename= (case-insensitive)
Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
package directlinks
2+
3+
import (
4+
"testing"
5+
)
6+
7+
func TestParseFilenameFromURL(t *testing.T) {
8+
tests := []struct {
9+
name string
10+
url string
11+
expected string
12+
}{
13+
{
14+
name: "simple filename",
15+
url: "https://example.com/files/document.pdf",
16+
expected: "document.pdf",
17+
},
18+
{
19+
name: "filename with encoded characters",
20+
url: "https://example.com/files/%E6%B5%8B%E8%AF%95.zip",
21+
expected: "测试.zip",
22+
},
23+
{
24+
name: "filename with query string in URL",
25+
url: "https://example.com/files/image.png?token=abc123",
26+
expected: "image.png",
27+
},
28+
{
29+
name: "nested path",
30+
url: "https://example.com/a/b/c/file.txt",
31+
expected: "file.txt",
32+
},
33+
{
34+
name: "URL with port",
35+
url: "https://example.com:8080/downloads/archive.tar.gz",
36+
expected: "archive.tar.gz",
37+
},
38+
{
39+
name: "empty path",
40+
url: "https://example.com",
41+
expected: "",
42+
},
43+
{
44+
name: "root path only",
45+
url: "https://example.com/",
46+
expected: "",
47+
},
48+
{
49+
name: "filename with spaces encoded",
50+
url: "https://example.com/my%20file%20name.pdf",
51+
expected: "my file name.pdf",
52+
},
53+
{
54+
name: "complex encoded filename",
55+
url: "https://example.com/downloads/%E4%B8%AD%E6%96%87%E6%96%87%E4%BB%B6.docx",
56+
expected: "中文文件.docx",
57+
},
58+
{
59+
name: "invalid URL",
60+
url: "://invalid-url",
61+
expected: "",
62+
},
63+
}
64+
65+
for _, tt := range tests {
66+
t.Run(tt.name, func(t *testing.T) {
67+
result := parseFilenameFromURL(tt.url)
68+
if result != tt.expected {
69+
t.Errorf("parseFilenameFromURL(%q) = %q, want %q", tt.url, result, tt.expected)
70+
}
71+
})
72+
}
73+
}

0 commit comments

Comments
 (0)