|
| 1 | +package main |
| 2 | + |
| 3 | +import ( |
| 4 | + "encoding/hex" |
| 5 | + "path/filepath" |
| 6 | + "reflect" |
| 7 | + "sort" |
| 8 | + "testing" |
| 9 | + "testing/fstest" |
| 10 | + |
| 11 | + "github.com/richardlehane/siegfried" |
| 12 | + "github.com/richardlehane/siegfried/pkg/config" |
| 13 | + "github.com/richardlehane/siegfried/pkg/pronom" |
| 14 | +) |
| 15 | + |
// DataPath is the relative path ../../cmd/roy/data. TestPronom passes
// it to config.SetHome before building the PRONOM identifier.
var DataPath = filepath.Join("..", "..", "cmd", "roy", "data")
| 17 | + |
// pronomIdentificationTests provides our structure for table driven tests.
//
// The fields are declared in the same order as the values TestPronom
// copies out of an identification result, so that rows can be written
// and built positionally.
type pronomIdentificationTests struct {
	identifier string // name of the identifier that produced the result
	puid       string // PRONOM unique identifier, or "UNKNOWN"
	label      string // format name
	version    string // format version
	mime       string // MIME type
	types      string // format type(s)
	details    string // basis of the match
	error      string // error/warning text, e.g. "no match"
}
| 29 | + |
// skeletons holds the in-memory test files, keyed by file name. It is
// filled by makeSkeletons and served to the tests through fstest.MapFS.
var skeletons = make(map[string]*fstest.MapFile)

// minimalPronom lists the PUIDs handed to config.SetLimit when
// TestPronom builds its PRONOM identifier.
var minimalPronom = []string{"fmt/1", "fmt/3", "fmt/5", "fmt/11", "fmt/14"}

// makeSkeletons populates the global skeletons map from hex-encoded
// byte sequences to save having to store skeletons on disk and read
// from them.
//
// It panics if one of the hard-coded sequences is not valid hex, since
// that is a mistake in this file rather than a runtime condition.
func makeSkeletons() {
	files := map[string]string{
		"fmt-11-signature-id-58.png":  "89504e470d0a1a0a0000000d494844520000000049454e44ae426082",
		"fmt-14-signature-id-123.pdf": "255044462d312e302525454f46",
		"fmt-1-signature-id-1032.wav": "524946460000000057415645000000000000000000000000000000000000" +
			"000062657874000000000000000000000000000000000000000000000000" +
			"000000000000000000000000000000000000000000000000000000000000" +
			"000000000000000000000000000000000000000000000000000000000000" +
			"000000000000000000000000000000000000000000000000000000000000" +
			"000000000000000000000000000000000000000000000000000000000000" +
			"000000000000000000000000000000000000000000000000000000000000" +
			"000000000000000000000000000000000000000000000000000000000000" +
			"000000000000000000000000000000000000000000000000000000000000" +
			"000000000000000000000000000000000000000000000000000000000000" +
			"000000000000000000000000000000000000000000000000000000000000" +
			"000000000000000000000000000000000000000000000000000000000000" +
			"00000000000000000000000000000000000000000000000000000000",
		"fmt-5-signature-id-51.avi": "524946460000000041564920000000000000000000000000000000000000" +
			"00004c495354000000006864726c61766968000000000000000000000000" +
			"00000000000000004c495354000000006d6f7669",
		"fmt-3-signature-id-18.gif": "4749463837613b",
		// This used to be the odd-length string "badf00d", which
		// hex.DecodeString rejects with ErrLength after decoding the
		// first three bytes. Spell those three bytes out explicitly so
		// the file content is unchanged and the input is valid hex.
		"badf00d.unknown": "badf00",
	}
	for name, encoded := range files {
		data, err := hex.DecodeString(encoded)
		if err != nil {
			panic("makeSkeletons: invalid hex for " + name + ": " + err.Error())
		}
		skeletons[name] = &fstest.MapFile{Data: data}
	}
}
| 67 | + |
| 68 | +var pronomIDs = []pronomIdentificationTests{ |
| 69 | + { |
| 70 | + "pronom", |
| 71 | + "UNKNOWN", |
| 72 | + "", |
| 73 | + "", |
| 74 | + "", |
| 75 | + "", |
| 76 | + "", |
| 77 | + "no match", |
| 78 | + }, |
| 79 | + { |
| 80 | + "pronom", |
| 81 | + "fmt/1", |
| 82 | + "Broadcast WAVE", |
| 83 | + "0 Generic", |
| 84 | + "audio/x-wav", |
| 85 | + "Audio", |
| 86 | + "extension match wav; byte match at [[0 12] [32 356]]", |
| 87 | + "", |
| 88 | + }, |
| 89 | + { |
| 90 | + "pronom", |
| 91 | + "fmt/11", |
| 92 | + "Portable Network Graphics", |
| 93 | + "1.0", |
| 94 | + "image/png", |
| 95 | + "Image (Raster)", |
| 96 | + "extension match png; byte match at [[0 16] [16 12]]", |
| 97 | + "", |
| 98 | + }, |
| 99 | + { |
| 100 | + "pronom", |
| 101 | + "fmt/14", |
| 102 | + "Acrobat PDF 1.0 - Portable Document Format", |
| 103 | + "1.0", |
| 104 | + "application/pdf", |
| 105 | + "Page Description", |
| 106 | + "extension match pdf; byte match at [[0 8] [8 5]]", |
| 107 | + "", |
| 108 | + }, |
| 109 | + { |
| 110 | + "pronom", |
| 111 | + "fmt/3", |
| 112 | + "Graphics Interchange Format", |
| 113 | + "87a", |
| 114 | + "image/gif", |
| 115 | + "Image (Raster)", |
| 116 | + "extension match gif; byte match at [[0 6] [6 1]]", |
| 117 | + "", |
| 118 | + }, |
| 119 | + { |
| 120 | + "pronom", |
| 121 | + "fmt/5", |
| 122 | + "Audio/Video Interleaved Format", |
| 123 | + "", |
| 124 | + "video/x-msvideo", |
| 125 | + "Audio, Video", |
| 126 | + "extension match avi; byte match at [[0 12] [32 16] [68 12]]", |
| 127 | + "", |
| 128 | + }, |
| 129 | +} |
| 130 | + |
| 131 | +// TestPronom looks to see if PRONOM identification results for a |
| 132 | +// minimized PRONOM dataset are correct and contain the information we |
| 133 | +// anticipate. |
| 134 | +func TestPronom(t *testing.T) { |
| 135 | + sf := siegfried.New() |
| 136 | + config.SetHome(DataPath) |
| 137 | + identifier, err := pronom.New(config.SetLimit(minimalPronom)) |
| 138 | + if err != nil { |
| 139 | + t.Errorf("Error creating new PRONOM identifier: %s", err) |
| 140 | + } |
| 141 | + sf.Add(identifier) |
| 142 | + makeSkeletons() |
| 143 | + skeletonFS := fstest.MapFS(skeletons) |
| 144 | + testDirListing, err := skeletonFS.ReadDir(".") |
| 145 | + if err != nil { |
| 146 | + t.Fatalf("Error reading test files directory: %s", err) |
| 147 | + } |
| 148 | + const resultLen int = 8 |
| 149 | + results := make([]pronomIdentificationTests, 0) |
| 150 | + for _, val := range testDirListing { |
| 151 | + testFilePath := filepath.Join(".", val.Name()) |
| 152 | + reader, _ := skeletonFS.Open(val.Name()) |
| 153 | + res, _ := sf.Identify(reader, testFilePath, "") |
| 154 | + result := res[0].Values() |
| 155 | + if len(result) != resultLen { |
| 156 | + t.Errorf("Result len: %d not %d", len(result), resultLen) |
| 157 | + } |
| 158 | + idResult := pronomIdentificationTests{ |
| 159 | + result[0], // identifier |
| 160 | + result[1], // PUID |
| 161 | + result[2], // label |
| 162 | + result[3], // version |
| 163 | + result[4], // mime |
| 164 | + result[5], // types |
| 165 | + result[6], // details |
| 166 | + result[7], // error |
| 167 | + } |
| 168 | + results = append(results, idResult) |
| 169 | + } |
| 170 | + // Sort expected results and received results to make them |
| 171 | + // comparable. |
| 172 | + sort.Slice(pronomIDs, func(i, j int) bool { |
| 173 | + return pronomIDs[i].puid < pronomIDs[j].puid |
| 174 | + }) |
| 175 | + sort.Slice(results, func(i, j int) bool { |
| 176 | + return results[i].puid < results[j].puid |
| 177 | + }) |
| 178 | + // Compare results on a result by result basis. |
| 179 | + for idx, res := range results { |
| 180 | + //t.Error(res) |
| 181 | + if !reflect.DeepEqual(res, pronomIDs[idx]) { |
| 182 | + t.Errorf("Results not equal for %s; expected %v; got %v", res.puid, pronomIDs[idx], res) |
| 183 | + } |
| 184 | + } |
| 185 | + config.Clear()() |
| 186 | +} |
0 commit comments