@@ -197,7 +197,8 @@ std::set<uint32_t> Merger::AllFeatures() const {
197197}
198198
199199// Inner process. May crash if the target crashes.
200- void Fuzzer::CrashResistantMergeInternalStep (const std::string &CFPath) {
200+ void Fuzzer::CrashResistantMergeInternalStep (const std::string &CFPath,
201+ bool IsSetCoverMerge) {
201202 Printf (" MERGE-INNER: using the control file '%s'\n " , CFPath.c_str ());
202203 Merger M;
203204 std::ifstream IF (CFPath);
@@ -235,13 +236,14 @@ void Fuzzer::CrashResistantMergeInternalStep(const std::string &CFPath) {
235236 // Collect coverage. We are iterating over the files in this order:
236237 // * First, files in the initial corpus ordered by size, smallest first.
237238 // * Then, all other files, smallest first.
238- // So it makes no sense to record all features for all files, instead we
239- // only record features that were not seen before.
240- std::set<size_t > UniqFeatures;
241- TPC.CollectFeatures ([&](size_t Feature) {
242- if (AllFeatures.insert (Feature).second )
243- UniqFeatures.insert (Feature);
244- });
239+ std::set<size_t > Features;
240+ if (IsSetCoverMerge)
241+ TPC.CollectFeatures ([&](size_t Feature) { Features.insert (Feature); });
242+ else
243+ TPC.CollectFeatures ([&](size_t Feature) {
244+ if (AllFeatures.insert (Feature).second )
245+ Features.insert (Feature);
246+ });
245247 TPC.UpdateObservedPCs ();
246248 // Show stats.
247249 if (!(TotalNumberOfRuns & (TotalNumberOfRuns - 1 )))
@@ -250,7 +252,7 @@ void Fuzzer::CrashResistantMergeInternalStep(const std::string &CFPath) {
250252 PrintStatsWrapper (" LOADED" );
251253 // Write the post-run marker and the coverage.
252254 OF << " FT " << i;
253- for (size_t F : UniqFeatures )
255+ for (size_t F : Features )
254256 OF << " " << F;
255257 OF << " \n " ;
256258 OF << " COV " << i;
@@ -264,6 +266,127 @@ void Fuzzer::CrashResistantMergeInternalStep(const std::string &CFPath) {
264266 PrintStatsWrapper (" DONE " );
265267}
266268
269+ // Merges all corpora into the first corpus. A file is added into
270+ // the first corpus only if it adds new features. Unlike `Merger::Merge`,
271+ // this implementation calculates an approximation of the minimum set
272+ // of corpora files, that cover all known features (set cover problem).
273+ // Generally, this means that files with more features are preferred for
274+ // merge into the first corpus. When two files have the same number of
275+ // features, the smaller one is preferred.
276+ size_t Merger::SetCoverMerge (const std::set<uint32_t > &InitialFeatures,
277+ std::set<uint32_t > *NewFeatures,
278+ const std::set<uint32_t > &InitialCov,
279+ std::set<uint32_t > *NewCov,
280+ std::vector<std::string> *NewFiles) {
281+ assert (NumFilesInFirstCorpus <= Files.size ());
282+ NewFiles->clear ();
283+ NewFeatures->clear ();
284+ NewCov->clear ();
285+ std::set<uint32_t > AllFeatures;
286+ // 1 << 21 - 1 is the maximum feature index.
287+ // See 'kFeatureSetSize' in 'FuzzerCorpus.h'.
288+ const uint32_t kFeatureSetSize = 1 << 21 ;
289+ std::vector<bool > Covered (kFeatureSetSize , false );
290+ size_t NumCovered = 0 ;
291+
292+ std::set<uint32_t > ExistingFeatures = InitialFeatures;
293+ for (size_t i = 0 ; i < NumFilesInFirstCorpus; ++i)
294+ ExistingFeatures.insert (Files[i].Features .begin (), Files[i].Features .end ());
295+
296+ // Mark the existing features as covered.
297+ for (const auto &F : ExistingFeatures) {
298+ if (!Covered[F % kFeatureSetSize ]) {
299+ ++NumCovered;
300+ Covered[F % kFeatureSetSize ] = true ;
301+ }
302+ // Calculate an underestimation of the set of covered features
303+ // since the `Covered` bitvector is smaller than the feature range.
304+ AllFeatures.insert (F % kFeatureSetSize );
305+ }
306+
307+ std::set<size_t > RemainingFiles;
308+ for (size_t i = NumFilesInFirstCorpus; i < Files.size (); ++i) {
309+ // Construct an incremental sequence which represent the
310+ // indices to all files (excluding those in the initial corpus).
311+ // RemainingFiles = range(NumFilesInFirstCorpus..Files.size()).
312+ RemainingFiles.insert (i);
313+ // Insert this file's unique features to all features.
314+ for (const auto &F : Files[i].Features )
315+ AllFeatures.insert (F % kFeatureSetSize );
316+ }
317+
318+ // Integrate files into Covered until set is complete.
319+ while (NumCovered != AllFeatures.size ()) {
320+ // Index to file with largest number of unique features.
321+ size_t MaxFeaturesIndex = NumFilesInFirstCorpus;
322+ // Indices to remove from RemainingFiles.
323+ std::set<size_t > RemoveIndices;
324+ // Running max unique feature count.
325+ // Updated upon finding a file with more features.
326+ size_t MaxNumFeatures = 0 ;
327+
328+ // Iterate over all files not yet integrated into Covered,
329+ // to find the file which has the largest number of
330+ // features that are not already in Covered.
331+ for (const auto &i : RemainingFiles) {
332+ const auto &File = Files[i];
333+ size_t CurrentUnique = 0 ;
334+ // Count number of features in this file
335+ // which are not yet in Covered.
336+ for (const auto &F : File.Features )
337+ if (!Covered[F % kFeatureSetSize ])
338+ ++CurrentUnique;
339+
340+ if (CurrentUnique == 0 ) {
341+ // All features in this file are already in Covered: skip next time.
342+ RemoveIndices.insert (i);
343+ } else if (CurrentUnique > MaxNumFeatures ||
344+ (CurrentUnique == MaxNumFeatures &&
345+ File.Size < Files[MaxFeaturesIndex].Size )) {
346+ // Update the max features file based on unique features
347+ // Break ties by selecting smaller files.
348+ MaxNumFeatures = CurrentUnique;
349+ MaxFeaturesIndex = i;
350+ }
351+ }
352+ // Must be a valid index/
353+ assert (MaxFeaturesIndex < Files.size ());
354+ // Remove any feature-less files found.
355+ for (const auto &i : RemoveIndices)
356+ RemainingFiles.erase (i);
357+ if (MaxNumFeatures == 0 ) {
358+ // Did not find a file that adds unique features.
359+ // This means that we should have no remaining files.
360+ assert (RemainingFiles.size () == 0 );
361+ assert (NumCovered == AllFeatures.size ());
362+ break ;
363+ }
364+
365+ // MaxFeaturesIndex must be an element of Remaining.
366+ assert (RemainingFiles.find (MaxFeaturesIndex) != RemainingFiles.end ());
367+ // Remove the file with the most features from Remaining.
368+ RemainingFiles.erase (MaxFeaturesIndex);
369+ const auto &MaxFeatureFile = Files[MaxFeaturesIndex];
370+ // Add the features of the max feature file to Covered.
371+ for (const auto &F : MaxFeatureFile.Features ) {
372+ if (!Covered[F % kFeatureSetSize ]) {
373+ ++NumCovered;
374+ Covered[F % kFeatureSetSize ] = true ;
375+ NewFeatures->insert (F);
376+ }
377+ }
378+ // Add the index to this file to the result.
379+ NewFiles->push_back (MaxFeatureFile.Name );
380+ // Update NewCov with the additional coverage
381+ // that MaxFeatureFile provides.
382+ for (const auto &C : MaxFeatureFile.Cov )
383+ if (InitialCov.find (C) == InitialCov.end ())
384+ NewCov->insert (C);
385+ }
386+
387+ return NewFeatures->size ();
388+ }
389+
267390static size_t
268391WriteNewControlFile (const std::string &CFPath,
269392 const std::vector<SizedFile> &OldCorpus,
@@ -309,7 +432,8 @@ void CrashResistantMerge(const std::vector<std::string> &Args,
309432 std::set<uint32_t > *NewFeatures,
310433 const std::set<uint32_t > &InitialCov,
311434 std::set<uint32_t > *NewCov, const std::string &CFPath,
312- bool V /* Verbose*/ ) {
435+ bool V, /* Verbose*/
436+ bool IsSetCoverMerge) {
313437 if (NewCorpus.empty () && OldCorpus.empty ()) return ; // Nothing to merge.
314438 size_t NumAttempts = 0 ;
315439 std::vector<MergeFileInfo> KnownFiles;
@@ -364,14 +488,17 @@ void CrashResistantMerge(const std::vector<std::string> &Args,
364488 // Every inner process should execute at least one input.
365489 Command BaseCmd (Args);
366490 BaseCmd.removeFlag (" merge" );
491+ BaseCmd.removeFlag (" set_cover_merge" );
367492 BaseCmd.removeFlag (" fork" );
368493 BaseCmd.removeFlag (" collect_data_flow" );
369494 for (size_t Attempt = 1 ; Attempt <= NumAttempts; Attempt++) {
370495 Fuzzer::MaybeExitGracefully ();
371496 VPrintf (V, " MERGE-OUTER: attempt %zd\n " , Attempt);
372497 Command Cmd (BaseCmd);
373498 Cmd.addFlag (" merge_control_file" , CFPath);
374- Cmd.addFlag (" merge_inner" , " 1" );
499+ // If we are going to use the set cover implementation for
500+ // minimization add the merge_inner=2 internal flag.
501+ Cmd.addFlag (" merge_inner" , IsSetCoverMerge ? " 2" : " 1" );
375502 if (!V) {
376503 Cmd.setOutputFile (getDevNull ());
377504 Cmd.combineOutAndErr ();
@@ -396,7 +523,10 @@ void CrashResistantMerge(const std::vector<std::string> &Args,
396523 M.ApproximateMemoryConsumption () >> 20 , GetPeakRSSMb ());
397524
398525 M.Files .insert (M.Files .end (), KnownFiles.begin (), KnownFiles.end ());
399- M.Merge (InitialFeatures, NewFeatures, InitialCov, NewCov, NewFiles);
526+ if (IsSetCoverMerge)
527+ M.SetCoverMerge (InitialFeatures, NewFeatures, InitialCov, NewCov, NewFiles);
528+ else
529+ M.Merge (InitialFeatures, NewFeatures, InitialCov, NewCov, NewFiles);
400530 VPrintf (V, " MERGE-OUTER: %zd new files with %zd new features added; "
401531 " %zd new coverage edges\n " ,
402532 NewFiles->size (), NewFeatures->size (), NewCov->size ());
0 commit comments