Skip to content

Commit e6597db

Browse files
ariskoutsou21morehouse
authored andcommitted
Greedy set cover implementation of Merger::Merge
Extend the existing single-pass algorithm for `Merger::Merge` with an algorithm that gives better results. This new implementation can be used with a new **set_cover_merge=1** flag. This greedy set cover implementation gives a substantially smaller final corpus (40%-80% less testcases) while preserving the same features/coverage. At the same time, the execution time penalty is not that significant (+50% for ~1M corpus files and far less for smaller corpora). These results were obtained by comparing several targets with varying size corpora. Change `Merger::CrashResistantMergeInternalStep` to collect all features from each file and not just unique ones. This is needed for the set cover algorithm to work correctly. The implementation of the algorithm in `Merger::SetCoverMerge` uses a bitvector to store features that are covered by a file while performing the pass. Collisions while indexing the bitvector are ignored similarly to the fuzzer. Reviewed By: morehouse Differential Revision: https://reviews.llvm.org/D105284
1 parent 0e627c9 commit e6597db

File tree

8 files changed

+370
-20
lines changed

8 files changed

+370
-20
lines changed

compiler-rt/lib/fuzzer/FuzzerDriver.cpp

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -523,7 +523,7 @@ void Merge(Fuzzer *F, FuzzingOptions &Options,
523523
std::vector<std::string> NewFiles;
524524
std::set<uint32_t> NewFeatures, NewCov;
525525
CrashResistantMerge(Args, OldCorpus, NewCorpus, &NewFiles, {}, &NewFeatures,
526-
{}, &NewCov, CFPath, true);
526+
{}, &NewCov, CFPath, true, Flags.set_cover_merge);
527527
for (auto &Path : NewFiles)
528528
F->WriteToOutputCorpus(FileToVector(Path, Options.MaxLen));
529529
// We are done, delete the control file if it was a temporary one.
@@ -797,7 +797,8 @@ int FuzzerDriver(int *argc, char ***argv, UserCallback Callback) {
797797
if (Flags.verbosity)
798798
Printf("INFO: Seed: %u\n", Seed);
799799

800-
if (Flags.collect_data_flow && !Flags.fork && !Flags.merge) {
800+
if (Flags.collect_data_flow && !Flags.fork &&
801+
!(Flags.merge || Flags.set_cover_merge)) {
801802
if (RunIndividualFiles)
802803
return CollectDataFlow(Flags.collect_data_flow, Flags.data_flow_trace,
803804
ReadCorpora({}, *Inputs));
@@ -872,15 +873,16 @@ int FuzzerDriver(int *argc, char ***argv, UserCallback Callback) {
872873
if (Flags.fork)
873874
FuzzWithFork(F->GetMD().GetRand(), Options, Args, *Inputs, Flags.fork);
874875

875-
if (Flags.merge)
876+
if (Flags.merge || Flags.set_cover_merge)
876877
Merge(F, Options, Args, *Inputs, Flags.merge_control_file);
877878

878879
if (Flags.merge_inner) {
879880
const size_t kDefaultMaxMergeLen = 1 << 20;
880881
if (Options.MaxLen == 0)
881882
F->SetMaxInputLen(kDefaultMaxMergeLen);
882883
assert(Flags.merge_control_file);
883-
F->CrashResistantMergeInternalStep(Flags.merge_control_file);
884+
F->CrashResistantMergeInternalStep(Flags.merge_control_file,
885+
!strncmp(Flags.merge_inner, "2", 1));
884886
exit(0);
885887
}
886888

compiler-rt/lib/fuzzer/FuzzerFlags.def

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,11 @@ FUZZER_FLAG_INT(ignore_crashes, 0, "Ignore crashes in fork mode")
6464
FUZZER_FLAG_INT(merge, 0, "If 1, the 2-nd, 3-rd, etc corpora will be "
6565
"merged into the 1-st corpus. Only interesting units will be taken. "
6666
"This flag can be used to minimize a corpus.")
67+
FUZZER_FLAG_INT(set_cover_merge, 0, "If 1, the 2-nd, 3-rd, etc corpora will be "
68+
"merged into the 1-st corpus. Same as the 'merge' flag, but uses the "
69+
"standard greedy algorithm for the set cover problem to "
70+
"compute an approximation of the minimum set of testcases that "
71+
"provide the same coverage as the initial corpora")
6772
FUZZER_FLAG_STRING(stop_file, "Stop fuzzing ASAP if this file exists")
6873
FUZZER_FLAG_STRING(merge_inner, "internal flag")
6974
FUZZER_FLAG_STRING(merge_control_file,

compiler-rt/lib/fuzzer/FuzzerFork.cpp

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -213,8 +213,11 @@ struct GlobalEnv {
213213

214214
std::vector<std::string> FilesToAdd;
215215
std::set<uint32_t> NewFeatures, NewCov;
216+
bool IsSetCoverMerge =
217+
!Job->Cmd.getFlagValue("set_cover_merge").compare("1");
216218
CrashResistantMerge(Args, {}, MergeCandidates, &FilesToAdd, Features,
217-
&NewFeatures, Cov, &NewCov, Job->CFPath, false);
219+
&NewFeatures, Cov, &NewCov, Job->CFPath, false,
220+
IsSetCoverMerge);
218221
for (auto &Path : FilesToAdd) {
219222
auto U = FileToVector(Path);
220223
auto NewPath = DirPlusFile(MainCorpusDir, Hash(U));
@@ -318,7 +321,8 @@ void FuzzWithFork(Random &Rand, const FuzzingOptions &Options,
318321
auto CFPath = DirPlusFile(Env.TempDir, "merge.txt");
319322
std::set<uint32_t> NewFeatures, NewCov;
320323
CrashResistantMerge(Env.Args, {}, SeedFiles, &Env.Files, Env.Features,
321-
&NewFeatures, Env.Cov, &NewCov, CFPath, false);
324+
&NewFeatures, Env.Cov, &NewCov, CFPath,
325+
/*Verbose=*/false, /*IsSetCoverMerge=*/false);
322326
Env.Features.insert(NewFeatures.begin(), NewFeatures.end());
323327
Env.Cov.insert(NewFeatures.begin(), NewFeatures.end());
324328
RemoveFile(CFPath);

compiler-rt/lib/fuzzer/FuzzerInternal.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,8 @@ class Fuzzer {
7373

7474
// Merge Corpora[1:] into Corpora[0].
7575
void Merge(const std::vector<std::string> &Corpora);
76-
void CrashResistantMergeInternalStep(const std::string &ControlFilePath);
76+
void CrashResistantMergeInternalStep(const std::string &ControlFilePath,
77+
bool IsSetCoverMerge);
7778
MutationDispatcher &GetMD() { return MD; }
7879
void PrintFinalStats();
7980
void SetMaxInputLen(size_t MaxInputLen);

compiler-rt/lib/fuzzer/FuzzerMerge.cpp

Lines changed: 142 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -197,7 +197,8 @@ std::set<uint32_t> Merger::AllFeatures() const {
197197
}
198198

199199
// Inner process. May crash if the target crashes.
200-
void Fuzzer::CrashResistantMergeInternalStep(const std::string &CFPath) {
200+
void Fuzzer::CrashResistantMergeInternalStep(const std::string &CFPath,
201+
bool IsSetCoverMerge) {
201202
Printf("MERGE-INNER: using the control file '%s'\n", CFPath.c_str());
202203
Merger M;
203204
std::ifstream IF(CFPath);
@@ -235,13 +236,14 @@ void Fuzzer::CrashResistantMergeInternalStep(const std::string &CFPath) {
235236
// Collect coverage. We are iterating over the files in this order:
236237
// * First, files in the initial corpus ordered by size, smallest first.
237238
// * Then, all other files, smallest first.
238-
// So it makes no sense to record all features for all files, instead we
239-
// only record features that were not seen before.
240-
std::set<size_t> UniqFeatures;
241-
TPC.CollectFeatures([&](size_t Feature) {
242-
if (AllFeatures.insert(Feature).second)
243-
UniqFeatures.insert(Feature);
244-
});
239+
std::set<size_t> Features;
240+
if (IsSetCoverMerge)
241+
TPC.CollectFeatures([&](size_t Feature) { Features.insert(Feature); });
242+
else
243+
TPC.CollectFeatures([&](size_t Feature) {
244+
if (AllFeatures.insert(Feature).second)
245+
Features.insert(Feature);
246+
});
245247
TPC.UpdateObservedPCs();
246248
// Show stats.
247249
if (!(TotalNumberOfRuns & (TotalNumberOfRuns - 1)))
@@ -250,7 +252,7 @@ void Fuzzer::CrashResistantMergeInternalStep(const std::string &CFPath) {
250252
PrintStatsWrapper("LOADED");
251253
// Write the post-run marker and the coverage.
252254
OF << "FT " << i;
253-
for (size_t F : UniqFeatures)
255+
for (size_t F : Features)
254256
OF << " " << F;
255257
OF << "\n";
256258
OF << "COV " << i;
@@ -264,6 +266,127 @@ void Fuzzer::CrashResistantMergeInternalStep(const std::string &CFPath) {
264266
PrintStatsWrapper("DONE ");
265267
}
266268

269+
// Merges all corpora into the first corpus. A file is added into
270+
// the first corpus only if it adds new features. Unlike `Merger::Merge`,
271+
// this implementation calculates an approximation of the minimum set
272+
// of corpora files, that cover all known features (set cover problem).
273+
// Generally, this means that files with more features are preferred for
274+
// merge into the first corpus. When two files have the same number of
275+
// features, the smaller one is preferred.
276+
size_t Merger::SetCoverMerge(const std::set<uint32_t> &InitialFeatures,
277+
std::set<uint32_t> *NewFeatures,
278+
const std::set<uint32_t> &InitialCov,
279+
std::set<uint32_t> *NewCov,
280+
std::vector<std::string> *NewFiles) {
281+
assert(NumFilesInFirstCorpus <= Files.size());
282+
NewFiles->clear();
283+
NewFeatures->clear();
284+
NewCov->clear();
285+
std::set<uint32_t> AllFeatures;
286+
// 1 << 21 - 1 is the maximum feature index.
287+
// See 'kFeatureSetSize' in 'FuzzerCorpus.h'.
288+
const uint32_t kFeatureSetSize = 1 << 21;
289+
std::vector<bool> Covered(kFeatureSetSize, false);
290+
size_t NumCovered = 0;
291+
292+
std::set<uint32_t> ExistingFeatures = InitialFeatures;
293+
for (size_t i = 0; i < NumFilesInFirstCorpus; ++i)
294+
ExistingFeatures.insert(Files[i].Features.begin(), Files[i].Features.end());
295+
296+
// Mark the existing features as covered.
297+
for (const auto &F : ExistingFeatures) {
298+
if (!Covered[F % kFeatureSetSize]) {
299+
++NumCovered;
300+
Covered[F % kFeatureSetSize] = true;
301+
}
302+
// Calculate an underestimation of the set of covered features
303+
// since the `Covered` bitvector is smaller than the feature range.
304+
AllFeatures.insert(F % kFeatureSetSize);
305+
}
306+
307+
std::set<size_t> RemainingFiles;
308+
for (size_t i = NumFilesInFirstCorpus; i < Files.size(); ++i) {
309+
// Construct an incremental sequence which represent the
310+
// indices to all files (excluding those in the initial corpus).
311+
// RemainingFiles = range(NumFilesInFirstCorpus..Files.size()).
312+
RemainingFiles.insert(i);
313+
// Insert this file's unique features to all features.
314+
for (const auto &F : Files[i].Features)
315+
AllFeatures.insert(F % kFeatureSetSize);
316+
}
317+
318+
// Integrate files into Covered until set is complete.
319+
while (NumCovered != AllFeatures.size()) {
320+
// Index to file with largest number of unique features.
321+
size_t MaxFeaturesIndex = NumFilesInFirstCorpus;
322+
// Indices to remove from RemainingFiles.
323+
std::set<size_t> RemoveIndices;
324+
// Running max unique feature count.
325+
// Updated upon finding a file with more features.
326+
size_t MaxNumFeatures = 0;
327+
328+
// Iterate over all files not yet integrated into Covered,
329+
// to find the file which has the largest number of
330+
// features that are not already in Covered.
331+
for (const auto &i : RemainingFiles) {
332+
const auto &File = Files[i];
333+
size_t CurrentUnique = 0;
334+
// Count number of features in this file
335+
// which are not yet in Covered.
336+
for (const auto &F : File.Features)
337+
if (!Covered[F % kFeatureSetSize])
338+
++CurrentUnique;
339+
340+
if (CurrentUnique == 0) {
341+
// All features in this file are already in Covered: skip next time.
342+
RemoveIndices.insert(i);
343+
} else if (CurrentUnique > MaxNumFeatures ||
344+
(CurrentUnique == MaxNumFeatures &&
345+
File.Size < Files[MaxFeaturesIndex].Size)) {
346+
// Update the max features file based on unique features
347+
// Break ties by selecting smaller files.
348+
MaxNumFeatures = CurrentUnique;
349+
MaxFeaturesIndex = i;
350+
}
351+
}
352+
// Must be a valid index/
353+
assert(MaxFeaturesIndex < Files.size());
354+
// Remove any feature-less files found.
355+
for (const auto &i : RemoveIndices)
356+
RemainingFiles.erase(i);
357+
if (MaxNumFeatures == 0) {
358+
// Did not find a file that adds unique features.
359+
// This means that we should have no remaining files.
360+
assert(RemainingFiles.size() == 0);
361+
assert(NumCovered == AllFeatures.size());
362+
break;
363+
}
364+
365+
// MaxFeaturesIndex must be an element of Remaining.
366+
assert(RemainingFiles.find(MaxFeaturesIndex) != RemainingFiles.end());
367+
// Remove the file with the most features from Remaining.
368+
RemainingFiles.erase(MaxFeaturesIndex);
369+
const auto &MaxFeatureFile = Files[MaxFeaturesIndex];
370+
// Add the features of the max feature file to Covered.
371+
for (const auto &F : MaxFeatureFile.Features) {
372+
if (!Covered[F % kFeatureSetSize]) {
373+
++NumCovered;
374+
Covered[F % kFeatureSetSize] = true;
375+
NewFeatures->insert(F);
376+
}
377+
}
378+
// Add the index to this file to the result.
379+
NewFiles->push_back(MaxFeatureFile.Name);
380+
// Update NewCov with the additional coverage
381+
// that MaxFeatureFile provides.
382+
for (const auto &C : MaxFeatureFile.Cov)
383+
if (InitialCov.find(C) == InitialCov.end())
384+
NewCov->insert(C);
385+
}
386+
387+
return NewFeatures->size();
388+
}
389+
267390
static size_t
268391
WriteNewControlFile(const std::string &CFPath,
269392
const std::vector<SizedFile> &OldCorpus,
@@ -309,7 +432,8 @@ void CrashResistantMerge(const std::vector<std::string> &Args,
309432
std::set<uint32_t> *NewFeatures,
310433
const std::set<uint32_t> &InitialCov,
311434
std::set<uint32_t> *NewCov, const std::string &CFPath,
312-
bool V /*Verbose*/) {
435+
bool V, /*Verbose*/
436+
bool IsSetCoverMerge) {
313437
if (NewCorpus.empty() && OldCorpus.empty()) return; // Nothing to merge.
314438
size_t NumAttempts = 0;
315439
std::vector<MergeFileInfo> KnownFiles;
@@ -364,14 +488,17 @@ void CrashResistantMerge(const std::vector<std::string> &Args,
364488
// Every inner process should execute at least one input.
365489
Command BaseCmd(Args);
366490
BaseCmd.removeFlag("merge");
491+
BaseCmd.removeFlag("set_cover_merge");
367492
BaseCmd.removeFlag("fork");
368493
BaseCmd.removeFlag("collect_data_flow");
369494
for (size_t Attempt = 1; Attempt <= NumAttempts; Attempt++) {
370495
Fuzzer::MaybeExitGracefully();
371496
VPrintf(V, "MERGE-OUTER: attempt %zd\n", Attempt);
372497
Command Cmd(BaseCmd);
373498
Cmd.addFlag("merge_control_file", CFPath);
374-
Cmd.addFlag("merge_inner", "1");
499+
// If we are going to use the set cover implementation for
500+
// minimization add the merge_inner=2 internal flag.
501+
Cmd.addFlag("merge_inner", IsSetCoverMerge ? "2" : "1");
375502
if (!V) {
376503
Cmd.setOutputFile(getDevNull());
377504
Cmd.combineOutAndErr();
@@ -396,7 +523,10 @@ void CrashResistantMerge(const std::vector<std::string> &Args,
396523
M.ApproximateMemoryConsumption() >> 20, GetPeakRSSMb());
397524

398525
M.Files.insert(M.Files.end(), KnownFiles.begin(), KnownFiles.end());
399-
M.Merge(InitialFeatures, NewFeatures, InitialCov, NewCov, NewFiles);
526+
if (IsSetCoverMerge)
527+
M.SetCoverMerge(InitialFeatures, NewFeatures, InitialCov, NewCov, NewFiles);
528+
else
529+
M.Merge(InitialFeatures, NewFeatures, InitialCov, NewCov, NewFiles);
400530
VPrintf(V, "MERGE-OUTER: %zd new files with %zd new features added; "
401531
"%zd new coverage edges\n",
402532
NewFiles->size(), NewFeatures->size(), NewCov->size());

compiler-rt/lib/fuzzer/FuzzerMerge.h

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,11 @@ struct Merger {
6969
std::set<uint32_t> *NewFeatures,
7070
const std::set<uint32_t> &InitialCov, std::set<uint32_t> *NewCov,
7171
std::vector<std::string> *NewFiles);
72+
size_t SetCoverMerge(const std::set<uint32_t> &InitialFeatures,
73+
std::set<uint32_t> *NewFeatures,
74+
const std::set<uint32_t> &InitialCov,
75+
std::set<uint32_t> *NewCov,
76+
std::vector<std::string> *NewFiles);
7277
size_t ApproximateMemoryConsumption() const;
7378
std::set<uint32_t> AllFeatures() const;
7479
};
@@ -81,7 +86,7 @@ void CrashResistantMerge(const std::vector<std::string> &Args,
8186
std::set<uint32_t> *NewFeatures,
8287
const std::set<uint32_t> &InitialCov,
8388
std::set<uint32_t> *NewCov, const std::string &CFPath,
84-
bool Verbose);
89+
bool Verbose, bool IsSetCoverMerge);
8590

8691
} // namespace fuzzer
8792

0 commit comments

Comments
 (0)