Skip to content

Commit 83d738e

Browse files
authored
Fix 'invalid magic number 0' bug (prometheus#11338)
Signed-off-by: Ganesh Vernekar <[email protected]>
1 parent f371d7f commit 83d738e

File tree

2 files changed

+65
-29
lines changed

2 files changed

+65
-29
lines changed

tsdb/chunks/head_chunks.go

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -372,11 +372,25 @@ func repairLastChunkFile(files map[int]string) (_ map[int]string, returnErr erro
372372
return files, nil
373373
}
374374

375-
info, err := os.Stat(files[lastFile])
375+
f, err := os.Open(files[lastFile])
376376
if err != nil {
377-
return files, errors.Wrap(err, "file stat during last head chunk file repair")
377+
return files, errors.Wrap(err, "open file during last head chunk file repair")
378378
}
379-
if info.Size() == 0 {
379+
380+
buf := make([]byte, MagicChunksSize)
381+
size, err := f.Read(buf)
382+
if err != nil && err != io.EOF {
383+
return files, errors.Wrap(err, "failed to read magic number during last head chunk file repair")
384+
}
385+
if err := f.Close(); err != nil {
386+
return files, errors.Wrap(err, "close file during last head chunk file repair")
387+
}
388+
389+
// We either don't have enough bytes for the magic number or the magic number is 0.
390+
// NOTE: we should not check for wrong magic number here because that error
391+
// needs to be sent up the function called (already done elsewhere)
392+
// for proper repair mechanism to happen in the Head.
393+
if size < MagicChunksSize || binary.BigEndian.Uint32(buf) == 0 {
380394
// Corrupt file, hence remove it.
381395
if err := os.RemoveAll(files[lastFile]); err != nil {
382396
return files, errors.Wrap(err, "delete corrupted, empty head chunk file during last file repair")

tsdb/chunks/head_chunks_test.go

Lines changed: 48 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -387,9 +387,6 @@ func TestHeadReadWriter_TruncateAfterFailedIterateChunks(t *testing.T) {
387387

388388
func TestHeadReadWriter_ReadRepairOnEmptyLastFile(t *testing.T) {
389389
hrw := createChunkDiskMapper(t, "")
390-
defer func() {
391-
require.NoError(t, hrw.Close())
392-
}()
393390

394391
timeRange := 0
395392
addChunk := func() {
@@ -429,34 +426,59 @@ func TestHeadReadWriter_ReadRepairOnEmptyLastFile(t *testing.T) {
429426
dir := hrw.dir.Name()
430427
require.NoError(t, hrw.Close())
431428

432-
// Write an empty last file mimicking an abrupt shutdown on file creation.
433-
emptyFileName := segmentFile(dir, lastFile+1)
434-
f, err := os.OpenFile(emptyFileName, os.O_WRONLY|os.O_CREATE, 0o666)
435-
require.NoError(t, err)
436-
require.NoError(t, f.Sync())
437-
stat, err := f.Stat()
438-
require.NoError(t, err)
439-
require.Equal(t, int64(0), stat.Size())
440-
require.NoError(t, f.Close())
429+
writeCorruptLastFile := func(b []byte) {
430+
fname := segmentFile(dir, lastFile+1)
431+
f, err := os.OpenFile(fname, os.O_WRONLY|os.O_CREATE, 0o666)
432+
require.NoError(t, err)
433+
_, err = f.Write(b)
434+
require.NoError(t, err)
435+
require.NoError(t, f.Sync())
436+
stat, err := f.Stat()
437+
require.NoError(t, err)
438+
require.Equal(t, int64(len(b)), stat.Size())
439+
require.NoError(t, f.Close())
440+
}
441441

442-
// Open chunk disk mapper again, corrupt file should be removed.
443-
hrw = createChunkDiskMapper(t, dir)
442+
checkRepair := func() {
443+
// Open chunk disk mapper again, corrupt file should be removed.
444+
hrw = createChunkDiskMapper(t, dir)
444445

445-
// Removed from memory.
446-
require.Equal(t, 3, len(hrw.mmappedChunkFiles))
447-
for idx := range hrw.mmappedChunkFiles {
448-
require.LessOrEqual(t, idx, lastFile, "file index is bigger than previous last file")
449-
}
446+
// Removed from memory.
447+
require.Equal(t, 3, len(hrw.mmappedChunkFiles))
448+
for idx := range hrw.mmappedChunkFiles {
449+
require.LessOrEqual(t, idx, lastFile, "file index is bigger than previous last file")
450+
}
450451

451-
// Removed even from disk.
452-
files, err := os.ReadDir(dir)
453-
require.NoError(t, err)
454-
require.Equal(t, 3, len(files))
455-
for _, fi := range files {
456-
seq, err := strconv.ParseUint(fi.Name(), 10, 64)
452+
// Removed even from disk.
453+
files, err := os.ReadDir(dir)
457454
require.NoError(t, err)
458-
require.LessOrEqual(t, seq, uint64(lastFile), "file index on disk is bigger than previous last file")
455+
require.Equal(t, 3, len(files))
456+
for _, fi := range files {
457+
seq, err := strconv.ParseUint(fi.Name(), 10, 64)
458+
require.NoError(t, err)
459+
require.LessOrEqual(t, seq, uint64(lastFile), "file index on disk is bigger than previous last file")
460+
}
461+
462+
require.NoError(t, hrw.Close())
459463
}
464+
465+
// Write an empty last file mimicking an abrupt shutdown on file creation.
466+
writeCorruptLastFile(nil)
467+
// Removes empty last file.
468+
checkRepair()
469+
470+
// Write another empty last file with 0 bytes.
471+
writeCorruptLastFile([]byte{0, 0, 0, 0, 0, 0, 0, 0})
472+
// Removes the 0 filled last file.
473+
checkRepair()
474+
475+
// Write another corrupt file with less than 4 bytes (size of magic number).
476+
writeCorruptLastFile([]byte{1, 2})
477+
// Removes the partial file.
478+
checkRepair()
479+
480+
// Check that it does not delete a valid last file.
481+
checkRepair()
460482
}
461483

462484
func createChunkDiskMapper(t *testing.T, dir string) *ChunkDiskMapper {

0 commit comments

Comments
 (0)