Skip to content

Commit 3887053

Browse files
author
renzhen.rz
committed
snapshots/devmapper: deactivate thin device after committed
1. reason to deactivate committed snapshot The thin device will not be used for IO after committed, and further thin snapshotting is OK using an inactive thin device as origin. The benefits to deactivate are: - device is not unneccesary visible avoiding any unexpected IO; - save useless kernel data structs for maintaining active dm. Quote from kernel doc (Documentation/device-mapper/provisioning.txt): " ii) Using an internal snapshot. Once created, the user doesn't have to worry about any connection between the origin and the snapshot. Indeed the snapshot is no different from any other thinly-provisioned device and can be snapshotted itself via the same method. It's perfectly legal to have only one of them active, and there's no ordering requirement on activating or removing them both. (This differs from conventional device-mapper snapshots.) " 2. an thinpool metadata bug is naturally removed An problem happens when failed to suspend/resume origin thin device when creating snapshot: "failed to create snapshot device from parent vg0-mythinpool-snap-3" error="failed to save initial metadata for snapshot "vg0-mythinpool-snap-19": object already exists" This issue occurs because when failed to create snapshot, the snapshotter.store can be rollbacked, but the thin pool metadata boltdb failed to rollback in PoolDevice.CreateSnapshotDevice(), therefore metadata becomes inconsistent: the snapshotID is not taken in snapshotter.store, but saved in pool metadata boltdb. The cause is, in PoolDevice.CreateSnapshotDevice(), the defer calls are invoked on "first-in-last-out" order. When the error happens on the "resume device" defer call, the metadata is saved and snapshot is created, which has no chance to be rollbacked. Signed-off-by: Eric Ren <[email protected]>
1 parent d68b593 commit 3887053

4 files changed

Lines changed: 62 additions & 48 deletions

File tree

snapshots/devmapper/dmsetup/dmsetup.go

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -201,6 +201,13 @@ func RemoveDevice(deviceName string, opts ...RemoveDeviceOpt) error {
201201
args = append(args, GetFullDevicePath(deviceName))
202202

203203
_, err := dmsetup(args...)
204+
if err == unix.ENXIO {
205+
// Ignore "No such device or address" error because we dmsetup
206+
// remove with "deferred" option, there is chance for the device
207+
// having been removed.
208+
return nil
209+
}
210+
204211
return err
205212
}
206213

@@ -372,9 +379,13 @@ func parseDmsetupError(output string) string {
372379
return ""
373380
}
374381

375-
const failedSubstr = "failed: "
376-
377382
line := lines[0]
383+
// Handle output like "Device /dev/mapper/snapshotter-suite-pool-snap-1 not found"
384+
if strings.HasSuffix(line, "not found") {
385+
return unix.ENXIO.Error()
386+
}
387+
388+
const failedSubstr = "failed: "
378389
idx := strings.LastIndex(line, failedSubstr)
379390
if idx == -1 {
380391
return ""

snapshots/devmapper/pool_device.go

Lines changed: 25 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -178,19 +178,6 @@ func (p *PoolDevice) CreateSnapshotDevice(ctx context.Context, deviceName string
178178
return errors.Wrapf(err, "failed to query device metadata for %q", deviceName)
179179
}
180180

181-
// Suspend thin device if it was activated previously to avoid corruptions
182-
isActivated := p.IsActivated(baseInfo.Name)
183-
if isActivated {
184-
if err := p.suspendDevice(ctx, baseInfo); err != nil {
185-
return err
186-
}
187-
188-
// Resume back base thin device on exit
189-
defer func() {
190-
retErr = multierror.Append(retErr, p.resumeDevice(ctx, baseInfo)).ErrorOrNil()
191-
}()
192-
}
193-
194181
snapInfo := &DeviceInfo{
195182
Name: snapshotName,
196183
Size: virtualSizeBytes,
@@ -230,26 +217,6 @@ func (p *PoolDevice) CreateSnapshotDevice(ctx context.Context, deviceName string
230217
return p.activateDevice(ctx, snapInfo)
231218
}
232219

233-
func (p *PoolDevice) suspendDevice(ctx context.Context, info *DeviceInfo) error {
234-
if err := p.transition(ctx, info.Name, Suspending, Suspended, func() error {
235-
return dmsetup.SuspendDevice(info.Name)
236-
}); err != nil {
237-
return errors.Wrapf(err, "failed to suspend device %q", info.Name)
238-
}
239-
240-
return nil
241-
}
242-
243-
func (p *PoolDevice) resumeDevice(ctx context.Context, info *DeviceInfo) error {
244-
if err := p.transition(ctx, info.Name, Resuming, Resumed, func() error {
245-
return dmsetup.ResumeDevice(info.Name)
246-
}); err != nil {
247-
return errors.Wrapf(err, "failed to resume device %q", info.Name)
248-
}
249-
250-
return nil
251-
}
252-
253220
func (p *PoolDevice) createSnapshot(ctx context.Context, baseInfo, snapInfo *DeviceInfo) error {
254221
if err := p.transition(ctx, snapInfo.Name, Creating, Created, func() error {
255222
return dmsetup.CreateSnapshot(p.poolName, snapInfo.DeviceID, baseInfo.DeviceID)
@@ -265,16 +232,30 @@ func (p *PoolDevice) createSnapshot(ctx context.Context, baseInfo, snapInfo *Dev
265232
return nil
266233
}
267234

235+
// SuspendDevice flushes the outstanding IO and blocks the further IO
236+
func (p *PoolDevice) SuspendDevice(ctx context.Context, deviceName string) error {
237+
if err := p.transition(ctx, deviceName, Suspending, Suspended, func() error {
238+
return dmsetup.SuspendDevice(deviceName)
239+
}); err != nil {
240+
return errors.Wrapf(err, "failed to suspend device %q", deviceName)
241+
}
242+
243+
return nil
244+
}
245+
268246
// DeactivateDevice deactivates thin device
269-
func (p *PoolDevice) DeactivateDevice(ctx context.Context, deviceName string, deferred bool) error {
270-
if !p.IsActivated(deviceName) {
247+
func (p *PoolDevice) DeactivateDevice(ctx context.Context, deviceName string, deferred, withForce bool) error {
248+
if !p.IsLoaded(deviceName) {
271249
return nil
272250
}
273251

274-
opts := []dmsetup.RemoveDeviceOpt{dmsetup.RemoveWithForce, dmsetup.RemoveWithRetries}
252+
opts := []dmsetup.RemoveDeviceOpt{dmsetup.RemoveWithRetries}
275253
if deferred {
276254
opts = append(opts, dmsetup.RemoveDeferred)
277255
}
256+
if withForce {
257+
opts = append(opts, dmsetup.RemoveWithForce)
258+
}
278259

279260
if err := p.transition(ctx, deviceName, Deactivating, Deactivated, func() error {
280261
return dmsetup.RemoveDevice(deviceName, opts...)
@@ -300,6 +281,12 @@ func (p *PoolDevice) IsActivated(deviceName string) bool {
300281
return true
301282
}
302283

284+
// IsLoaded returns true if thin-device is visible for dmsetup
285+
func (p *PoolDevice) IsLoaded(deviceName string) bool {
286+
_, err := dmsetup.Info(deviceName)
287+
return err == nil
288+
}
289+
303290
// GetUsage reports total size in bytes consumed by a thin-device.
304291
// It relies on the number of used blocks reported by 'dmsetup status'.
305292
// The output looks like:
@@ -330,7 +317,7 @@ func (p *PoolDevice) RemoveDevice(ctx context.Context, deviceName string) error
330317
return errors.Wrapf(err, "can't query metadata for device %q", deviceName)
331318
}
332319

333-
if err := p.DeactivateDevice(ctx, deviceName, true); err != nil {
320+
if err := p.DeactivateDevice(ctx, deviceName, true, true); err != nil {
334321
return err
335322
}
336323

@@ -368,7 +355,7 @@ func (p *PoolDevice) RemovePool(ctx context.Context) error {
368355

369356
// Deactivate devices if any
370357
for _, name := range deviceNames {
371-
if err := p.DeactivateDevice(ctx, name, true); err != nil {
358+
if err := p.DeactivateDevice(ctx, name, true, true); err != nil {
372359
result = multierror.Append(result, errors.Wrapf(err, "failed to remove %q", name))
373360
}
374361
}

snapshots/devmapper/pool_device_test.go

Lines changed: 12 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -107,19 +107,24 @@ func TestPoolDevice(t *testing.T) {
107107
testMakeFileSystem(t, pool)
108108
})
109109

110-
// Mount 'thin-1'
110+
// Mount 'thin-1' and write v1 test file on 'thin-1' device
111111
err = mount.WithTempMount(ctx, getMounts(thinDevice1), func(thin1MountPath string) error {
112112
// Write v1 test file on 'thin-1' device
113113
thin1TestFilePath := filepath.Join(thin1MountPath, "TEST")
114114
err := ioutil.WriteFile(thin1TestFilePath, []byte("test file (v1)"), 0700)
115115
assert.NilError(t, err, "failed to write test file v1 on '%s' volume", thinDevice1)
116116

117-
// Take snapshot of 'thin-1'
118-
t.Run("CreateSnapshotDevice", func(t *testing.T) {
119-
testCreateSnapshot(t, pool)
120-
})
117+
return nil
118+
})
119+
120+
// Take snapshot of 'thin-1'
121+
t.Run("CreateSnapshotDevice", func(t *testing.T) {
122+
testCreateSnapshot(t, pool)
123+
})
121124

122-
// Update TEST file on 'thin-1' to v2
125+
// Update TEST file on 'thin-1' to v2
126+
err = mount.WithTempMount(ctx, getMounts(thinDevice1), func(thin1MountPath string) error {
127+
thin1TestFilePath := filepath.Join(thin1MountPath, "TEST")
123128
err = ioutil.WriteFile(thin1TestFilePath, []byte("test file (v2)"), 0700)
124129
assert.NilError(t, err, "failed to write test file v2 on 'thin-1' volume after taking snapshot")
125130

@@ -204,7 +209,7 @@ func testDeactivateThinDevice(t *testing.T, pool *PoolDevice) {
204209
for _, deviceName := range deviceList {
205210
assert.Assert(t, pool.IsActivated(deviceName))
206211

207-
err := pool.DeactivateDevice(context.Background(), deviceName, false)
212+
err := pool.DeactivateDevice(context.Background(), deviceName, false, true)
208213
assert.NilError(t, err, "failed to remove '%s'", deviceName)
209214

210215
assert.Assert(t, !pool.IsActivated(deviceName))

snapshots/devmapper/snapshotter.go

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -272,7 +272,18 @@ func (s *Snapshotter) Commit(ctx context.Context, name, key string, opts ...snap
272272
}
273273

274274
_, err = storage.CommitActive(ctx, key, name, usage, opts...)
275-
return err
275+
if err != nil {
276+
return err
277+
}
278+
279+
// The thin snapshot is not used for IO after committed, so
280+
// suspend to flush the IO and deactivate the device.
281+
err = s.pool.SuspendDevice(ctx, deviceName)
282+
if err != nil {
283+
return err
284+
}
285+
286+
return s.pool.DeactivateDevice(ctx, deviceName, true, false)
276287
})
277288
}
278289

0 commit comments

Comments
 (0)