Skip to content

Commit c05d43e

Browse files
authored
featured: use run() to run cli commands in place of check_call() (#177)
Fixes: #20662 During some reboots, it was observed that some times featured.service script command fails to start the services like pmon, snmp, lldp etc. From logs, it was observed that 'sudo systemctl enable ' command failed with errorcode 13 (SIGPIPE. 2024 Oct 29 01:31:26.191236 aaa14-rp INFO featured: Running cmd: '['sudo', 'systemctl', 'unmask', 'pmon.service']' 2024 Oct 29 01:31:26.211167 aaa14-rp INFO systemd[1]: Reloading. 2024 Oct 29 01:31:27.212381 aaa14-rp INFO featured: Running cmd: '['sudo', 'systemctl', 'enable', 'pmon.service']' 2024 Oct 29 01:31:27.232428 aaa14-rp INFO systemd[1]: Reloading. 2024 Oct 29 01:31:28.135667 aaa14-rp ERR featured: ['sudo', 'systemctl', 'enable', 'pmon.service'] - failed: return code - -13, output:#012None 2024 Oct 29 01:31:28.135746 aaa14-rp ERR featured: Feature 'pmon.service' failed to be enabled and started 2024 Oct 29 01:34:08.661711 aaa14-rp INFO featured: Running cmd: '['sudo', 'systemctl', 'enable', 'gnmi.service']' 2024 Oct 29 01:34:08.677242 aaa14-rp INFO systemd[1]: Reloading. 2024 Oct 29 01:34:09.316554 aaa14-rp ERR featured: ['sudo', 'systemctl', 'enable', 'gnmi.service'] - failed: return code - -13, output:#012None 2024 Oct 29 01:34:09.316791 aaa14-rp ERR featured: Feature 'gnmi.service' failed to be enabled and started The issue does not recover and the pmon and other services never starts. On supervisor this also leads to swss, syncd and other related docker to stay down. In general systemctl enable does not work for some services like pmon, snmp, lldp etc as there is no WantBy directive set for these services in unit file. The command returns stderr : "The unit files have no installation config (WantedBy=, RequiredBy=, Also=, Alias= settings in the [Install] section, and DefaultInstance= for template units). This means they are not meant to be enabled using systemctl. Possible reasons for having this kind of units are: • A unit may be statically enabled by being symlinked from another unit's .wants/ or .requires/ directory. • A unit's purpose may be to act as a helper for some other unit which has a requirement dependency on it. • A unit may be started when needed via activation (socket, path, timer, D-Bus, udev, scripted systemctl call, ...). • In case of template units, the unit is meant to be enabled with some instance name specified. ” featured python script uses subprocess.check_call() function to invoke the command which looks like is not very reliable at handling the stderr and may cause SIGPIPE with big buffer data. Modifying the function to use subprocess.run() resolves this issue. run() is more reliable at handing the return data. Validated the change with multiple reboots.
1 parent ff73070 commit c05d43e

File tree

3 files changed

+192
-187
lines changed

3 files changed

+192
-187
lines changed

scripts/featured

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,11 @@ PORT_INIT_TIMEOUT_SEC = 180
2626

2727
def run_cmd(cmd, log_err=True, raise_exception=False):
2828
try:
29-
subprocess.check_call(cmd)
29+
result = subprocess.run(cmd,
30+
capture_output=True,
31+
check=True, text=True)
32+
syslog.syslog(syslog.LOG_INFO, "Output: {} , Stderr: {}"
33+
.format(result.stdout, result.stderr))
3034
except Exception as err:
3135
if log_err:
3236
syslog.syslog(syslog.LOG_ERR, "{} - failed: return code - {}, output:\n{}"

tests/featured/featured_test.py

Lines changed: 62 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -175,9 +175,9 @@ def test_sync_state_field(self, test_scenario_name, config_data, fs):
175175
feature_table_state_db_calls = self.get_state_db_set_calls(feature_table)
176176

177177
self.checks_systemd_config_file(device_type, config_data['config_db']['FEATURE'], feature_systemd_name_map)
178-
mocked_subprocess.check_call.assert_has_calls(config_data['enable_feature_subprocess_calls'],
178+
mocked_subprocess.run.assert_has_calls(config_data['enable_feature_subprocess_calls'],
179179
any_order=True)
180-
mocked_subprocess.check_call.assert_has_calls(config_data['daemon_reload_subprocess_call'],
180+
mocked_subprocess.run.assert_has_calls(config_data['daemon_reload_subprocess_call'],
181181
any_order=True)
182182
feature_state_table_mock.set.assert_has_calls(feature_table_state_db_calls)
183183
self.checks_systemd_config_file(device_type, config_data['config_db']['FEATURE'], feature_systemd_name_map)
@@ -227,9 +227,9 @@ def test_handler(self, test_scenario_name, config_data, fs):
227227
feature_systemd_name_map[feature_name] = feature_names
228228

229229
self.checks_systemd_config_file(device_type, config_data['config_db']['FEATURE'], feature_systemd_name_map)
230-
mocked_subprocess.check_call.assert_has_calls(config_data['enable_feature_subprocess_calls'],
230+
mocked_subprocess.run.assert_has_calls(config_data['enable_feature_subprocess_calls'],
231231
any_order=True)
232-
mocked_subprocess.check_call.assert_has_calls(config_data['daemon_reload_subprocess_call'],
232+
mocked_subprocess.run.assert_has_calls(config_data['daemon_reload_subprocess_call'],
233233
any_order=True)
234234

235235
def test_feature_config_parsing(self):
@@ -375,15 +375,15 @@ def test_feature_events(self, mock_syslog, get_runtime):
375375
daemon.start(time.time())
376376
except TimeoutError as e:
377377
pass
378-
expected = [call(['sudo', 'systemctl', 'daemon-reload']),
379-
call(['sudo', 'systemctl', 'unmask', 'dhcp_relay.service']),
380-
call(['sudo', 'systemctl', 'enable', 'dhcp_relay.service']),
381-
call(['sudo', 'systemctl', 'start', 'dhcp_relay.service']),
382-
call(['sudo', 'systemctl', 'daemon-reload']),
383-
call(['sudo', 'systemctl', 'unmask', 'mux.service']),
384-
call(['sudo', 'systemctl', 'enable', 'mux.service']),
385-
call(['sudo', 'systemctl', 'start', 'mux.service'])]
386-
mocked_subprocess.check_call.assert_has_calls(expected)
378+
expected = [call(['sudo', 'systemctl', 'daemon-reload'], capture_output=True, check=True, text=True),
379+
call(['sudo', 'systemctl', 'unmask', 'dhcp_relay.service'], capture_output=True, check=True, text=True),
380+
call(['sudo', 'systemctl', 'enable', 'dhcp_relay.service'], capture_output=True, check=True, text=True),
381+
call(['sudo', 'systemctl', 'start', 'dhcp_relay.service'], capture_output=True, check=True, text=True),
382+
call(['sudo', 'systemctl', 'daemon-reload'], capture_output=True, check=True, text=True),
383+
call(['sudo', 'systemctl', 'unmask', 'mux.service'], capture_output=True, check=True, text=True),
384+
call(['sudo', 'systemctl', 'enable', 'mux.service'], capture_output=True, check=True, text=True),
385+
call(['sudo', 'systemctl', 'start', 'mux.service'], capture_output=True, check=True, text=True)]
386+
mocked_subprocess.run.assert_has_calls(expected, any_order=True)
387387

388388
# Change the state to disabled
389389
MockSelect.reset_event_queue()
@@ -393,10 +393,10 @@ def test_feature_events(self, mock_syslog, get_runtime):
393393
daemon.start(time.time())
394394
except TimeoutError:
395395
pass
396-
expected = [call(['sudo', 'systemctl', 'stop', 'dhcp_relay.service']),
397-
call(['sudo', 'systemctl', 'disable', 'dhcp_relay.service']),
398-
call(['sudo', 'systemctl', 'mask', 'dhcp_relay.service'])]
399-
mocked_subprocess.check_call.assert_has_calls(expected)
396+
expected = [call(['sudo', 'systemctl', 'stop', 'dhcp_relay.service'], capture_output=True, check=True, text=True),
397+
call(['sudo', 'systemctl', 'disable', 'dhcp_relay.service'], capture_output=True, check=True, text=True),
398+
call(['sudo', 'systemctl', 'mask', 'dhcp_relay.service'], capture_output=True, check=True, text=True)]
399+
mocked_subprocess.run.assert_has_calls(expected, any_order=True)
400400

401401
def test_delayed_service(self, mock_syslog, get_runtime):
402402
MockSelect.set_event_queue([('FEATURE', 'dhcp_relay'),
@@ -417,20 +417,20 @@ def test_delayed_service(self, mock_syslog, get_runtime):
417417
daemon.start(time.time())
418418
except TimeoutError:
419419
pass
420-
expected = [call(['sudo', 'systemctl', 'daemon-reload']),
421-
call(['sudo', 'systemctl', 'unmask', 'dhcp_relay.service']),
422-
call(['sudo', 'systemctl', 'enable', 'dhcp_relay.service']),
423-
call(['sudo', 'systemctl', 'start', 'dhcp_relay.service']),
424-
call(['sudo', 'systemctl', 'daemon-reload']),
425-
call(['sudo', 'systemctl', 'unmask', 'mux.service']),
426-
call(['sudo', 'systemctl', 'enable', 'mux.service']),
427-
call(['sudo', 'systemctl', 'start', 'mux.service']),
428-
call(['sudo', 'systemctl', 'daemon-reload']),
429-
call(['sudo', 'systemctl', 'unmask', 'telemetry.service']),
430-
call(['sudo', 'systemctl', 'enable', 'telemetry.service']),
431-
call(['sudo', 'systemctl', 'start', 'telemetry.service'])]
432-
433-
mocked_subprocess.check_call.assert_has_calls(expected)
420+
expected = [call(['sudo', 'systemctl', 'daemon-reload'], capture_output=True, check=True, text=True),
421+
call(['sudo', 'systemctl', 'unmask', 'dhcp_relay.service'], capture_output=True, check=True, text=True),
422+
call(['sudo', 'systemctl', 'enable', 'dhcp_relay.service'], capture_output=True, check=True, text=True),
423+
call(['sudo', 'systemctl', 'start', 'dhcp_relay.service'], capture_output=True, check=True, text=True),
424+
call(['sudo', 'systemctl', 'daemon-reload'], capture_output=True, check=True, text=True),
425+
call(['sudo', 'systemctl', 'unmask', 'mux.service'], capture_output=True, check=True, text=True),
426+
call(['sudo', 'systemctl', 'enable', 'mux.service'], capture_output=True, check=True, text=True),
427+
call(['sudo', 'systemctl', 'start', 'mux.service'], capture_output=True, check=True, text=True),
428+
call(['sudo', 'systemctl', 'daemon-reload'], capture_output=True, check=True, text=True),
429+
call(['sudo', 'systemctl', 'unmask', 'telemetry.service'], capture_output=True, check=True, text=True),
430+
call(['sudo', 'systemctl', 'enable', 'telemetry.service'], capture_output=True, check=True, text=True),
431+
call(['sudo', 'systemctl', 'start', 'telemetry.service'], capture_output=True, check=True, text=True)]
432+
433+
mocked_subprocess.run.assert_has_calls(expected, any_order=True)
434434

435435
def test_advanced_reboot(self, mock_syslog, get_runtime):
436436
MockRestartWaiter.advancedReboot = True
@@ -442,25 +442,26 @@ def test_advanced_reboot(self, mock_syslog, get_runtime):
442442
daemon = featured.FeatureDaemon()
443443
daemon.render_all_feature_states()
444444
daemon.register_callbacks()
445-
try:
445+
try:
446446
daemon.start(time.time())
447447
except TimeoutError:
448-
pass
449-
expected = [call(['sudo', 'systemctl', 'daemon-reload']),
450-
call(['sudo', 'systemctl', 'unmask', 'dhcp_relay.service']),
451-
call(['sudo', 'systemctl', 'enable', 'dhcp_relay.service']),
452-
call(['sudo', 'systemctl', 'start', 'dhcp_relay.service']),
453-
call(['sudo', 'systemctl', 'daemon-reload']),
454-
call(['sudo', 'systemctl', 'unmask', 'mux.service']),
455-
call(['sudo', 'systemctl', 'enable', 'mux.service']),
456-
call(['sudo', 'systemctl', 'start', 'mux.service']),
457-
call(['sudo', 'systemctl', 'daemon-reload']),
458-
call(['sudo', 'systemctl', 'unmask', 'telemetry.service']),
459-
call(['sudo', 'systemctl', 'enable', 'telemetry.service']),
460-
call(['sudo', 'systemctl', 'start', 'telemetry.service'])]
461-
462-
mocked_subprocess.check_call.assert_has_calls(expected)
463-
448+
pass
449+
expected = [
450+
call(['sudo', 'systemctl', 'daemon-reload'], capture_output=True, check=True, text=True),
451+
call(['sudo', 'systemctl', 'unmask', 'dhcp_relay.service'], capture_output=True, check=True, text=True),
452+
call(['sudo', 'systemctl', 'enable', 'dhcp_relay.service'], capture_output=True, check=True, text=True),
453+
call(['sudo', 'systemctl', 'start', 'dhcp_relay.service'], capture_output=True, check=True, text=True),
454+
call(['sudo', 'systemctl', 'daemon-reload'], capture_output=True, check=True, text=True),
455+
call(['sudo', 'systemctl', 'unmask', 'mux.service'], capture_output=True, check=True, text=True),
456+
call(['sudo', 'systemctl', 'enable', 'mux.service'], capture_output=True, check=True, text=True),
457+
call(['sudo', 'systemctl', 'start', 'mux.service'], capture_output=True, check=True, text=True),
458+
call(['sudo', 'systemctl', 'daemon-reload'], capture_output=True, check=True, text=True),
459+
call(['sudo', 'systemctl', 'unmask', 'telemetry.service'], capture_output=True, check=True, text=True),
460+
call(['sudo', 'systemctl', 'enable', 'telemetry.service'], capture_output=True, check=True, text=True),
461+
call(['sudo', 'systemctl', 'start', 'telemetry.service'], capture_output=True, check=True, text=True)]
462+
463+
mocked_subprocess.run.assert_has_calls(expected, any_order=True)
464+
464465
def test_portinit_timeout(self, mock_syslog, get_runtime):
465466
print(MockConfigDb.CONFIG_DB)
466467
MockSelect.NUM_TIMEOUT_TRIES = 1
@@ -479,16 +480,16 @@ def test_portinit_timeout(self, mock_syslog, get_runtime):
479480
daemon.start(0.0)
480481
except TimeoutError:
481482
pass
482-
expected = [call(['sudo', 'systemctl', 'daemon-reload']),
483-
call(['sudo', 'systemctl', 'unmask', 'dhcp_relay.service']),
484-
call(['sudo', 'systemctl', 'enable', 'dhcp_relay.service']),
485-
call(['sudo', 'systemctl', 'start', 'dhcp_relay.service']),
486-
call(['sudo', 'systemctl', 'daemon-reload']),
487-
call(['sudo', 'systemctl', 'unmask', 'mux.service']),
488-
call(['sudo', 'systemctl', 'enable', 'mux.service']),
489-
call(['sudo', 'systemctl', 'start', 'mux.service']),
490-
call(['sudo', 'systemctl', 'daemon-reload']),
491-
call(['sudo', 'systemctl', 'unmask', 'telemetry.service']),
492-
call(['sudo', 'systemctl', 'enable', 'telemetry.service']),
493-
call(['sudo', 'systemctl', 'start', 'telemetry.service'])]
494-
mocked_subprocess.check_call.assert_has_calls(expected)
483+
expected = [call(['sudo', 'systemctl', 'daemon-reload'], capture_output=True, check=True, text=True),
484+
call(['sudo', 'systemctl', 'unmask', 'dhcp_relay.service'], capture_output=True, check=True, text=True),
485+
call(['sudo', 'systemctl', 'enable', 'dhcp_relay.service'], capture_output=True, check=True, text=True),
486+
call(['sudo', 'systemctl', 'start', 'dhcp_relay.service'], capture_output=True, check=True, text=True),
487+
call(['sudo', 'systemctl', 'daemon-reload'], capture_output=True, check=True, text=True),
488+
call(['sudo', 'systemctl', 'unmask', 'mux.service'], capture_output=True, check=True, text=True),
489+
call(['sudo', 'systemctl', 'enable', 'mux.service'], capture_output=True, check=True, text=True),
490+
call(['sudo', 'systemctl', 'start', 'mux.service'], capture_output=True, check=True, text=True),
491+
call(['sudo', 'systemctl', 'daemon-reload'], capture_output=True, check=True, text=True),
492+
call(['sudo', 'systemctl', 'unmask', 'telemetry.service'], capture_output=True, check=True, text=True),
493+
call(['sudo', 'systemctl', 'enable', 'telemetry.service'], capture_output=True, check=True, text=True),
494+
call(['sudo', 'systemctl', 'start', 'telemetry.service'], capture_output=True, check=True, text=True)]
495+
mocked_subprocess.run.assert_has_calls(expected, any_order=True)

0 commit comments

Comments
 (0)