Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 25 additions & 0 deletions .github/workflows/windows-hyperv-periodic-trigger.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# Workflow intended to periodically run the Windows Hyper-V Integration test workflow.

name: Windows Hyper-V Periodic Tests

on:
workflow_dispatch:
schedule:
- cron: "0 1 * * *"

jobs:

triggerWinIntegration:
if: github.repository == 'containerd/containerd'
# NOTE(aznashwan, 11/24/21): GitHub actions do not currently support referencing
# or evaluating any kind of variables in the `uses` clause, but this will
# ideally be added in the future in which case the hardcoded reference to the
# upstream containerd repository should be replaced with the following to
# potentially allow contributors to enable periodic Windows tests on forks as well:
# uses: "${{ github.repository }}/.github/workflows/windows-hyperv-periodic.yml@${{ github.ref_name }}"
uses: containerd/containerd/.github/workflows/windows-hyperv-periodic.yml@main
secrets:
AZURE_SUB_ID: "${{ secrets.AZURE_SUB_ID }}"
AZURE_CREDS: "${{ secrets.AZURE_CREDS }}"
GCP_SERVICE_ACCOUNT: "${{ secrets.GCP_SERVICE_ACCOUNT }}"
GCP_WORKLOAD_IDENTITY_PROVIDER: "${{ secrets.GCP_WORKLOAD_IDENTITY_PROVIDER }}"
351 changes: 351 additions & 0 deletions .github/workflows/windows-hyperv-periodic.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,351 @@
# Workflow intended to run containerd integration tests on Windows using Hyper-V Containers.

name: Windows Hyper-V Integration Tests

on:
workflow_dispatch:
workflow_call:
secrets:
AZURE_SUB_ID:
required: true
AZURE_CREDS:
required: true
GCP_SERVICE_ACCOUNT:
required: true
GCP_WORKLOAD_IDENTITY_PROVIDER:
required: true

env:
AZURE_DEFAULT_LOCATION: westeurope
AZURE_SUBSCRIPTION_ID: ${{ secrets.AZURE_SUB_ID }}
AZURE_DEFAULT_VM_SIZE: Standard_D2s_v3
PASSWORD: Passw0rdAdmin # temp for testing, will be generated
DEFAULT_ADMIN_USERNAME: azureuser
SSH_OPTS: "-o ServerAliveInterval=20 -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null"
REMOTE_VM_BIN_PATH: "c:\\containerd\\bin"
BUSYBOX_TESTING_IMAGE_REF: "k8s.gcr.io/e2e-test-images/busybox:1.29-2"
RESOURCE_CONSUMER_TESTING_IMAGE_REF: "k8s.gcr.io/e2e-test-images/resource-consumer:1.10"
WEBSERVER_TESTING_IMAGE_REF: "k8s.gcr.io/e2e-test-images/nginx:1.14-2"
HCSSHIM_TAG: "master"


jobs:
winIntegration:
# NOTE: the following permissions are required by `google-github-actions/auth`:
permissions:
contents: 'read'
id-token: 'write'
strategy:
# NOTE(aznashwan): this will permit all other jobs from the matrix to finish and
# upload their results even if one has a failing non-test-task:
# (e.g. hitting resource limits in the `AZTestVMCreate` task)
fail-fast: false
matrix:
win_ver: [ltsc2019, ltsc2022]
include:
- win_ver: ltsc2019
AZURE_IMG: "MicrosoftWindowsServer:WindowsServer:2019-Datacenter-with-Containers-smalldisk:17763.2565.220202"
AZURE_RESOURCE_GROUP: ctrd-integration-ltsc2019-${{ github.run_id }}
GOOGLE_BUCKET: "containerd-integration/logs/windows-ltsc2019-hyperv/"
- win_ver: ltsc2022
AZURE_IMG: "MicrosoftWindowsServer:WindowsServer:2022-datacenter-smalldisk-g2:20348.524.220201"
Comment thread
aznashwan marked this conversation as resolved.
AZURE_RESOURCE_GROUP: ctrd-integration-ltsc2022-${{ github.run_id }}
GOOGLE_BUCKET: "containerd-integration/logs/windows-ltsc2022-hyperv/"
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2

- name: Install required packages
run: |
sudo apt-get install xmlstarlet -y

- name: PrepareArtifacts
run: |
STARTED_TIME=$(date +%s)
LOGS_DIR=$HOME/$STARTED_TIME
echo "STARTED_TIME=$STARTED_TIME" >> $GITHUB_ENV
echo "LOGS_DIR=$LOGS_DIR" >> $GITHUB_ENV

echo "VM_INTEGRATION_LOGFILE=/c/Logs/integration.log" >> $GITHUB_ENV
echo "VM_CRI_INTEGRATION_LOGFILE=/c/Logs/cri-integration.log" >> $GITHUB_ENV

mkdir -p $LOGS_DIR/artifacts
jq -n --arg node temp --arg timestamp $STARTED_TIME '$timestamp|tonumber|{timestamp:.,$node}' > $LOGS_DIR/started.json

- name: Generate ssh key pair
run: |
mkdir -p $HOME/.ssh/
ssh-keygen -t rsa -b 4096 -C "[email protected]" -f $HOME/.ssh/id_rsa -q -N ""
echo "SSH_PUB_KEY=$(cat ~/.ssh/id_rsa.pub)" >> $GITHUB_ENV

- name: AZLogin
uses: azure/login@v1
with:
creds: ${{ secrets.AZURE_CREDS }}

- name: AZResourceGroupCreate
uses: azure/CLI@v1
with:
inlinescript: |
az group create -n ${{ matrix.AZURE_RESOURCE_GROUP }} -l ${{ env.AZURE_DEFAULT_LOCATION }} --tags creationTimestamp=$(date -u '+%Y-%m-%dT%H:%M:%SZ')

- name: AZTestVMCreate
uses: azure/CLI@v1
with:
inlinescript: |
DETAILS=$(az vm create -n winTestVM --admin-username ${{ env.DEFAULT_ADMIN_USERNAME }} --admin-password ${{ env.PASSWORD }} --image ${{ matrix.AZURE_IMG }} -g ${{ matrix.AZURE_RESOURCE_GROUP }} --nsg-rule SSH --size ${{ env.AZURE_DEFAULT_VM_SIZE }} --public-ip-sku Standard -o json)
PUB_IP=$(echo $DETAILS | jq -r .publicIpAddress)
if [ "$PUB_IP" == "null" ]
then
RETRY=0
while [ "$PUB_IP" == "null" ] || [ $RETRY -le 5 ]
do
sleep 5
PUB_IP=$(az vm show -d -g ${{ matrix.AZURE_RESOURCE_GROUP }} -n winTestVM -o json --query publicIps | jq -r)
RETRY=$(( $RETRY + 1 ))
done
fi

if [ "$PUB_IP" == "null" ]
then
echo "failed to fetch public IP"
exit 1
fi
echo "VM_PUB_IP=$PUB_IP" >> $GITHUB_ENV

- name: EnableAZVMSSH
uses: azure/CLI@v1
with:
inlinescript: |
az vm run-command invoke --command-id RunPowerShellScript -n winTestVM -g ${{ matrix.AZURE_RESOURCE_GROUP }} --scripts @$GITHUB_WORKSPACE/script/setup/enable_ssh_windows.ps1 --parameters 'SSHPublicKey=${{ env.SSH_PUB_KEY }}'

- name: TestSSHConnection
run: |
if ! ssh -i $HOME/.ssh/id_rsa ${{ env.SSH_OPTS }} azureuser@${{ env.VM_PUB_IP }} "hostname";
then
exit 1
fi

- name: InstallAdditionalFeaturesWS2022
if: ${{ matrix.win_ver == 'ltsc2022' }}
run: |
ssh -i $HOME/.ssh/id_rsa ${{ env.SSH_OPTS }} azureuser@${{ env.VM_PUB_IP }} "powershell.exe -command { Install-WindowsFeature -Name 'Containers' }"
# NOTE(aznashwan): the 2022 image needs Hyper-V to be explicitly enabled:
ssh -i $HOME/.ssh/id_rsa ${{ env.SSH_OPTS }} azureuser@${{ env.VM_PUB_IP }} "powershell.exe -command { Install-WindowsFeature -Name Hyper-V -IncludeAllSubFeature -IncludeManagementTools }"
ssh -i $HOME/.ssh/id_rsa ${{ env.SSH_OPTS }} azureuser@${{ env.VM_PUB_IP }} "shutdown.exe /r /t 0"


- name: WaitForVMToRestart
if: ${{ matrix.win_ver == 'ltsc2022' }}
timeout-minutes: 5
run: |
# give the vm 30 seconds to actually stop. SSH server might actually respond while server is shutting down.
sleep 30
while [ ! $( ssh -i $HOME/.ssh/id_rsa ${{ env.SSH_OPTS }} azureuser@${{ env.VM_PUB_IP }} "hostname") ];
do
echo "Unable to connect to azurevm"
done
echo "Connection reestablished. VM restarted succesfully."

- name: CreateNatNetworkWS2022
if: ${{ matrix.win_ver == 'ltsc2022' }}
run: |
ssh -i $HOME/.ssh/id_rsa ${{ env.SSH_OPTS }} azureuser@${{ env.VM_PUB_IP }} "powershell.exe -command { curl.exe -L 'https://raw.githubusercontent.com/microsoft/SDN/master/Kubernetes/windows/hns.psm1' -o hns.psm1 }"
ssh -i $HOME/.ssh/id_rsa ${{ env.SSH_OPTS }} azureuser@${{ env.VM_PUB_IP }} "powershell.exe -command { Import-Module .\hns.psm1 ; New-HnsNetwork -Type NAT -Name nat -AddressPrefix 172.19.208.0/20 -Gateway 172.19.208.1 }"

- name: PrepareTestingEnv
run: |
scp -i $HOME/.ssh/id_rsa ${{ env.SSH_OPTS }} $GITHUB_WORKSPACE/script/setup/prepare_env_windows.ps1 azureuser@${{ env.VM_PUB_IP }}:/prepare_env_windows.ps1
ssh -i $HOME/.ssh/id_rsa ${{ env.SSH_OPTS }} azureuser@${{ env.VM_PUB_IP }} "c:\\prepare_env_windows.ps1"

- name: MakeContainerDBins
run: |
ssh -i $HOME/.ssh/id_rsa ${{ env.SSH_OPTS }} azureuser@${{ env.VM_PUB_IP }} "git clone http://github.com/containerd/containerd c:\\containerd "
ssh -i $HOME/.ssh/id_rsa ${{ env.SSH_OPTS }} azureuser@${{ env.VM_PUB_IP }} "cd c:\containerd ; make binaries"

- name: BuildHcsshim
run: |
# NOTE(aznashwan, 6/6/22): need to use tip of HCSSHIM for the following:
# https://github.com/microsoft/hcsshim/pull/1388
ssh -i $HOME/.ssh/id_rsa ${{ env.SSH_OPTS }} ${{ env.DEFAULT_ADMIN_USERNAME }}@${{ env.VM_PUB_IP }} "git clone http://github.com/Microsoft/hcsshim c:\containerd\hcsshim"
ssh -i $HOME/.ssh/id_rsa ${{ env.SSH_OPTS }} ${{ env.DEFAULT_ADMIN_USERNAME }}@${{ env.VM_PUB_IP }} "cd c:\containerd\hcsshim; git fetch --tags origin $HCSSHIM_TAG ; \
git checkout $HCSSHIM_TAG ; go build -mod=vendor -o ${{ env.REMOTE_VM_BIN_PATH }}\containerd-shim-runhcs-v1.exe .\cmd\containerd-shim-runhcs-v1"

- name: RunIntegrationTests
id: RunIntegrationTests
# NOTE(aznashwan): this is set to continue-on-error to allow the workflow to run until
# the reports are converted/uploaded to GCloud so as to show up on testgrid.k8s.io too.
continue-on-error: true
run: |
ssh -i $HOME/.ssh/id_rsa ${{ env.SSH_OPTS }} azureuser@${{ env.VM_PUB_IP }} "sh.exe -s" << EOF
cd /c/containerd
export EXTRA_TESTFLAGS="-timeout=20m"
export USE_HYPERV=1
set -o pipefail
make integration | tee ${{ env.VM_INTEGRATION_LOGFILE }}
EOF
echo '::set-output name=SUCCEEDED::1'

- name: PrepareRepoList
run: |
cat > containerd-hyperv-config.toml << EOF
version = 2

[plugins."io.containerd.grpc.v1.cri".containerd]
default_runtime_name = "runhcs-wcow-hypervisor"

[plugins."io.containerd.grpc.v1.cri".containerd.runtimes]
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runhcs-wcow-hypervisor]
base_runtime_spec = ""
cni_conf_dir = ""
cni_max_conf_num = 0
container_annotations = []
pod_annotations = []
privileged_without_host_devices = false
runtime_engine = ""
runtime_path = ""
runtime_root = ""
runtime_type = "io.containerd.runhcs.v1"
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runhcs-wcow-hypervisor.options]
Debug = true
DebugType = 2
SandboxPlatform = "windows/amd64"
SandboxIsolation = 1
EOF

cat > repolist.toml << EOF
busybox = "${{ env.BUSYBOX_TESTING_IMAGE_REF }}"
ResourceConsumer = "${{ env.RESOURCE_CONSUMER_TESTING_IMAGE_REF }}"
EOF

cat > cri-test-images.yaml << EOF
defaultTestContainerImage: ${{ env.BUSYBOX_TESTING_IMAGE_REF }}
webServerTestImage: ${{ env.WEBSERVER_TESTING_IMAGE_REF }}
EOF

scp -i $HOME/.ssh/id_rsa ${{ env.SSH_OPTS }} repolist.toml azureuser@${{ env.VM_PUB_IP }}:c:/repolist.toml
scp -i $HOME/.ssh/id_rsa ${{ env.SSH_OPTS }} cri-test-images.yaml azureuser@${{ env.VM_PUB_IP }}:c:/cri-test-images.yaml
scp -i $HOME/.ssh/id_rsa ${{ env.SSH_OPTS }} containerd-hyperv-config.toml azureuser@${{ env.VM_PUB_IP }}:c:/containerd-hyperv-config.toml

# NOTE(aznashwan): in-tree integration tests will need some updates to on
# Hyper-V containers so we skip this for now:
- name: RunCRIIntegrationTests
id: RunCRIIntegrationTests
# NOTE(aznashwan): this is set to continue-on-error to allow the workflow to run until
# the reports are converted/uploaded to GCloud so as to show up on testgrid.k8s.io too.
continue-on-error: true
run: |
ssh -i $HOME/.ssh/id_rsa ${{ env.SSH_OPTS }} azureuser@${{ env.VM_PUB_IP }} "sh.exe -s" <<EOF
cd c:/containerd
./script/setup/install-cni-windows
export TEST_IMAGE_LIST=c:/repolist.toml
export USE_HYPERV=1
Comment thread
aznashwan marked this conversation as resolved.
# NOTE: 'TestContainerdRestart' should be skipped as discussed in:
# https://github.com/containerd/containerd/pull/7025
export FOCUS="[^TestContainerdRestart$]"
set -o pipefail
make cri-integration | tee ${{ env.VM_CRI_INTEGRATION_LOGFILE }}
EOF
echo '::set-output name=SUCCEEDED::1'

- name: GetCritestRepo
run: |
ssh -i $HOME/.ssh/id_rsa ${{ env.SSH_OPTS }} azureuser@${{ env.VM_PUB_IP }} "git clone https://github.com/kubernetes-sigs/cri-tools c:/cri-tools"

- name: BuildCritest
run: |
ssh -i $HOME/.ssh/id_rsa ${{ env.SSH_OPTS }} azureuser@${{ env.VM_PUB_IP }} "sh.exe -c 'cd /c/cri-tools && make critest'"

- name: RunCritest
id: RunCritest
# NOTE(aznashwan): this is set to continue-on-error to allow the workflow to run until
# the reports are converted/uploaded to GCloud so as to show up on testgrid.k8s.io too.
continue-on-error: true
run: |
# This test is exceedingly flaky only on ws2022 so skip for now to keep CI happy.
# Info: https://github.com/containerd/containerd/issues/6652
SKIP=""
if [ '${{ matrix.win_ver }}' == 'ltsc2022' ];then
SKIP='-ginkgo.skip="runtime should support exec with tty=true and stdin=true"'
fi

ssh -i $HOME/.ssh/id_rsa ${{ env.SSH_OPTS }} azureuser@${{ env.VM_PUB_IP }} "powershell.exe -command { C:\containerd\bin\containerd.exe --log-level=debug --config=c:/containerd-hyperv-config.toml --log-file=C:/logs/containerd.log --service-name containerd --register-service ; Set-Service containerd -StartupType Automatic; Start-Service containerd }"
ssh -i $HOME/.ssh/id_rsa ${{ env.SSH_OPTS }} azureuser@${{ env.VM_PUB_IP }} "sh.exe -s" <<EOF
sleep 5
set -o pipefail
c:/cri-tools/build/bin/critest.exe $SKIP --runtime-endpoint='npipe://./pipe/containerd-containerd' --test-images-file='c:/cri-test-images.yaml' --report-dir='c:/Logs' -ginkgo.junit-report="C:\Logs\junit_critest.xml" | tee c:/Logs/critest.log
EOF
echo '::set-output name=SUCCEEDED::1'

- name: PullLogsFromWinNode
run: |
# Generate JUnit reports from the stdouts of the tests:
ssh -i $HOME/.ssh/id_rsa ${{ env.SSH_OPTS }} azureuser@${{ env.VM_PUB_IP }} "sh.exe -c 'touch ${{ env.VM_INTEGRATION_LOGFILE }}; cat ${{ env.VM_INTEGRATION_LOGFILE }} | go-junit-report.exe > /c/Logs/junit_integration.xml'"
ssh -i $HOME/.ssh/id_rsa ${{ env.SSH_OPTS }} azureuser@${{ env.VM_PUB_IP }} "sh.exe -c 'touch ${{ env.VM_CRI_INTEGRATION_LOGFILE }}; cat ${{ env.VM_CRI_INTEGRATION_LOGFILE }} | go-junit-report.exe > /c/Logs/junit_cri_integration.xml'"

# Copy over all the JUnit reports:
scp -i $HOME/.ssh/id_rsa ${{ env.SSH_OPTS }} azureuser@${{ env.VM_PUB_IP }}:c:/Logs/*.xml ${{ env.LOGS_DIR }}/artifacts/
for f in $(ls ${{ env.LOGS_DIR }}/artifacts/*.xml); do
xmlstarlet ed -d "/testsuites/testsuite/properties" $f > ${{ env.LOGS_DIR }}/$(basename $f)
mv ${{ env.LOGS_DIR }}/$(basename $f) $f
done

- name: FinishJob
run: |
jq -n --arg result SUCCESS --arg timestamp $(date +%s) '$timestamp|tonumber|{timestamp:.,$result}' > ${{ env.LOGS_DIR }}/finished.json
echo "${{ env.STARTED_TIME }}" > ${{ github.workspace }}/latest-build.txt

- name: AssignGcpCreds
id: AssignGcpCreds
run: |
echo '::set-output name=GCP_SERVICE_ACCOUNT::${{ secrets.GCP_SERVICE_ACCOUNT }}'
echo '::set-output name=GCP_WORKLOAD_IDENTITY_PROVIDER::${{ secrets.GCP_WORKLOAD_IDENTITY_PROVIDER }}'

- name: AuthGcp
uses: google-github-actions/auth@v0
if: steps.AssignGcpCreds.outputs.GCP_SERVICE_ACCOUNT && steps.AssignGcpCreds.outputs.GCP_WORKLOAD_IDENTITY_PROVIDER
with:
service_account: ${{ secrets.GCP_SERVICE_ACCOUNT }}
workload_identity_provider: ${{ secrets.GCP_WORKLOAD_IDENTITY_PROVIDER }}

- name: UploadJobReport
uses: google-github-actions/[email protected]
if: steps.AssignGcpCreds.outputs.GCP_SERVICE_ACCOUNT && steps.AssignGcpCreds.outputs.GCP_WORKLOAD_IDENTITY_PROVIDER
with:
path: ${{ github.workspace }}/latest-build.txt
destination: ${{ matrix.GOOGLE_BUCKET }}
parent: false

- name: UploadLogsDir
uses: google-github-actions/[email protected]
if: steps.AssignGcpCreds.outputs.GCP_SERVICE_ACCOUNT && steps.AssignGcpCreds.outputs.GCP_WORKLOAD_IDENTITY_PROVIDER
with:
path: ${{ env.LOGS_DIR }}
destination: ${{ matrix.GOOGLE_BUCKET }}${{ env.STARTED_TIME}}
parent: false

- name: Check all CI stages succeeded
uses: actions/github-script@v3
with:
script: |
const stepResults = {
RunIntegrationTests: "${{ steps.RunIntegrationTests.outputs.SUCCEEDED }}",
RunCRIIntegrationTests: "${{ steps.RunCRIIntegrationTests.outputs.SUCCEEDED }}",
RunCritest: "${{ steps.RunCritest.outputs.SUCCEEDED }}",
};
let failedTasks = [];
for( [step, result] of Object.entries(stepResults) ) {
if (result != "1") {
failedTasks.push(step);
}
};
if (failedTasks.length != 0) {
core.setFailed(`One or more CI stages have failed. Please review the outputs of the following stepts: ${failedTasks}.`);
};

- name: ResourceCleanup
if: always()
uses: azure/CLI@v1
with:
inlinescript: |
az group delete -g ${{ matrix.AZURE_RESOURCE_GROUP }} --yes
Loading