Skip to content

Commit a8c0b1f

Browse files
committed
ARROW-15417: [Python][Packaging] Use vcpkg manifest to install wheel dependencies; downgrade AWS SDK by building the bundled version
Closes #12227 from kszucs/win-wheel-crash Authored-by: Krisztián Szűcs <[email protected]> Signed-off-by: Krisztián Szűcs <[email protected]>
1 parent 8e34b64 commit a8c0b1f

15 files changed

+270
-180
lines changed

ci/docker/java-jni-manylinux-201x.dockerfile

Lines changed: 10 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -19,19 +19,16 @@ ARG base
1919
FROM ${base}
2020

2121
# Install the libraries required by Gandiva to run
22-
RUN vcpkg install --clean-after-build \
23-
boost-algorithm \
24-
boost-crc \
25-
boost-date-time \
26-
boost-format \
27-
boost-locale \
28-
boost-multiprecision \
29-
boost-predef \
30-
boost-regex \
31-
boost-system \
32-
boost-variant \
33-
# Use enable rtti to avoid link problems in Gandiva
34-
llvm[clang,default-options,default-targets,lld,tools,enable-rtti]
22+
# Use llvm[enable-rtti] in the vcpkg.json to avoid link problems in Gandiva
23+
RUN vcpkg install \
24+
--clean-after-build \
25+
--x-install-root=${VCPKG_ROOT}/installed \
26+
--x-manifest-root=/arrow/ci/vcpkg \
27+
--x-feature=flight \
28+
--x-feature=gcs \
29+
--x-feature=json \
30+
--x-feature=parquet \
31+
--x-feature=gandiva
3532

3633
# Install Java
3734
ARG java=1.8.0

ci/docker/python-wheel-manylinux-201x.dockerfile

Lines changed: 20 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -53,49 +53,35 @@ COPY ci/vcpkg/*.patch \
5353
COPY ci/scripts/install_vcpkg.sh \
5454
ci/scripts/install_glibc.sh \
5555
arrow/ci/scripts/
56-
RUN arrow/ci/scripts/install_vcpkg.sh /opt/vcpkg ${vcpkg} && \
56+
ENV VCPKG_ROOT=/opt/vcpkg
57+
RUN arrow/ci/scripts/install_vcpkg.sh ${VCPKG_ROOT} ${vcpkg} && \
5758
if [ "${manylinux}" == "2010" ]; then \
5859
arrow/ci/scripts/install_glibc.sh ${glibc} /opt/glibc-${glibc} && \
59-
patchelf --set-interpreter /opt/glibc-2.18/lib/ld-linux-x86-64.so.2 /opt/vcpkg/vcpkg && \
60-
patchelf --set-rpath /opt/glibc-2.18/lib:/usr/lib64 /opt/vcpkg/vcpkg; \
60+
patchelf --set-interpreter /opt/glibc-2.18/lib/ld-linux-x86-64.so.2 ${VCPKG_ROOT}/vcpkg && \
61+
patchelf --set-rpath /opt/glibc-2.18/lib:/usr/lib64 ${VCPKG_ROOT}/vcpkg; \
6162
fi
62-
ENV PATH="/opt/vcpkg:${PATH}"
63+
ENV PATH="${PATH}:${VCPKG_ROOT}"
6364

6465
ARG build_type=release
6566
ENV CMAKE_BUILD_TYPE=${build_type} \
6667
VCPKG_FORCE_SYSTEM_BINARIES=1 \
6768
VCPKG_OVERLAY_TRIPLETS=/arrow/ci/vcpkg \
6869
VCPKG_DEFAULT_TRIPLET=${arch_short}-linux-static-${build_type} \
69-
VCPKG_FEATURE_FLAGS=-manifests
70-
71-
# Need to install the boost-build prior installing the boost packages, otherwise
72-
# vcpkg will raise an error.
73-
# TODO(kszucs): factor out the package enumeration to a text file and reuse it
74-
# from the windows image and potentially in a future macos wheel build
75-
RUN vcpkg install --clean-after-build \
76-
abseil \
77-
aws-sdk-cpp[config,cognito-identity,core,identity-management,s3,sts,transfer] \
78-
boost-filesystem \
79-
brotli \
80-
bzip2 \
81-
c-ares \
82-
curl \
83-
flatbuffers \
84-
gflags \
85-
glog \
86-
google-cloud-cpp[core,storage] \
87-
grpc \
88-
lz4 \
89-
openssl \
90-
orc \
91-
protobuf \
92-
rapidjson \
93-
re2 \
94-
snappy \
95-
thrift \
96-
utf8proc \
97-
zlib \
98-
zstd
70+
VCPKG_FEATURE_FLAGS="manifests"
71+
COPY ci/vcpkg/vcpkg.json arrow/ci/vcpkg/
72+
# cannot use the S3 feature here because while aws-sdk-cpp=1.9.160 contains
73+
# ssl related fixes as well as we can patch the vcpkg portfile to support
74+
# arm machines it hits ARROW-15141 where we would need to fall back to 1.8.186
75+
# but we cannot patch those portfiles since vcpkg-tool handles the checkout of
76+
# previous versions => use bundled S3 build
77+
RUN vcpkg install \
78+
--clean-after-build \
79+
--x-install-root=${VCPKG_ROOT}/installed \
80+
--x-manifest-root=/arrow/ci/vcpkg \
81+
--x-feature=flight \
82+
--x-feature=gcs \
83+
--x-feature=json \
84+
--x-feature=parquet
9985

10086
ARG python=3.8
10187
ENV PYTHON_VERSION=${python}

ci/docker/python-wheel-windows-vs2017.dockerfile

Lines changed: 27 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -36,8 +36,9 @@ COPY ci/vcpkg/*.patch \
3636
ci/vcpkg/*windows*.cmake \
3737
arrow/ci/vcpkg/
3838
COPY ci/scripts/install_vcpkg.sh arrow/ci/scripts/
39+
ENV VCPKG_ROOT=C:\\vcpkg
3940
RUN bash arrow/ci/scripts/install_vcpkg.sh /c/vcpkg %vcpkg% && \
40-
setx PATH "%PATH%;C:\vcpkg"
41+
setx PATH "%PATH%;%VCPKG_ROOT%"
4142

4243
# Configure vcpkg and install dependencies
4344
# NOTE: use windows batch environment notation for build arguments in RUN
@@ -48,34 +49,21 @@ ARG build_type=release
4849
ENV CMAKE_BUILD_TYPE=${build_type} \
4950
VCPKG_OVERLAY_TRIPLETS=C:\\arrow\\ci\\vcpkg \
5051
VCPKG_DEFAULT_TRIPLET=amd64-windows-static-md-${build_type} \
51-
VCPKG_FEATURE_FLAGS=-manifests
52-
53-
RUN vcpkg install --clean-after-build \
54-
abseil \
55-
aws-sdk-cpp[config,cognito-identity,core,identity-management,s3,sts,transfer] \
56-
boost-filesystem \
57-
boost-multiprecision \
58-
boost-system \
59-
brotli \
60-
bzip2 \
61-
c-ares \
62-
curl \
63-
flatbuffers \
64-
gflags \
65-
glog \
66-
google-cloud-cpp[core,storage] \
67-
grpc \
68-
lz4 \
69-
openssl \
70-
orc \
71-
protobuf \
72-
rapidjson \
73-
re2 \
74-
snappy \
75-
thrift \
76-
utf8proc \
77-
zlib \
78-
zstd
52+
VCPKG_FEATURE_FLAGS="manifests"
53+
COPY ci/vcpkg/vcpkg.json arrow/ci/vcpkg/
54+
# cannot use the S3 feature here because while aws-sdk-cpp=1.9.160 contains
55+
# ssl related fixes as well as we can patch the vcpkg portfile to support
56+
# arm machines it hits ARROW-15141 where we would need to fall back to 1.8.186
57+
# but we cannot patch those portfiles since vcpkg-tool handles the checkout of
58+
# previous versions => use bundled S3 build
59+
RUN vcpkg install \
60+
--clean-after-build \
61+
--x-install-root=%VCPKG_ROOT%\installed \
62+
--x-manifest-root=arrow/ci/vcpkg \
63+
--x-feature=flight \
64+
--x-feature=gcs \
65+
--x-feature=json \
66+
--x-feature=parquet
7967

8068
# Remove previous installations of python from the base image
8169
# NOTE: a more recent base image (tried with 2.12.1) comes with python 3.9.7
@@ -87,20 +75,20 @@ RUN wmic product where "name like 'python%%'" call uninstall /nointeractive && \
8775

8876
# Define the full version number otherwise choco falls back to patch number 0 (3.7 => 3.7.0)
8977
ARG python=3.8
90-
RUN (if "%python%"=="3.7" setx PYTHON_VERSION 3.7.12) & \
91-
(if "%python%"=="3.8" setx PYTHON_VERSION 3.8.11) & \
92-
(if "%python%"=="3.9" setx PYTHON_VERSION 3.9.9) & \
93-
(if "%python%"=="3.10" setx PYTHON_VERSION 3.10.1)
78+
RUN (if "%python%"=="3.7" setx PYTHON_VERSION "3.7.9" && setx PATH "%PATH%;C:\Python37;C:\Python37\Scripts") & \
79+
(if "%python%"=="3.8" setx PYTHON_VERSION "3.8.10" && setx PATH "%PATH%;C:\Python38;C:\Python38\Scripts") & \
80+
(if "%python%"=="3.9" setx PYTHON_VERSION "3.9.7" && setx PATH "%PATH%;C:\Python39;C:\Python39\Scripts") & \
81+
(if "%python%"=="3.10" setx PYTHON_VERSION "3.10.2" && setx PATH "%PATH%;C:\Python310;C:\Python310\Scripts")
9482
RUN choco install -r -y --no-progress python --version=%PYTHON_VERSION%
95-
RUN pip install -U pip
83+
RUN python -m pip install -U pip setuptools
9684

9785
COPY python/requirements-wheel-build.txt arrow/python/
98-
RUN pip install -r arrow/python/requirements-wheel-build.txt
86+
RUN python -m pip install -r arrow/python/requirements-wheel-build.txt
9987

100-
ENV CLCACHE_DIR="C:\clcache"
101-
ENV CLCACHE_COMPRESS=1
102-
ENV CLCACHE_COMPRESSLEVEL=6
103-
RUN pip install git+https://github.com/Nuitka/clcache.git
88+
# ENV CLCACHE_DIR="C:\clcache"
89+
# ENV CLCACHE_COMPRESS=1
90+
# ENV CLCACHE_COMPRESSLEVEL=6
91+
# RUN pip install git+https://github.com/Nuitka/clcache.git
10492

10593
# For debugging purposes
10694
# RUN wget --no-check-certificate https://github.com/lucasg/Dependencies/releases/download/v1.10/Dependencies_x64_Release.zip

ci/scripts/java_jni_manylinux_build.sh

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -55,9 +55,6 @@ export ARROW_TEST_DATA="${arrow_dir}/testing/data"
5555
export PARQUET_TEST_DATA="${arrow_dir}/cpp/submodules/parquet-testing/data"
5656
export AWS_EC2_METADATA_DISABLED=TRUE
5757

58-
# NOTE(kszucs): workaround for ARROW-15403 along with the ORC_* cmake variables
59-
vcpkg remove orc
60-
6158
mkdir -p "${build_dir}"
6259
pushd "${build_dir}"
6360

ci/scripts/python_wheel_macos_build.sh

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -87,16 +87,11 @@ echo "=== (${PYTHON_VERSION}) Building Arrow C++ libraries ==="
8787
: ${VCPKG_FEATURE_FLAGS:=-manifests}
8888
: ${VCPKG_TARGET_TRIPLET:=${VCPKG_DEFAULT_TRIPLET:-x64-osx-static-${CMAKE_BUILD_TYPE}}}
8989

90-
# NOTE(kszucs): workaround for ARROW-15403 along with the ORC_* cmake variables
91-
vcpkg remove orc
92-
9390
mkdir -p ${build_dir}/build
9491
pushd ${build_dir}/build
9592

9693
cmake \
9794
-DARROW_BUILD_SHARED=ON \
98-
-DCMAKE_APPLE_SILICON_PROCESSOR=arm64 \
99-
-DCMAKE_OSX_ARCHITECTURES=${CMAKE_OSX_ARCHITECTURES} \
10095
-DARROW_BUILD_STATIC=OFF \
10196
-DARROW_BUILD_TESTS=OFF \
10297
-DARROW_DATASET=${ARROW_DATASET} \
@@ -124,13 +119,16 @@ cmake \
124119
-DARROW_WITH_SNAPPY=${ARROW_WITH_SNAPPY} \
125120
-DARROW_WITH_ZLIB=${ARROW_WITH_ZLIB} \
126121
-DARROW_WITH_ZSTD=${ARROW_WITH_ZSTD} \
122+
-DAWSSDK_SOURCE=BUNDLED \
123+
-DCMAKE_APPLE_SILICON_PROCESSOR=arm64 \
127124
-DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} \
128125
-DCMAKE_INSTALL_LIBDIR=lib \
129126
-DCMAKE_INSTALL_PREFIX=${build_dir}/install \
127+
-DCMAKE_OSX_ARCHITECTURES=${CMAKE_OSX_ARCHITECTURES} \
130128
-DCMAKE_UNITY_BUILD=${CMAKE_UNITY_BUILD} \
131129
-DOPENSSL_USE_STATIC_LIBS=ON \
132-
-DORC_SOURCE=BUNDLED \
133130
-DORC_PROTOBUF_EXECUTABLE=${VCPKG_ROOT}/installed/${VCPKG_TARGET_TRIPLET}/tools/protobuf/protoc \
131+
-DORC_SOURCE=BUNDLED \
134132
-DVCPKG_MANIFEST_MODE=OFF \
135133
-DVCPKG_TARGET_TRIPLET=${VCPKG_TARGET_TRIPLET} \
136134
-G ${CMAKE_GENERATOR} \

ci/scripts/python_wheel_manylinux_build.sh

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -80,11 +80,9 @@ if [[ "$(uname -m)" == arm* ]] || [[ "$(uname -m)" == aarch* ]]; then
8080
export ARROW_EXTRA_CMAKE_FLAGS="-DARROW_JEMALLOC_LG_PAGE=16"
8181
fi
8282

83-
# NOTE(kszucs): workaround for ARROW-15403 along with the ORC_* cmake variables
84-
vcpkg remove orc
85-
8683
mkdir /tmp/arrow-build
8784
pushd /tmp/arrow-build
85+
8886
cmake \
8987
-DARROW_BROTLI_USE_SHARED=OFF \
9088
-DARROW_BUILD_SHARED=ON \
@@ -115,13 +113,14 @@ cmake \
115113
-DARROW_WITH_SNAPPY=${ARROW_WITH_SNAPPY} \
116114
-DARROW_WITH_ZLIB=${ARROW_WITH_ZLIB} \
117115
-DARROW_WITH_ZSTD=${ARROW_WITH_ZSTD} \
116+
-DAWSSDK_SOURCE=BUNDLED \
118117
-DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} \
119118
-DCMAKE_INSTALL_LIBDIR=lib \
120119
-DCMAKE_INSTALL_PREFIX=/tmp/arrow-dist \
121120
-DCMAKE_UNITY_BUILD=${CMAKE_UNITY_BUILD} \
122121
-DOPENSSL_USE_STATIC_LIBS=ON \
123-
-DORC_SOURCE=BUNDLED \
124122
-DORC_PROTOBUF_EXECUTABLE=${VCPKG_ROOT}/installed/${VCPKG_TARGET_TRIPLET}/tools/protobuf/protoc \
123+
-DORC_SOURCE=BUNDLED \
125124
-DVCPKG_MANIFEST_MODE=OFF \
126125
-DVCPKG_TARGET_TRIPLET=${VCPKG_TARGET_TRIPLET} \
127126
${ARROW_EXTRA_CMAKE_FLAGS} \

ci/scripts/python_wheel_windows_build.bat

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -51,9 +51,6 @@ set VCPKG_ROOT=C:\vcpkg
5151
set VCPKG_FEATURE_FLAGS=-manifests
5252
set VCGPK_TARGET_TRIPLET=amd64-windows-static-md-%CMAKE_BUILD_TYPE%
5353

54-
@rem NOTE(kszucs): workaround for ARROW-15403 along with the ORC_* cmake variables
55-
vcpkg remove orc
56-
5754
mkdir C:\arrow-build
5855
pushd C:\arrow-build
5956
cmake ^
@@ -80,13 +77,12 @@ cmake ^
8077
-DARROW_WITH_SNAPPY=%ARROW_WITH_SNAPPY% ^
8178
-DARROW_WITH_ZLIB=%ARROW_WITH_ZLIB% ^
8279
-DARROW_WITH_ZSTD=%ARROW_WITH_ZSTD% ^
80+
-DAWSSDK_SOURCE=BUNDLED ^
8381
-DCMAKE_BUILD_TYPE=%CMAKE_BUILD_TYPE% ^
8482
-DCMAKE_CXX_COMPILER=clcache ^
8583
-DCMAKE_INSTALL_PREFIX=C:\arrow-dist ^
8684
-DCMAKE_UNITY_BUILD=%CMAKE_UNITY_BUILD% ^
8785
-DMSVC_LINK_VERBOSE=ON ^
88-
-DORC_SOURCE=BUNDLED ^
89-
-DORC_PROTOBUF_EXECUTABLE=%VCPKG_ROOT%\installed\%VCGPK_TARGET_TRIPLET%\tools\protobuf\protoc.exe ^
9086
-DVCPKG_MANIFEST_MODE=OFF ^
9187
-DVCPKG_TARGET_TRIPLET=%VCGPK_TARGET_TRIPLET% ^
9288
-G "%CMAKE_GENERATOR%" ^

ci/scripts/python_wheel_windows_test.bat

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -41,15 +41,15 @@ pip install -r C:\arrow\python\requirements-wheel-test.txt || exit /B
4141
python -m pip install --no-index --find-links=C:\arrow\python\dist\ pyarrow || exit /B
4242

4343
@REM Test that the modules are importable
44-
python -c "import pyarrow"
45-
python -c "import pyarrow._hdfs"
46-
python -c "import pyarrow._s3fs"
47-
python -c "import pyarrow.csv"
48-
python -c "import pyarrow.dataset"
49-
python -c "import pyarrow.flight"
50-
python -c "import pyarrow.fs"
51-
python -c "import pyarrow.json"
52-
python -c "import pyarrow.parquet"
44+
python -c "import pyarrow" || exit /B
45+
python -c "import pyarrow._hdfs" || exit /B
46+
python -c "import pyarrow._s3fs" || exit /B
47+
python -c "import pyarrow.csv" || exit /B
48+
python -c "import pyarrow.dataset" || exit /B
49+
python -c "import pyarrow.flight" || exit /B
50+
python -c "import pyarrow.fs" || exit /B
51+
python -c "import pyarrow.json" || exit /B
52+
python -c "import pyarrow.parquet" || exit /B
5353

5454
@REM Execute unittest
5555
pytest -r s --pyargs pyarrow || exit /B

ci/vcpkg/ports.patch

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -164,20 +164,20 @@ index 00000000..4566cc66
164164
+- " set(LibCrypto_SHARED_LIBRARY ${OPENSSL_CRYPTO_LIBRARY} CACHE INTERNAL \"The OpenSSL crypto shared library\")\n"
165165
+- "endif()\n"
166166
+ )
167-
+
167+
+
168168
+ if (NOT SIMPLE_INSTALL)
169169
diff --git a/ports/aws-sdk-cpp/portfile.cmake b/ports/aws-sdk-cpp/portfile.cmake
170170
index 2d6bba4d..0ac47887 100644
171171
--- a/ports/aws-sdk-cpp/portfile.cmake
172172
+++ b/ports/aws-sdk-cpp/portfile.cmake
173173
@@ -8,6 +8,7 @@ vcpkg_from_github(
174-
PATCHES
175-
patch-relocatable-rpath.patch
176-
fix-aws-root.patch
177-
+ fix-find-crypto.patch
178-
)
179-
180-
string(COMPARE EQUAL "${VCPKG_CRT_LINKAGE}" "dynamic" FORCE_SHARED_CRT)
174+
PATCHES
175+
patch-relocatable-rpath.patch
176+
fix-aws-root.patch
177+
+ fix-find-crypto.patch
178+
)
179+
180+
string(COMPARE EQUAL "${VCPKG_CRT_LINKAGE}" "dynamic" FORCE_SHARED_CRT)
181181
diff --git a/ports/aws-sdk-cpp/vcpkg.json b/ports/aws-sdk-cpp/vcpkg.json
182182
index 3836e2b6..cd88ef07 100644
183183
--- a/ports/aws-sdk-cpp/vcpkg.json
@@ -216,7 +216,7 @@ index 45b8c706..b409d8a7 100644
216216
HEAD_REF master
217217
+ PATCHES "snappy-disable-bmi.patch"
218218
)
219-
219+
220220
vcpkg_cmake_configure(
221221
diff --git a/ports/snappy/snappy-disable-bmi.patch b/ports/snappy/snappy-disable-bmi.patch
222222
new file mode 100644
@@ -241,5 +241,5 @@ index 00000000..a57ce0c2
241241
+ return v & ~(mask << (8 * n));
242242
+-#endif
243243
+ }
244-
+
244+
+
245245
+ static inline bool LeftShiftOverflows(uint8_t value, uint32_t shift) {

0 commit comments

Comments
 (0)