@@ -41,6 +41,7 @@ RUN ( apt-get update || \
4141 git \
4242 python3 \
4343 python3-pip \
44+ xz-utils \
4445 ninja-build \
4546 software-properties-common \
4647 apt-transport-https \
@@ -49,6 +50,21 @@ RUN ( apt-get update || \
4950 lsb-release \
5051 && rm -rf /var/lib/apt/lists/*
5152
53+ # Install zstd (for icu/compress-data.ts). Pinned: focal's apt has 1.4.4, which compresses meaningfully worse than 1.5.x;
54+ # this matches Bun's vendored decoder. Fetched to a file (not piped into tar) so a curl failure fails the step: the default RUN shell has no pipefail.
55+ ARG ZSTD_VERSION=1.5.7
56+ RUN curl -fsSL -o /tmp/zstd.tar.gz "https://github.com/facebook/zstd/releases/download/v${ZSTD_VERSION}/zstd-${ZSTD_VERSION}.tar.gz" && tar -xzf /tmp/zstd.tar.gz -C /tmp \
57+ && make -C /tmp/zstd-${ZSTD_VERSION}/programs zstd -j$(nproc) \
58+ && cp /tmp/zstd-${ZSTD_VERSION}/programs/zstd /usr/local/bin/ \
59+ && rm -rf /tmp/zstd-${ZSTD_VERSION} /tmp/zstd.tar.gz \
60+ && zstd --version
61+
62+ # Install Node (for icu/compress-data.ts; needs >=23.6 for default type stripping). Fetched to a file first so a failed download aborts the step (no pipefail in the default RUN shell).
63+ ARG NODE_VERSION=24.16.0
64+ RUN curl -fsSL -o /tmp/node.tar.xz "https://nodejs.org/dist/v${NODE_VERSION}/node-v${NODE_VERSION}-linux-$(uname -m | sed 's/x86_64/x64/;s/aarch64/arm64/').tar.xz" \
65+ && tar -xJf /tmp/node.tar.xz -C /usr/local --strip-components=1 && rm -f /tmp/node.tar.xz \
66+ && node --version
67+
5268# Install modern CMake for Ubuntu
5369RUN wget -O - https://apt.kitware.com/keys/kitware-archive-latest.asc 2>/dev/null | gpg --dearmor - | tee /etc/apt/trusted.gpg.d/kitware.gpg >/dev/null \
5470 && apt-add-repository "deb https://apt.kitware.com/ubuntu/ $(lsb_release -cs) main" \
@@ -177,12 +193,17 @@ RUN echo "#include <iostream>\n#include <numbers>\nint main() { std::cout << std
177193
178194# Download and build ICU.
179195#
196+ # After tar, patch udata.cpp with a per-item decompression hook (a weak extern
197+ # Bun defines; null in ICU's own tools).
198+ #
180199# After the first `make` (which produces bin/icupkg), filter data/in/icudt75l.dat
181- # to drop converters/translit/rbnf/stringprep/confusables/unames, then rebuild
182- # the data target. Bun has zero ucnv_/utrans_/usprep_/uspoof_ consumers
183- # (TextCodecICU is removed in src/bun.js/bindings/TextEncodingRegistry.cpp and
184- # UCONFIG_NO_LEGACY_CONVERSION=1 is set below), so this is unreachable data.
185- # Cuts libicudata.a by ~7.4 MB with no observable change.
200+ # to drop converters/translit/rbnf/stringprep/confusables/unames — Bun has zero
201+ # ucnv_/utrans_/usprep_/uspoof_ consumers — then rebuild.
202+ #
203+ # Finally, repack the filtered .dat with per-item zstd (icu/compress-data.ts).
204+ # Items matching icu/keep-raw.txt stay uncompressed (too expensive to decode lazily).
205+ # The repacked libicudata.a also embeds the trained zstd dictionary.
206+ COPY icu/ /icu-bun/
186207ADD https://github.com/unicode-org/icu/releases/download/release-75-1/icu4c-75_1-src.tgz /icu.tgz
187208RUN --mount=type=tmpfs,target=/icu \
188209 export CFLAGS="$CFLAGS -Os -std=c17 $LTO_FLAG" && \
@@ -191,14 +212,16 @@ RUN --mount=type=tmpfs,target=/icu \
191212 cd /icu && \
192213 tar -xf /icu.tgz --strip-components=1 && \
193214 rm /icu.tgz && \
215+ patch -p1 < /icu-bun/udata-decompress-hook.patch && \
194216 cd source && \
195217 ./configure --enable-static --disable-shared --disable-layoutex --disable-layout --with-data-packaging=static --disable-samples --disable-debug --disable-tests --disable-extras --disable-icuio && \
196218 make -j$(nproc) && \
197219 bin/icupkg -l data/in/icudt75l.dat | grep -E '\.(cnv|spp|cfu)$|^cnvalias\.icu$|^translit/|^rbnf/|^unames\.icu$' > data/in/rm.lst && \
198220 bin/icupkg --auto_toc_prefix -r data/in/rm.lst data/in/icudt75l.dat data/in/icudt75l_filtered.dat && \
199221 mv -f data/in/icudt75l_filtered.dat data/in/icudt75l.dat && \
200222 rm -rf data/out lib/libicudata.a && make -j$(nproc) && \
201- make install && cp -r /icu/source/lib/* /output/lib && cp -r /icu/source/i18n/unicode/* /icu/source/common/unicode/* /output/include/unicode
223+ make install && cp -r /icu/source/lib/* /output/lib && cp -r /icu/source/i18n/unicode/* /icu/source/common/unicode/* /output/include/unicode && \
224+ node --experimental-strip-types /icu-bun/compress-data.ts data/in/icudt75l.dat /output/lib/libicudata.a --skip /icu-bun/keep-raw.txt --icupkg bin/icupkg
202225
203226# Copy WebKit source and build
204227COPY . /webkit
0 commit comments