# Skip to content
#
# Improve BPE
#
# Improve BPE #1511
#
# Workflow file for this run

# Linux CI workflow header: display name, triggers, concurrency policy,
# workflow-wide environment and token permissions.
name: Linux (Ubuntu 22.04, Python 3.11)

# Triggers: manual dispatch, pull requests, merge-queue groups, and pushes to
# `master` or any `releases/**` branch.
on:
  workflow_dispatch:
  pull_request:
  merge_group:
  push:
    branches:
      - master
      - 'releases/**'

concurrency:
  # github.ref is not unique in post-commit
  # Push events therefore key the group on the run id (one group per run);
  # every other event keys it on the ref, so a newer run on the same ref
  # cancels the one still in progress.
  group: ${{ github.event_name == 'push' && github.run_id || github.ref }}-linux
  cancel-in-progress: true

env:
  # Quoted so the version stays a string rather than a float.
  PYTHON_VERSION: '3.11'
  # Ref used when checking out openvinotoolkit/openvino in the jobs below:
  # the PR base branch, else the merge-group base ref, else the triggering ref.
  OV_BRANCH: ${{ github.base_ref || github.event.merge_group.base_ref || github.ref }}

# Read-only access for every token scope.
permissions: read-all
jobs:
  # ---------------------------------------------------------------------------
  # Runs the openvino_provider action and re-exports the step outcome and the
  # action outputs so the downstream jobs can read them through `needs`.
  # ---------------------------------------------------------------------------
  openvino_download:
    name: Download prebuilt OpenVINO
    outputs:
      status: ${{ steps.openvino_download.outcome }}
      # Read by every "Download OpenVINO package" step below. It must be
      # declared here, otherwise `needs.openvino_download.outputs.ov_artifact_name`
      # evaluates to an empty string in the consuming jobs.
      ov_artifact_name: ${{ steps.openvino_download.outputs.ov_artifact_name }}
      ov_wheel_source: ${{ steps.openvino_download.outputs.ov_wheel_source }}
      ov_version: ${{ steps.openvino_download.outputs.ov_version }}
    timeout-minutes: 10
    defaults:
      run:
        shell: bash
    runs-on: aks-linux-2-cores-8gb
    container:
      image: 'openvinogithubactions.azurecr.io/openvino_provider:0.1.0'
      volumes: [ "/mount:/mount" ]
    steps:
      - uses: openvinotoolkit/openvino/.github/actions/openvino_provider@master
        id: openvino_download
        with:
          platform: 'ubuntu22'
          revision: 'latest_available_commit'

  # ---------------------------------------------------------------------------
  # Configures, builds and installs the tokenizers project with CMake, then
  # packs the install tree into ov_tokenizers.tar.gz and uploads it.
  # ---------------------------------------------------------------------------
  openvino_tokenizers_cpack:
    name: OpenVINO tokenizers cpack (BUILD_FAST_TOKENIZERS=${{ matrix.build_fast_tokenizers }}, BUILD_TYPE=${{ matrix.build_type }})
    strategy:
      matrix:
        # Quoted: a bare ON/OFF is a boolean for YAML 1.1 parsers.
        build_fast_tokenizers: ['ON']
        build_type: [Release] # TODO: Add Debug build when OV provider is ready or use OV package
    needs: [ openvino_download ]
    # An explicit status function (always()) replaces the implicit success()
    # check; the job then runs only when the provider step itself succeeded.
    if: |
      always() &&
      (needs.openvino_download.outputs.status == 'success')
    timeout-minutes: 45
    defaults:
      run:
        shell: bash
    runs-on: ubuntu-22.04
    env:
      CMAKE_GENERATOR: 'Ninja'
      OPENVINO_REPO: ${{ github.workspace }}/openvino
      INSTALL_DIR: ${{ github.workspace }}/openvino/install
      OV_TOKENIZERS_INSTALL_DIR: ${{ github.workspace }}/openvino_tokenizers/install
      OPENVINO_TOKENIZERS_REPO: ${{ github.workspace }}/openvino_tokenizers
      BUILD_DIR: ${{ github.workspace }}/openvino_tokenizers/build
    steps:
      - name: Clone Openvino tokenizers
        uses: actions/checkout@eef61447b9ff4aafe5dcd4e0bbf5d482be7e7871 # v4.2.1
        with:
          path: ${{ env.OPENVINO_TOKENIZERS_REPO }}

      # Sparse checkout requesting just install_build_dependencies.sh, the
      # script executed by the "Install build dependencies" step.
      - name: Clone Openvino
        uses: actions/checkout@eef61447b9ff4aafe5dcd4e0bbf5d482be7e7871 # v4.2.1
        with:
          repository: 'openvinotoolkit/openvino'
          path: ${{ env.OPENVINO_REPO }}
          ref: ${{ env.OV_BRANCH }}
          sparse-checkout: |
            install_build_dependencies.sh

      # Fetches the artifact whose name the openvino_download job reports.
      - name: Download OpenVINO package
        uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8
        with:
          name: ${{ needs.openvino_download.outputs.ov_artifact_name }}
          path: ${{ env.INSTALL_DIR }}
          merge-multiple: true

      #
      # Dependencies
      #

      - name: Install build dependencies
        run: sudo ${{ env.OPENVINO_REPO }}/install_build_dependencies.sh

      #
      # Build
      #

      # setupvars.sh from the downloaded package is sourced before configuring.
      - name: CMake configure - tokenizers
        run: |
          source ${INSTALL_DIR}/setupvars.sh
          cmake -DBUILD_FAST_TOKENIZERS="${{ matrix.build_fast_tokenizers }}" \
                -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} \
                -S ${{ env.OPENVINO_TOKENIZERS_REPO }} \
                -B ${{ env.BUILD_DIR }}

      - name: Cmake build - tokenizers
        run: cmake --build ${{ env.BUILD_DIR }} --parallel --config ${{ matrix.build_type }} --verbose
        env:
          CMAKE_BUILD_PARALLEL_LEVEL: '4'

      - name: Cmake install - tokenizers
        run: |
          cmake --install ${{ env.BUILD_DIR }} --config=${{ matrix.build_type }} --prefix=${{ env.OV_TOKENIZERS_INSTALL_DIR }}/ov_tokenizers

      # Archives everything under the install dir into the build dir so the
      # upload step's *.tar.gz glob picks it up.
      - name: Pack Artifacts
        run: |
          pushd ${OV_TOKENIZERS_INSTALL_DIR}
            tar -czvf ${BUILD_DIR}/ov_tokenizers.tar.gz *
          popd

      #
      # Upload build artifacts
      #

      - name: Upload openvino tokenizers package
        if: ${{ always() }}
        uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3
        with:
          name: openvino_tokenizers_cpack_${{ matrix.build_fast_tokenizers }}_${{ matrix.build_type }}
          path: ${{ env.BUILD_DIR }}/*.tar.gz
          if-no-files-found: 'error'

  # ---------------------------------------------------------------------------
  # Builds the tokenizers Python wheel with `pip wheel`, once per
  # BUILD_FAST_TOKENIZERS value, and uploads it as
  # openvino_tokenizers_wheel_<value>.
  # ---------------------------------------------------------------------------
  openvino_tokenizers_wheel:
    name: OpenVINO tokenizers extension (BUILD_FAST_TOKENIZERS=${{ matrix.build_fast_tokenizers }})
    strategy:
      matrix:
        # Quoted: a bare ON/OFF is a boolean for YAML 1.1 parsers.
        build_fast_tokenizers: ['ON', 'OFF']
    needs: [ openvino_download ]
    if: |
      always() &&
      (needs.openvino_download.outputs.status == 'success')
    timeout-minutes: 25
    defaults:
      run:
        shell: bash
    runs-on: ubuntu-22.04
    env:
      OPENVINO_REPO: ${{ github.workspace }}/openvino
      INSTALL_DIR: ${{ github.workspace }}/openvino/install
      OPENVINO_TOKENIZERS_REPO: ${{ github.workspace }}/openvino_tokenizers
      BUILD_DIR: ${{ github.workspace }}/openvino_tokenizers/build
    steps:
      - name: Clone Openvino tokenizers
        uses: actions/checkout@eef61447b9ff4aafe5dcd4e0bbf5d482be7e7871 # v4.2.1
        with:
          path: ${{ env.OPENVINO_TOKENIZERS_REPO }}

      # Sparse checkout requesting just install_build_dependencies.sh, the
      # script executed by the "Install build dependencies" step.
      - name: Clone Openvino
        uses: actions/checkout@eef61447b9ff4aafe5dcd4e0bbf5d482be7e7871 # v4.2.1
        with:
          repository: 'openvinotoolkit/openvino'
          path: ${{ env.OPENVINO_REPO }}
          ref: ${{ env.OV_BRANCH }}
          sparse-checkout: |
            install_build_dependencies.sh

      - name: Setup Python ${{ env.PYTHON_VERSION }}
        uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0
        with:
          python-version: ${{ env.PYTHON_VERSION }}
          cache: 'pip'

      # Fetches the artifact whose name the openvino_download job reports.
      - name: Download OpenVINO package
        uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8
        with:
          name: ${{ needs.openvino_download.outputs.ov_artifact_name }}
          path: ${{ env.INSTALL_DIR }}
          merge-multiple: true

      #
      # Dependencies
      #

      - name: Install build dependencies
        run: sudo ${{ env.OPENVINO_REPO }}/install_build_dependencies.sh

      #
      # Build
      #

      # Runs from INSTALL_DIR. ov_wheel_source is passed to pip verbatim;
      # presumably it carries the options locating the OpenVINO wheel -
      # confirm against the openvino_provider action.
      - name: Build tokenizers wheel
        run: |
          python -m pip wheel -v --no-deps --wheel-dir ${BUILD_DIR} \
            --config-settings=override=cross.arch="manylinux_2_31_x86_64" \
            --config-settings=override=cmake.options.BUILD_FAST_TOKENIZERS="${{ matrix.build_fast_tokenizers }}" \
            ${{ needs.openvino_download.outputs.ov_wheel_source }} \
            ${OPENVINO_TOKENIZERS_REPO}
        env:
          CMAKE_BUILD_PARALLEL_LEVEL: '4'
          CMAKE_GENERATOR: 'Unix Makefiles'
        working-directory: ${{ env.INSTALL_DIR }}

      #
      # Upload build artifacts
      #

      - name: Upload openvino tokenizers wheel
        if: ${{ always() }}
        uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3
        with:
          name: openvino_tokenizers_wheel_${{ matrix.build_fast_tokenizers }}
          path: ${{ env.BUILD_DIR }}/*.whl
          if-no-files-found: 'error'

  # ---------------------------------------------------------------------------
  # Installs the OpenVINO wheel plus the matching tokenizers wheel produced by
  # openvino_tokenizers_wheel and runs the pytest suites from tests/.
  # ---------------------------------------------------------------------------
  openvino_tokenizers_tests:
    name: OpenVINO tokenizers tests (BUILD_FAST_TOKENIZERS=${{ matrix.build_fast_tokenizers }})
    strategy:
      matrix:
        # Same values as the wheel job so the artifact names line up.
        build_fast_tokenizers: ['ON', 'OFF']
    needs: [ openvino_download, openvino_tokenizers_wheel]
    if: always() && needs.openvino_tokenizers_wheel.result == 'success'
    timeout-minutes: 45
    defaults:
      run:
        shell: bash
    runs-on: ubuntu-22.04
    env:
      OPENVINO_REPO: ${{ github.workspace }}/openvino
      INSTALL_DIR: ${{ github.workspace }}/openvino/install
      OPENVINO_TOKENIZERS_REPO: ${{ github.workspace }}/openvino_tokenizers
      BUILD_DIR: ${{ github.workspace }}/openvino_tokenizers/build
    steps:
      - name: Clone Openvino tokenizers sources and tests
        uses: actions/checkout@eef61447b9ff4aafe5dcd4e0bbf5d482be7e7871 # v4.2.1
        with:
          path: ${{ env.OPENVINO_TOKENIZERS_REPO }}

      - name: Setup Python ${{ env.PYTHON_VERSION }}
        uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0
        with:
          python-version: ${{ env.PYTHON_VERSION }}
          cache: 'pip'

      # Wheel built by the openvino_tokenizers_wheel job for this matrix value.
      - name: Download tokenizers package
        uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8
        with:
          name: openvino_tokenizers_wheel_${{ matrix.build_fast_tokenizers }}
          path: ${{ env.INSTALL_DIR }}/ov_tokenizers

      # Fetches the artifact whose name the openvino_download job reports.
      - name: Download OpenVINO package
        uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8
        with:
          name: ${{ needs.openvino_download.outputs.ov_artifact_name }}
          path: ${{ env.INSTALL_DIR }}
          merge-multiple: true

      # Pins openvino to the version reported by the provider job.
      - name: Install OpenVINO Python wheel from pre-built artifacts
        run: |
          python3 -m pip install openvino==${{ needs.openvino_download.outputs.ov_version }} ${{ needs.openvino_download.outputs.ov_wheel_source }}
        working-directory: ${{ env.INSTALL_DIR }}

      # Installs the downloaded wheel together with its `dev` extra; the
      # PyTorch CPU wheel index is added as an extra pip index.
      - name: Install OpenVINO tokenizers wheel
        run: |
          # Find and install wheel
          pushd ${INSTALL_DIR}/ov_tokenizers
            wheel_name=$(find . -name 'openvino_tokenizers*.whl')
            python3 -m pip install $wheel_name[dev]
          popd
        env:
          PIP_EXTRA_INDEX_URL: "https://download.pytorch.org/whl/cpu"

      - name: Tokenizers regression tests (using openvino python modules)
        run: |
          source ${INSTALL_DIR}/setupvars.sh
          python3 -m pytest tokenizers_test.py layer_tests.py
        working-directory: ${{ env.OPENVINO_TOKENIZERS_REPO }}/tests