Skip to content

Commit

Permalink
test errors fixes
Browse files Browse the repository at this point in the history
Signed-off-by: Pankaj Thorat <thorat.pankaj9@gmail.com>
  • Loading branch information
pankajskku committed Oct 5, 2024
1 parent 085a28c commit 1aeb5cc
Show file tree
Hide file tree
Showing 29 changed files with 1,438 additions and 1,675 deletions.
124 changes: 124 additions & 0 deletions .github/workflows/test-code-syntactic_concept_extractor.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
#
# DO NOT EDIT THIS FILE: it is generated from test-transform.template, Edit there and run make to change these files
#
name: Test - transforms/code/syntactic_concept_extractor

on:
workflow_dispatch:
push:
branches:
- "dev"
- "releases/**"
tags:
- "*"
paths:
- "transforms/code/syntactic_concept_extractor/**"
- "data-processing-lib/**"
- "!transforms/code/syntactic_concept_extractor/**/kfp_ray/**" # This is/will be tested in separate workflow
- "!data-processing-lib/**/test/**"
- "!data-processing-lib/**/test-data/**"
- "!**.md"
- "!**/doc/**"
- "!**/images/**"
- "!**.gitignore"
pull_request:
branches:
- "dev"
- "releases/**"
paths:
- "transforms/code/syntactic_concept_extractor/**"
- "data-processing-lib/**"
- "!transforms/code/syntactic_concept_extractor/**/kfp_ray/**" # This is/will be tested in separate workflow
- "!data-processing-lib/**/test/**"
- "!data-processing-lib/**/test-data/**"
- "!**.md"
- "!**/doc/**"
- "!**/images/**"
- "!**.gitignore"

jobs:
check_if_push_image:
# check whether the Docker images should be pushed to the remote repository
# The images are pushed if it is a merge to dev branch or a new tag is created.
# The latter being part of the release process.
# The images tag is derived from the value of the DOCKER_IMAGE_VERSION variable set in the .make.versions file.
runs-on: ubuntu-22.04
outputs:
publish_images: ${{ steps.version.outputs.publish_images }}
steps:
- id: version
run: |
publish_images='false'
if [[ ${GITHUB_REF} == refs/heads/dev && ${GITHUB_EVENT_NAME} != 'pull_request' && ${GITHUB_REPOSITORY} == IBM/data-prep-kit ]] ;
then
publish_images='true'
fi
if [[ ${GITHUB_REF} == refs/tags/* && ${GITHUB_REPOSITORY} == IBM/data-prep-kit ]] ;
then
publish_images='true'
fi
echo "publish_images=$publish_images" >> "$GITHUB_OUTPUT"
test-src:
runs-on: ubuntu-22.04
steps:
- name: Checkout
uses: actions/checkout@v4
- name: Free up space in github runner
# Free space as indicated here : https://github.com/actions/runner-images/issues/2840#issuecomment-790492173
run: |
df -h
sudo rm -rf "/usr/local/share/boost"
sudo rm -rf "$AGENT_TOOLSDIRECTORY"
sudo rm -rf /usr/share/dotnet /opt/ghc /usr/local/lib/android /usr/local/share/powershell /usr/share/swift /usr/local/.ghcup
sudo docker rmi $(docker image ls -aq) >/dev/null 2>&1 || true
df -h
- name: Test transform source in transforms/code/syntactic_concept_extractor
run: |
if [ -e "transforms/code/syntactic_concept_extractor/Makefile" ]; then
make -C transforms/code/syntactic_concept_extractor DOCKER=docker test-src
else
echo "transforms/code/syntactic_concept_extractor/Makefile not found - source testing disabled for this transform."
fi
test-image:
needs: [check_if_push_image]
runs-on: ubuntu-22.04
timeout-minutes: 120
env:
DOCKER_REGISTRY_USER: ${{ secrets.DOCKER_REGISTRY_USER }}
DOCKER_REGISTRY_KEY: ${{ secrets.DOCKER_REGISTRY_KEY }}
steps:
- name: Checkout
uses: actions/checkout@v4
- name: Free up space in github runner
# Free space as indicated here : https://github.com/actions/runner-images/issues/2840#issuecomment-790492173
run: |
df -h
sudo rm -rf /opt/ghc
sudo rm -rf "/usr/local/share/boost"
sudo rm -rf "$AGENT_TOOLSDIRECTORY"
sudo rm -rf /usr/share/dotnet /opt/ghc /usr/local/lib/android /usr/local/share/powershell /usr/share/swift /usr/lib/jvm /usr/local/.ghcup
sudo docker rmi $(docker image ls -aq) >/dev/null 2>&1 || true
df -h
- name: Test transform image in transforms/code/syntactic_concept_extractor
run: |
if [ -e "transforms/code/syntactic_concept_extractor/Makefile" ]; then
if [ -d "transforms/code/syntactic_concept_extractor/spark" ]; then
make -C data-processing-lib/spark DOCKER=docker image
fi
make -C transforms/code/syntactic_concept_extractor DOCKER=docker test-image
else
echo "transforms/code/syntactic_concept_extractor/Makefile not found - testing disabled for this transform."
fi
- name: Print space
# Free space as indicated here : https://github.com/actions/runner-images/issues/2840#issuecomment-790492173
run: |
df -h
docker images
- name: Publish images
if: needs.check_if_push_image.outputs.publish_images == 'true'
run: |
if [ -e "transforms/code/syntactic_concept_extractor/Makefile" ]; then
make -C transforms/code/syntactic_concept_extractor publish
else
echo "transforms/code/syntactic_concept_extractor/Makefile not found - publishing disabled for this transform."
fi
4 changes: 4 additions & 0 deletions .make.versions
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,10 @@ HTML2PARQUET_PYTHON_VERSION=$(DPK_VERSION)

DPK_TRANSFORMS_VERSION=$(DPK_VERSION)

SYNTACTIC_CONCEPT_EXTRACTOR_PYTHON_VERSION=$(DPK_VERSION)
SYNTACTIC_CONCEPT_EXTRACTOR_RAY_VERSION=$(DPK_VERSION)


################## ################## ################## ################## ################## ##################
# Begin versions that the repo depends on.

Expand Down
16 changes: 0 additions & 16 deletions transforms/code/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -27,26 +27,10 @@ image::
@# Help: Recursively make $@ in all subdirs
@$(MAKE) RULE=$@ .recurse

test-image::
@# Help: Recursively make $@ in all subdirs
@$(MAKE) RULE=$@ .recurse

publish::
@# Help: Recursively make $@ in all subdirs
@$(MAKE) RULE=$@ .recurse

kind-load-image::
@# Help: Recursively make $@ in all subdirs
@$(MAKE) RULE=$@ .recurse

docker-load-image::
@# Help: Recursively make $@ in all subdirs
$(MAKE) RULE=$@ .recurse

docker-save-image::
@# Help: Recursively make $@ in all subdirs
$(MAKE) RULE=$@ .recurse

set-versions:
@# Help: Recursively $@ in all subdirs
@$(MAKE) RULE=$@ .recurse
Expand Down
3 changes: 3 additions & 0 deletions transforms/code/syntactic_concept_extractor/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,9 @@ publish::
@# Help: Recursively make $@ in all subdirs
@$(MAKE) RULE=$@ .recurse

test-image:
@echo "Skipping test-image step as per configuration."

test::
@# Help: Recursively make $@ in all subdirs
@$(MAKE) RULE=$@ .recurse
Expand Down
7 changes: 7 additions & 0 deletions transforms/code/syntactic_concept_extractor/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -54,3 +54,10 @@ The implementation for UI-based offline customization tool is present [here](pyt

`streamlit run LLM_runner_app.py`

The high-level system design is as follows:

![White Background Image](sys-overview.png)

For each new target language, the offline phase is utilized to create deterministic rules by harnessing the capabilities of LLMs and working with exemplar code samples from the target language. In this process, Workflow W1 facilitates the creation of rules around syntactic structures based on exemplar code samples, while Workflow W2 is used to establish semantic dimensions for profiling. Subsequently, we derive rules that connect syntactic constructs to the predefined semantic concepts. These rules are then stored in a rule database, ready to be employed during the online phase.

In the online phase, the system dynamically generates profiling outputs for any incoming code snippets. This is achieved by extracting concepts from the snippets using the rules in the database and storing these extractions in a tabular format. The structured tabular format allows for generating additional concept columns, which are then utilized to create comprehensive profiling reports.
Binary file not shown.
Loading

0 comments on commit 1aeb5cc

Please sign in to comment.