Skip to content

Commit

Permalink
Add support for Telemetry feature
Browse files Browse the repository at this point in the history
This enables shipping telemetry data (chaos + OCP metadata) and prometheus
dump to a centralized location: krkn-chaos/krkn#435.
  • Loading branch information
chaitanyaenr committed Aug 11, 2023
1 parent ded4269 commit e222f78
Show file tree
Hide file tree
Showing 3 changed files with 37 additions and 0 deletions.
13 changes: 13 additions & 0 deletions config.yaml.template
Original file line number Diff line number Diff line change
Expand Up @@ -33,3 +33,16 @@ tunings:
wait_duration: $WAIT_DURATION # Duration to wait between each chaos scenario
iterations: $ITERATIONS # Number of times to execute the scenarios
daemon_mode: $DAEMON_MODE # Iterations are set to infinity which means that the kraken will cause chaos forever

# Telemetry collection settings. Every $VARIABLE placeholder below is filled in
# from the environment variable of the same name (defaults are set in env.sh).
telemetry:
    enabled: $TELEMETRY_ENABLED                                 # enables/disables the telemetry collection feature
    api_url: $TELEMETRY_API_URL                                 # telemetry service endpoint
    username: $TELEMETRY_USERNAME                               # telemetry service username
    password: $TELEMETRY_PASSWORD                               # telemetry service password
    prometheus_backup: $TELEMETRY_PROMETHEUS_BACKUP             # enables/disables prometheus data collection
    full_prometheus_backup: $TELEMETRY_FULL_PROMETHEUS_BACKUP   # if set to False, only the /prometheus/wal folder will be downloaded
    backup_threads: $TELEMETRY_BACKUP_THREADS                   # number of telemetry download/upload threads
    archive_path: $TELEMETRY_ARCHIVE_PATH                       # local path where the archive files will be temporarily stored
    max_retries: $TELEMETRY_MAX_RETRIES                         # maximum number of upload retries (if 0, will retry forever)
    run_tag: $TELEMETRY_RUN_TAG                                 # if set, this will be appended to the run folder in the bucket (useful to group the runs)
    archive_size: $TELEMETRY_ARCHIVE_SIZE                       # size of the prometheus data archive in KB; the lower the archive size, the higher the number of archive files produced and uploaded
13 changes: 13 additions & 0 deletions docs/all_scenarios_env.md
Original file line number Diff line number Diff line change
Expand Up @@ -26,3 +26,16 @@ CAPTURE_METRICS | Captures metrics as specified in the profile from in-cluster
ENABLE_ALERTS | Evaluates expressions from in-cluster prometheus and exits 0 or 1 based on the severity set. [Default profile](https://github.com/redhat-chaos/krkn/blob/master/config/alerts). More details can be found [here](https://github.com/redhat-chaos/krkn#alerts) | False |
ALERTS_PATH | Path to the alerts file to use when ENABLE_ALERTS is set | config/alerts |
CHECK_CRITICAL_ALERTS | When enabled will check prometheus for critical alerts firing post chaos | False |
TELEMETRY_ENABLED | Enables/disables the telemetry collection feature | False |
TELEMETRY_API_URL | telemetry service endpoint | https://ulnmf9xv7j.execute-api.us-west-2.amazonaws.com/production |
TELEMETRY_USERNAME | telemetry service username | redhat-chaos |
TELEMETRY_PASSWORD | telemetry service password | No default |
TELEMETRY_PROMETHEUS_BACKUP | enables/disables prometheus data collection | True |
TELEMETRY_FULL_PROMETHEUS_BACKUP | if set to False, only the /prometheus/wal folder will be downloaded | False |
TELEMETRY_BACKUP_THREADS | number of telemetry download/upload threads | 5 |
TELEMETRY_ARCHIVE_PATH | local path where the archive files will be temporarily stored | /tmp |
TELEMETRY_MAX_RETRIES | maximum number of upload retries (if 0 will retry forever) | 0 |
TELEMETRY_RUN_TAG | if set, this will be appended to the run folder in the bucket (useful to group the runs) | chaos |
TELEMETRY_ARCHIVE_SIZE | size of the prometheus data archive in KB; the lower the archive size, the higher the number of archive files produced and uploaded | 1000 |

**NOTE**: For TELEMETRY_ARCHIVE_SIZE, the lower the archive size, the higher the number of archive files that will be produced and uploaded (and processed by the backup_threads simultaneously). For unstable/slow connections it is better to keep this value low and increase the number of backup_threads; this way, on upload failure, the retry will happen only on the failed chunk without affecting the whole upload.
11 changes: 11 additions & 0 deletions env.sh
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,17 @@ export ALERTS_PATH=${ALERTS_PATH:=config/alerts}
export ES_SERVER=${ES_SERVER:=http://0.0.0.0:9200}
export CHECK_CRITICAL_ALERTS=${CHECK_CRITICAL_ALERTS:=False}
export KUBE_BURNER_URL=${KUBE_BURNER_URL:=https://github.com/cloud-bulldozer/kube-burner/releases/download/v1.7.0/kube-burner-1.7.0-Linux-x86_64.tar.gz}

# Telemetry settings. Each variable keeps a caller-supplied value and otherwise
# falls back to the default shown; the names must match the $PLACEHOLDERS used
# in the telemetry section of config.yaml.template.
export TELEMETRY_ENABLED=${TELEMETRY_ENABLED:=False}
export TELEMETRY_API_URL=${TELEMETRY_API_URL:=https://ulnmf9xv7j.execute-api.us-west-2.amazonaws.com/production}
export TELEMETRY_USERNAME=${TELEMETRY_USERNAME:=redhat-chaos}
# No default for the password: it is passed through as-is (empty when unset).
# The ":-" form keeps this line safe if the script is sourced under `set -u`.
export TELEMETRY_PASSWORD=${TELEMETRY_PASSWORD:-}
export TELEMETRY_PROMETHEUS_BACKUP=${TELEMETRY_PROMETHEUS_BACKUP:=True}
# The exported name must be spelled TELEMETRY_* (it was previously exported as
# TELEMTRY_*, which left the correctly spelled variable unexported and therefore
# invisible to child processes).
export TELEMETRY_FULL_PROMETHEUS_BACKUP=${TELEMETRY_FULL_PROMETHEUS_BACKUP:=False}
export TELEMETRY_BACKUP_THREADS=${TELEMETRY_BACKUP_THREADS:=5}
export TELEMETRY_ARCHIVE_PATH=${TELEMETRY_ARCHIVE_PATH:=/tmp}
export TELEMETRY_MAX_RETRIES=${TELEMETRY_MAX_RETRIES:=0}
export TELEMETRY_RUN_TAG=${TELEMETRY_RUN_TAG:=chaos}
export TELEMETRY_ARCHIVE_SIZE=${TELEMETRY_ARCHIVE_SIZE:=1000}

# Unset KUBECONFIG to make sure mounted kubeconfig is used
unset KUBECONFIG

0 comments on commit e222f78

Please sign in to comment.