From 00d89115b2c3eb5b3638ea681f2a7f3d9bfe4c71 Mon Sep 17 00:00:00 2001 From: Mars Lan Date: Wed, 22 Jul 2020 19:39:58 -0700 Subject: [PATCH] feat(gms): add postgres & mariadb supports to GMS (#1742) * feat(gms): add postgres & mariadb supports to GMS Also add corresponding docker-compose files * Update README.md --- build.gradle | 2 ++ docker/gms/README.md | 14 ++++++++- docker/gms/docker-compose-mariadb.yml | 30 ++++++++++++++++++++ docker/gms/docker-compose-postgres.yml | 30 ++++++++++++++++++++ docker/mariadb/README.md | 39 ++++++++++++++++++++++++++ docker/mariadb/docker-compose.yml | 21 ++++++++++++++ docker/mariadb/init.sql | 28 ++++++++++++++++++ docker/mysql/README.md | 3 +- docker/postgresql/README.md | 39 ++++++++++++++++++++++++++ docker/postgresql/docker-compose.yml | 19 +++++++++++++ docker/postgresql/init.sql | 28 ++++++++++++++++++ gms/README.md | 5 ---- gms/war/build.gradle | 2 ++ 13 files changed, 253 insertions(+), 7 deletions(-) create mode 100644 docker/gms/docker-compose-mariadb.yml create mode 100644 docker/gms/docker-compose-postgres.yml create mode 100644 docker/mariadb/README.md create mode 100644 docker/mariadb/docker-compose.yml create mode 100644 docker/mariadb/init.sql create mode 100644 docker/postgresql/README.md create mode 100644 docker/postgresql/docker-compose.yml create mode 100644 docker/postgresql/init.sql diff --git a/build.gradle b/build.gradle index f928fb37a7c69..ba68c06a57180 100644 --- a/build.gradle +++ b/build.gradle @@ -55,6 +55,7 @@ project.ext.externalDependency = [ 'kafkaStreams': 'org.apache.kafka:kafka-streams:2.3.0', 'logbackClassic': 'ch.qos.logback:logback-classic:1.2.3', 'lombok': 'org.projectlombok:lombok:1.18.12', + 'mariadbConnector': 'org.mariadb.jdbc:mariadb-java-client:2.6.0', 'mockito': 'org.mockito:mockito-core:3.0.0', 'mysqlConnector': 'mysql:mysql-connector-java:5.1.47', "neo4jHarness": "org.neo4j.test:neo4j-harness:3.4.11", @@ -64,6 +65,7 @@ project.ext.externalDependency = [ 'playGuice': 
'com.typesafe.play:play-guice_2.11:2.6.18', 'playJavaJdbc': 'com.typesafe.play:play-java-jdbc_2.11:2.6.18', 'playTest': 'com.typesafe.play:play-test_2.11:2.6.18', + 'postgresql': 'org.postgresql:postgresql:42.2.14', 'reflections': 'org.reflections:reflections:0.9.11', "rythmEngine": "org.rythmengine:rythm-engine:1.3.0", 'servletApi': 'javax.servlet:javax.servlet-api:3.1.0', diff --git a/docker/gms/README.md b/docker/gms/README.md index f63a4136bcf10..2620ab6a2e4b2 100644 --- a/docker/gms/README.md +++ b/docker/gms/README.md @@ -67,4 +67,16 @@ environment: - NEO4J_USERNAME=neo4j - NEO4J_PASSWORD=datahub ``` -The value of `NEO4J_URI` variable should be set to the host name of the `neo4j` container within the Docker network. \ No newline at end of file +The value of `NEO4J_URI` variable should be set to the host name of the `neo4j` container within the Docker network. + +## Other Database Platforms +While GMS defaults to using MySQL as its storage backend, it is possible to switch to any of the +[database platforms](https://ebean.io/docs/database/) supported by Ebean. 
+For example, you can run the following command to start a GMS that connects to a PostgreSQL backend +``` +cd docker/gms && docker-compose -f docker-compose-postgres.yml up --build +``` +or a MariaDB backend +``` +cd docker/gms && docker-compose -f docker-compose-mariadb.yml up --build +``` diff --git a/docker/gms/docker-compose-mariadb.yml b/docker/gms/docker-compose-mariadb.yml new file mode 100644 index 0000000000000..435d17ea957cf --- /dev/null +++ b/docker/gms/docker-compose-mariadb.yml @@ -0,0 +1,30 @@ +--- +version: '3.5' +services: + datahub-gms: + image: linkedin/datahub-gms:${DATAHUB_VERSION:-latest} + build: + context: ../../ + dockerfile: docker/gms/Dockerfile + hostname: datahub-gms + container_name: datahub-gms + ports: + - "8080:8080" + environment: + - EBEAN_DATASOURCE_USERNAME=datahub + - EBEAN_DATASOURCE_PASSWORD=datahub + - EBEAN_DATASOURCE_HOST=mariadb:3306 + - EBEAN_DATASOURCE_URL=jdbc:mariadb://mariadb:3306/datahub + - EBEAN_DATASOURCE_DRIVER=org.mariadb.jdbc.Driver + - KAFKA_BOOTSTRAP_SERVER=broker:29092 + - KAFKA_SCHEMAREGISTRY_URL=http://schema-registry:8081 + - ELASTICSEARCH_HOST=elasticsearch + - ELASTICSEARCH_PORT=9200 + - NEO4J_HOST=neo4j:7474 + - NEO4J_URI=bolt://neo4j + - NEO4J_USERNAME=neo4j + - NEO4J_PASSWORD=datahub + +networks: + default: + name: datahub_network \ No newline at end of file diff --git a/docker/gms/docker-compose-postgres.yml b/docker/gms/docker-compose-postgres.yml new file mode 100644 index 0000000000000..dcd1608acc196 --- /dev/null +++ b/docker/gms/docker-compose-postgres.yml @@ -0,0 +1,30 @@ +--- +version: '3.5' +services: + datahub-gms: + image: linkedin/datahub-gms:${DATAHUB_VERSION:-latest} + build: + context: ../../ + dockerfile: docker/gms/Dockerfile + hostname: datahub-gms + container_name: datahub-gms + ports: + - "8080:8080" + environment: + - EBEAN_DATASOURCE_USERNAME=datahub + - EBEAN_DATASOURCE_PASSWORD=datahub + - EBEAN_DATASOURCE_HOST=postgres:5432 + - 
EBEAN_DATASOURCE_URL=jdbc:postgresql://postgres:5432/datahub + - EBEAN_DATASOURCE_DRIVER=org.postgresql.Driver + - KAFKA_BOOTSTRAP_SERVER=broker:29092 + - KAFKA_SCHEMAREGISTRY_URL=http://schema-registry:8081 + - ELASTICSEARCH_HOST=elasticsearch + - ELASTICSEARCH_PORT=9200 + - NEO4J_HOST=neo4j:7474 + - NEO4J_URI=bolt://neo4j + - NEO4J_USERNAME=neo4j + - NEO4J_PASSWORD=datahub + +networks: + default: + name: datahub_network \ No newline at end of file diff --git a/docker/mariadb/README.md b/docker/mariadb/README.md new file mode 100644 index 0000000000000..10ae6bcab79bb --- /dev/null +++ b/docker/mariadb/README.md @@ -0,0 +1,39 @@ +# MariaDB + +DataHub GMS can use MariaDB as an alternate storage backend. + +[Official MariaDB Docker image](https://hub.docker.com/_/mariadb) found in Docker Hub is used without +any modification. + +## Run Docker container +The command below will start the MariaDB container. +``` +cd docker/mariadb && docker-compose pull && docker-compose up +``` + +An initialization script [init.sql](init.sql) is provided to the container. This script initializes the `metadata_aspect` table, +which is basically the Key-Value store of the DataHub GMS. + +To connect to the MariaDB container, run the command below: +``` +docker exec -it mariadb mysql -u datahub -pdatahub datahub +``` + +## Container configuration +### External Port +If you need to configure default configurations for your container such as the exposed port, you will do that in +`docker-compose.yml` file. Refer to this [link](https://docs.docker.com/compose/compose-file/#ports) to understand +how to change your exposed port settings. +``` +ports: + - '3306:3306' +``` + +### Docker Network +All Docker containers for DataHub are supposed to be on the same Docker network which is `datahub_network`. +If you change this, you will need to change this for all other Docker containers as well. 
+``` +networks: + default: + name: datahub_network +``` \ No newline at end of file diff --git a/docker/mariadb/docker-compose.yml b/docker/mariadb/docker-compose.yml new file mode 100644 index 0000000000000..179b565cc43df --- /dev/null +++ b/docker/mariadb/docker-compose.yml @@ -0,0 +1,21 @@ +--- +version: '3.5' +services: + mysql: + container_name: mariadb + hostname: mariadb + image: mariadb:10.5 + restart: always + environment: + MYSQL_DATABASE: 'datahub' + MYSQL_USER: 'datahub' + MYSQL_PASSWORD: 'datahub' + MYSQL_ROOT_PASSWORD: 'datahub' + ports: + - '3306:3306' + volumes: + - ./init.sql:/docker-entrypoint-initdb.d/init.sql + +networks: + default: + name: datahub_network \ No newline at end of file diff --git a/docker/mariadb/init.sql b/docker/mariadb/init.sql new file mode 100644 index 0000000000000..b476765672661 --- /dev/null +++ b/docker/mariadb/init.sql @@ -0,0 +1,28 @@ +-- create the metadata_aspect table (primary key allows one row per urn, aspect, version) +create table metadata_aspect ( + urn varchar(500) not null, + aspect varchar(200) not null, + version bigint(20) not null, + metadata longtext not null, + createdon datetime(6) not null, + createdby varchar(255) not null, + createdfor varchar(255), + constraint pk_metadata_aspect primary key (urn,aspect,version) +); + +-- seed version-0 CorpUserInfo and CorpUserEditableInfo rows for the default datahub user (urn:li:corpuser:datahub) +insert into metadata_aspect (urn, aspect, version, metadata, createdon, createdby) values( + 'urn:li:corpuser:datahub', + 'com.linkedin.identity.CorpUserInfo', + 0, + '{"displayName":"Data Hub","active":true,"fullName":"Data Hub","email":"datahub@linkedin.com"}', + now(), + 'urn:li:principal:datahub' +), ( + 'urn:li:corpuser:datahub', + 'com.linkedin.identity.CorpUserEditableInfo', + 0, + '{"skills":[],"teams":[],"pictureLink":"https://raw.githubusercontent.com/linkedin/datahub/master/datahub-web/packages/data-portal/public/assets/images/default_avatar.png"}', + now(), + 'urn:li:principal:datahub' +); \ No newline at end of file diff --git a/docker/mysql/README.md b/docker/mysql/README.md index 
5df2f35eed97a..69eef38c0a9a1 100644 --- a/docker/mysql/README.md +++ b/docker/mysql/README.md @@ -1,6 +1,7 @@ # MySQL -DataHub GMS uses MySQL as the storage infrastructure. +DataHub GMS uses MySQL as the storage backend. + [Official MySQL Docker image](https://hub.docker.com/_/mysql) found in Docker Hub is used without any modification. diff --git a/docker/postgresql/README.md b/docker/postgresql/README.md new file mode 100644 index 0000000000000..c0a5056914409 --- /dev/null +++ b/docker/postgresql/README.md @@ -0,0 +1,39 @@ +# PostgreSQL + +DataHub GMS can use PostgreSQL as an alternate storage backend. + +[Official PostgreSQL Docker image](https://hub.docker.com/_/postgres) found in Docker Hub is used without +any modification. + +## Run Docker container +The command below will start the PostgreSQL container. +``` +cd docker/postgresql && docker-compose pull && docker-compose up +``` + +An initialization script [init.sql](init.sql) is provided to the container. This script initializes the `metadata_aspect` table, +which is basically the Key-Value store of the DataHub GMS. + +To connect to the PostgreSQL container, run the command below: +``` +docker exec -it postgres psql -U datahub +``` + +## Container configuration +### External Port +If you need to configure default configurations for your container such as the exposed port, you will do that in +`docker-compose.yml` file. Refer to this [link](https://docs.docker.com/compose/compose-file/#ports) to understand +how to change your exposed port settings. +``` +ports: + - '5432:5432' +``` + +### Docker Network +All Docker containers for DataHub are supposed to be on the same Docker network which is `datahub_network`. +If you change this, you will need to change this for all other Docker containers as well. 
+``` +networks: + default: + name: datahub_network +``` \ No newline at end of file diff --git a/docker/postgresql/docker-compose.yml b/docker/postgresql/docker-compose.yml new file mode 100644 index 0000000000000..db4a814942053 --- /dev/null +++ b/docker/postgresql/docker-compose.yml @@ -0,0 +1,19 @@ +--- +version: '3.5' +services: + postgres: + container_name: postgres + hostname: postgres + image: postgres:12.3 + restart: always + environment: + POSTGRES_USER: datahub + POSTGRES_PASSWORD: datahub + ports: + - '5432:5432' + volumes: + - ./init.sql:/docker-entrypoint-initdb.d/init.sql + +networks: + default: + name: datahub_network \ No newline at end of file diff --git a/docker/postgresql/init.sql b/docker/postgresql/init.sql new file mode 100644 index 0000000000000..434e507e4468b --- /dev/null +++ b/docker/postgresql/init.sql @@ -0,0 +1,28 @@ +-- create the metadata_aspect table (primary key allows one row per urn, aspect, version) +create table metadata_aspect ( + urn varchar(500) not null, + aspect varchar(200) not null, + version bigint not null, + metadata text not null, + createdon timestamp not null, + createdby varchar(255) not null, + createdfor varchar(255), + constraint pk_metadata_aspect primary key (urn,aspect,version) +); + +-- seed version-0 CorpUserInfo and CorpUserEditableInfo rows for the default datahub user (urn:li:corpuser:datahub) +insert into metadata_aspect (urn, aspect, version, metadata, createdon, createdby) values( + 'urn:li:corpuser:datahub', + 'com.linkedin.identity.CorpUserInfo', + 0, + '{"displayName":"Data Hub","active":true,"fullName":"Data Hub","email":"datahub@linkedin.com"}', + now(), + 'urn:li:principal:datahub' +), ( + 'urn:li:corpuser:datahub', + 'com.linkedin.identity.CorpUserEditableInfo', + 0, + '{"skills":[],"teams":[],"pictureLink":"https://raw.githubusercontent.com/linkedin/datahub/master/datahub-web/packages/data-portal/public/assets/images/default_avatar.png"}', + now(), + 'urn:li:principal:datahub' +); diff --git a/gms/README.md b/gms/README.md index d5f61765a4052..51d992d1bfc70 100644 --- a/gms/README.md +++ b/gms/README.md @@ -333,8 +333,3 
@@ the application directly from command line after a successful [build](#build): ] } ``` - -## Using a different DB backend -The default implementation of GMS stores the [aspects](../docs/what/asepct.md) in MySQL. You can change the DB backend to PostgreSQL, Oracle, or [other DB systems](https://ebean.io/docs/database/) supported by Ebean by changing the following configurations: -1. Replace the MySQL connector with the desired connector (e.g. `org.postgresql:postgresql:42.2.14`) in [gms/war/build.gradle](https://github.com/linkedin/datahub/blob/master/gms/war/build.gradle). -2. Specify `EBEAN_DATASOURCE_DRIVER` & `EBEAN_DATASOURCE_URL` environment variables to match your environment, e.g. `org.postgresql.Driver` & `jdbc:postgresql://localhost:{port}/{dbName}` diff --git a/gms/war/build.gradle b/gms/war/build.gradle index 43dffadbafedf..428298c5cd609 100644 --- a/gms/war/build.gradle +++ b/gms/war/build.gradle @@ -7,7 +7,9 @@ dependencies { runtime externalDependency.h2 runtime externalDependency.logbackClassic + runtime externalDependency.mariadbConnector runtime externalDependency.mysqlConnector + runtime externalDependency.postgresql runtime spec.product.pegasus.restliSpringBridge }