Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(protobuf): disable binary protoc custom properties #10778

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions docs/how/updating-datahub.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,9 @@ This file documents any backwards-incompatible changes in DataHub and assists pe

### Breaking Changes

- Protobuf CLI will no longer create binary encoded protoc custom properties. Flag added `-protocProp` in case this
behavior is required.

### Potential Downtime

### Deprecations
Expand Down
2 changes: 2 additions & 0 deletions metadata-integration/java/datahub-protobuf/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -597,6 +597,8 @@ usage: Proto2DataHub
--platform <arg> [Optional] The data platform to produce
schemas for. e.g. kafka, snowflake, etc.
(defaults to kafka)
-protocProp [Optional] Store the protoc as a custom
property. (defaults to false)
--slack_id <arg> [Optional] The Slack team id if your protobuf
files contain comments with references to
channel names. We will translate comments like
Expand Down
4 changes: 2 additions & 2 deletions metadata-integration/java/datahub-protobuf/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ dependencies {
implementation externalDependency.jgrapht
implementation externalDependency.gson
implementation externalDependency.commonsCli
implementation externalDependency.httpAsyncClient
implementation externalDependency.httpClient
implementation externalDependency.slf4jApi
implementation externalDependency.jacksonCore
compileOnly externalDependency.lombok
Expand Down Expand Up @@ -88,7 +88,6 @@ shadowJar {
it.name
}
dependencies {

exclude(dependency {
exclude_modules.contains(it.name)
})
Expand All @@ -115,6 +114,7 @@ shadowJar {
relocate 'org.jgrapht', 'datahub.shaded.org.jgrapht'
relocate 'org.jheaps', 'datahub.shaded.org.jheaps'
relocate 'ch.randelshofer', 'datahub.shaded.ch.randelshofer'
relocate 'org.apache.hc', 'datahub.shaded.org.apache.hc'

finalizedBy checkShadowJar
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,13 @@ public class Proto2DataHub {
+ "(Default is schema)")
.build();

private static final Option OPTION_PROTOC_CUSTOM_PROPERTY =
Option.builder()
.option("protocProp")
.hasArg(false)
.desc("[Optional] Store the protoc as a custom property. (defaults to false)")
.build();

enum TransportOptions {
REST,
KAFKA,
Expand All @@ -172,6 +179,7 @@ static class AppConfig {
private final String slackId;
private final String dataPlatform;
private final String protoc;
private final boolean enableProtocCustomProperty;
private final String inputFile;
private final String messageName;
private final String inputDir;
Expand Down Expand Up @@ -207,6 +215,7 @@ static class AppConfig {
subType = cli.getOptionValue(OPTION_SUBTYPE, "schema").toLowerCase(Locale.ROOT);
inputDir = cli.getOptionValue(OPTION_DIR, null);
excludePatterns = cli.getOptionValues(OPTION_EXCLUDE_PATTERN);
enableProtocCustomProperty = cli.hasOption(OPTION_PROTOC_CUSTOM_PROPERTY);
}

private AppConfig validate() throws Exception {
Expand Down Expand Up @@ -269,7 +278,8 @@ public static void main(String[] args) throws Exception {
.addOption(OPTION_TRANSPORT)
.addOption(OPTION_FILENAME)
.addOption(OPTION_SUBTYPE)
.addOption(OPTION_HELP);
.addOption(OPTION_HELP)
.addOption(OPTION_PROTOC_CUSTOM_PROPERTY);

Options firstPassOptions = new Options().addOption(OPTION_HELP);

Expand Down Expand Up @@ -357,6 +367,7 @@ public static void main(String[] args) throws Exception {
ProtobufDataset.builder()
.setDataPlatformUrn(new DataPlatformUrn(config.dataPlatform))
.setProtocIn(new FileInputStream(config.protoc))
.setEnableProtocCustomProperty(config.enableProtocCustomProperty)
.setFilename(filePath.toString())
.setSchema(textSchema)
.setAuditStamp(auditStamp)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ public static class Builder {
private String githubOrganization;
private String slackTeamId;
private String subType;
private boolean enableProtocCustomProperty;

public Builder setGithubOrganization(@Nullable String githubOrganization) {
this.githubOrganization = githubOrganization;
Expand Down Expand Up @@ -119,6 +120,11 @@ public Builder setSubType(@Nullable String subType) {
return this;
}

public Builder setEnableProtocCustomProperty(boolean enableProtocCustomProperty) {
this.enableProtocCustomProperty = enableProtocCustomProperty;
return this;
}

public ProtobufDataset build() throws IOException {
FileDescriptorSet fileSet = FileDescriptorSet.parseFrom(protocBytes);

Expand All @@ -135,6 +141,7 @@ public ProtobufDataset build() throws IOException {
.setDatasetVisitor(
DatasetVisitor.builder()
.protocBase64(Base64.getEncoder().encodeToString(protocBytes))
.enableProtocCustomProperty(enableProtocCustomProperty)
.datasetPropertyVisitors(
List.of(new KafkaTopicPropertyVisitor(), new PropertyVisitor()))
.institutionalMemoryMetadataVisitors(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,8 @@ public class DatasetVisitor
@Builder.Default
private final ProtobufModelVisitor<Deprecation> deprecationVisitor = new DeprecationVisitor();

@Builder.Default private final boolean enableProtocCustomProperty = false;

@Override
public Stream<MetadataChangeProposalWrapper<? extends RecordTemplate>> visitGraph(
VisitContext context) {
Expand All @@ -92,7 +94,9 @@ public Stream<MetadataChangeProposalWrapper<? extends RecordTemplate>> visitGrap
.setCustomProperties(
new StringMap(
Stream.concat(
Stream.of(Map.entry("protoc", protocBase64)),
enableProtocCustomProperty
? Stream.of(Map.entry("protoc", protocBase64))
: Stream.empty(),
g.accept(context, datasetPropertyVisitors)
.flatMap(
props ->
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import static datahub.protobuf.TestFixtures.*;
import static org.testng.Assert.assertEquals;
import static org.testng.Assert.assertNull;

import com.linkedin.common.urn.DatasetUrn;
import com.linkedin.data.template.RecordTemplate;
Expand All @@ -21,7 +22,8 @@ public class DatasetVisitorTest {
@Test
public void protocBase64Test() throws URISyntaxException, IOException {
String expected = "23454345452345233455";
DatasetVisitor test = DatasetVisitor.builder().protocBase64(expected).build();
DatasetVisitor test =
DatasetVisitor.builder().protocBase64(expected).enableProtocCustomProperty(true).build();

List<MetadataChangeProposalWrapper<? extends RecordTemplate>> changes =
test.visitGraph(
Expand All @@ -35,6 +37,23 @@ public void protocBase64Test() throws URISyntaxException, IOException {
.collect(Collectors.toList());

assertEquals(expected, extractCustomProperty(changes.get(0), "protoc"));

DatasetVisitor testDisabled =
DatasetVisitor.builder().protocBase64(expected).enableProtocCustomProperty(false).build();

List<MetadataChangeProposalWrapper<? extends RecordTemplate>> changesDisabled =
testDisabled
.visitGraph(
VisitContext.builder()
.auditStamp(TEST_AUDIT_STAMP)
.datasetUrn(
DatasetUrn.createFromString(
"urn:li:dataset:(urn:li:dataPlatform:kafka,protobuf.MessageA,TEST)"))
.graph(getTestProtobufGraph("protobuf", "messageA"))
.build())
.collect(Collectors.toList());

assertNull(extractCustomProperty(changesDisabled.get(0), "protoc"));
}

@Test
Expand Down
Loading