From 901261786d531bada38d0a27c99b32dadd1bfe54 Mon Sep 17 00:00:00 2001 From: Nicolas Vannieuwkerke Date: Mon, 23 Oct 2023 09:45:52 +0200 Subject: [PATCH 1/6] fix yaml validation --- .../src/main/nextflow/validation/SchemaValidator.groovy | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/plugins/nf-validation/src/main/nextflow/validation/SchemaValidator.groovy b/plugins/nf-validation/src/main/nextflow/validation/SchemaValidator.groovy index 6c105cc6..353ea79f 100644 --- a/plugins/nf-validation/src/main/nextflow/validation/SchemaValidator.groovy +++ b/plugins/nf-validation/src/main/nextflow/validation/SchemaValidator.groovy @@ -184,16 +184,15 @@ class SchemaValidator extends PluginExtensionPoint { def String fileType = SamplesheetConverter.getFileType(samplesheetFile) def String delimiter = fileType == "csv" ? "," : fileType == "tsv" ? "\t" : null def List> fileContent - def List> fileContentCasted = [] def Boolean s3PathCheck = params.validationS3PathCheck ? params.validationS3PathCheck : false + def Map types = variableTypes(schemaFile.toString(), baseDir) if(fileType == "yaml"){ fileContent = new Yaml().load((samplesheetFile.text)) } else { - Map types = variableTypes(schemaFile.toString(), baseDir) fileContent = samplesheetFile.splitCsv(header:true, strip:true, sep:delimiter) - fileContentCasted = castToType(fileContent, types) } + def List> fileContentCasted = castToType(fileContent, types) if (validateFile(false, samplesheetFile.toString(), fileContentCasted, schemaFile.toString(), baseDir, s3PathCheck)) { log.debug "Validation passed: '$samplesheetFile' with '$schemaFile'" } From 6581373c2f1e657793c81f207b617c4ec5bab95f Mon Sep 17 00:00:00 2001 From: Nicolas Vannieuwkerke Date: Mon, 23 Oct 2023 09:50:04 +0200 Subject: [PATCH 2/6] update changelog and manifest to v1.1.1 --- CHANGELOG.md | 7 +++++++ plugins/nf-validation/src/resources/META-INF/MANIFEST.MF | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md 
b/CHANGELOG.md index 1fca5463..26cce82c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,12 @@ # nextflow-io/nf-validation: Changelog +# Version 1.1.1 - Shoyu + +## Bug fixes + +- Fixed an issue where a samplesheet with a lot of null values would take forever to validate ([#120](https://github.com/nextflow-io/nf-validation/pull/120)) => Thanks @awgymer for fixing this! +- Now YAML files are actually validated instead of skipped ([#124](https://github.com/nextflow-io/nf-validation/pull/124)) + # Version 1.1.0 - Miso ## Features diff --git a/plugins/nf-validation/src/resources/META-INF/MANIFEST.MF b/plugins/nf-validation/src/resources/META-INF/MANIFEST.MF index 7ccb5e1d..69e4fc77 100644 --- a/plugins/nf-validation/src/resources/META-INF/MANIFEST.MF +++ b/plugins/nf-validation/src/resources/META-INF/MANIFEST.MF @@ -1,6 +1,6 @@ Manifest-Version: 1.0 Plugin-Id: nf-validation -Plugin-Version: 1.1.0 +Plugin-Version: 1.1.1 Plugin-Class: nextflow.validation.ValidationPlugin Plugin-Provider: nextflow Plugin-Requires: >=22.10.0 From 6cc34d86a6edb2f85caa37210dd812ff7116a217 Mon Sep 17 00:00:00 2001 From: Nicolas Vannieuwkerke Date: Mon, 23 Oct 2023 10:48:23 +0200 Subject: [PATCH 3/6] fixed issue with no-header-samplesheet validation --- .../validation/SchemaValidator.groovy | 30 +++++++++++++++---- 1 file changed, 25 insertions(+), 5 deletions(-) diff --git a/plugins/nf-validation/src/main/nextflow/validation/SchemaValidator.groovy b/plugins/nf-validation/src/main/nextflow/validation/SchemaValidator.groovy index 353ea79f..c1669b55 100644 --- a/plugins/nf-validation/src/main/nextflow/validation/SchemaValidator.groovy +++ b/plugins/nf-validation/src/main/nextflow/validation/SchemaValidator.groovy @@ -186,8 +186,18 @@ class SchemaValidator extends PluginExtensionPoint { def List> fileContent def Boolean s3PathCheck = params.validationS3PathCheck ? 
params.validationS3PathCheck : false def Map types = variableTypes(schemaFile.toString(), baseDir) + def Boolean containsHeader = !(types.keySet().size() == 1 && types.keySet()[0] == "") + + if(!containsHeader){ + types = ["empty": types[""]] + } if(fileType == "yaml"){ - fileContent = new Yaml().load((samplesheetFile.text)) + fileContent = new Yaml().load((samplesheetFile.text)).collect { + if(containsHeader) { + return it as Map + } + return ["empty": it] as Map + } } else { fileContent = samplesheetFile.splitCsv(header:true, strip:true, sep:delimiter) @@ -412,15 +422,25 @@ class SchemaValidator extends PluginExtensionPoint { def String fileType = SamplesheetConverter.getFileType(file_path) def String delimiter = fileType == "csv" ? "," : fileType == "tsv" ? "\t" : null def List> fileContent - def List> fileContentCasted = [] + def Map types = variableTypes(schema_name, baseDir) + def Boolean containsHeader = !(types.keySet().size() == 1 && types.keySet()[0] == "") + + if(!containsHeader){ + types = ["empty": types[""]] + } + if(fileType == "yaml"){ - fileContent = new Yaml().load((file_path.text)) + fileContent = new Yaml().load(file_path.text).collect { + if(containsHeader) { + return it as Map + } + return ["empty": it] as Map + } } else { - Map types = variableTypes(schema_name, baseDir) fileContent = file_path.splitCsv(header:true, strip:true, sep:delimiter) - fileContentCasted = castToType(fileContent, types) } + def List> fileContentCasted = castToType(fileContent, types) if (validateFile(useMonochromeLogs, key, fileContentCasted, schema_name, baseDir, s3PathCheck)) { log.debug "Validation passed: '$key': '$file_path' with '$schema_name'" } From c085fd1895c30e645872dc1d334327fbc51ee53e Mon Sep 17 00:00:00 2001 From: Nicolas Vannieuwkerke Date: Mon, 23 Oct 2023 10:51:57 +0200 Subject: [PATCH 4/6] fix no header csv/tsv --- .../src/main/nextflow/validation/SchemaValidator.groovy | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git 
a/plugins/nf-validation/src/main/nextflow/validation/SchemaValidator.groovy b/plugins/nf-validation/src/main/nextflow/validation/SchemaValidator.groovy index c1669b55..4c0b053e 100644 --- a/plugins/nf-validation/src/main/nextflow/validation/SchemaValidator.groovy +++ b/plugins/nf-validation/src/main/nextflow/validation/SchemaValidator.groovy @@ -200,7 +200,7 @@ class SchemaValidator extends PluginExtensionPoint { } } else { - fileContent = samplesheetFile.splitCsv(header:true, strip:true, sep:delimiter) + fileContent = samplesheetFile.splitCsv(header:containsHeader ?: ["empty"], strip:true, sep:delimiter) } def List> fileContentCasted = castToType(fileContent, types) if (validateFile(false, samplesheetFile.toString(), fileContentCasted, schemaFile.toString(), baseDir, s3PathCheck)) { @@ -438,7 +438,7 @@ class SchemaValidator extends PluginExtensionPoint { } } else { - fileContent = file_path.splitCsv(header:true, strip:true, sep:delimiter) + fileContent = file_path.splitCsv(header:containsHeader ?: ["empty"], strip:true, sep:delimiter) } def List> fileContentCasted = castToType(fileContent, types) if (validateFile(useMonochromeLogs, key, fileContentCasted, schema_name, baseDir, s3PathCheck)) { From cc3f54b7a824d487a5582431dc9089e6e25fb90e Mon Sep 17 00:00:00 2001 From: Nicolas Vannieuwkerke Date: Mon, 23 Oct 2023 11:40:13 +0200 Subject: [PATCH 5/6] add failing test for yaml validation --- .../PluginExtensionMethodsTest.groovy | 31 +++++++++++++++++++ .../src/testResources/nextflow_schema.json | 1 + .../src/testResources/wrong.yaml | 11 +++++++ 3 files changed, 43 insertions(+) create mode 100644 plugins/nf-validation/src/testResources/wrong.yaml diff --git a/plugins/nf-validation/src/test/nextflow/validation/PluginExtensionMethodsTest.groovy b/plugins/nf-validation/src/test/nextflow/validation/PluginExtensionMethodsTest.groovy index c5e3c179..58dee1c0 100644 --- a/plugins/nf-validation/src/test/nextflow/validation/PluginExtensionMethodsTest.groovy +++ 
b/plugins/nf-validation/src/test/nextflow/validation/PluginExtensionMethodsTest.groovy @@ -194,6 +194,37 @@ class PluginExtensionMethodsTest extends Dsl2Spec{ !stdout } + def 'should validate a schema yaml with failures' () { + given: + def schema = Path.of('src/testResources/nextflow_schema.json').toAbsolutePath().toString() + def SCRIPT_TEXT = """ + params.input = 'src/testResources/wrong.yaml' + params.outdir = 'src/testResources/testDir' + include { validateParameters } from 'plugin/nf-validation' + + validateParameters(parameters_schema: '$schema') + """ + + when: + dsl_eval(SCRIPT_TEXT) + def stdout = capture + .toString() + .readLines() + .findResults {it.contains('WARN nextflow.validation.SchemaValidator') || it.startsWith('* --') ? it : null } + + then: + def error = thrown(SchemaValidationException) + def errorMessages = error.message.readLines() + errorMessages[0] == "\033[0;31mThe following errors have been detected:" + errorMessages[2] == "* -- Entry 1 - field_9: the file or directory 'non_existing_path' does not exist." + errorMessages[3] == "* -- Entry 1 - field_7: the file or directory 'non_existing_file.tsv' does not exist." 
+ errorMessages[4] == '* -- Entry 1 - field_7: string [non_existing_file.tsv] does not match pattern ^.*\\.txt$ (non_existing_file.tsv)' + errorMessages[5] == "* -- Entry 1 - field_8: 'src/testResources/test.txt' is not a directory, but a file (src/testResources/test.txt)" + errorMessages[6] == "* -- Entry 1 - field_5: expected type: Number, found: String (string)" + errorMessages[7] == "* -- Entry 1 - field_6: expected type: Boolean, found: String (20)" + !stdout + } + def 'should find unexpected params' () { given: def schema = Path.of('src/testResources/nextflow_schema.json').toAbsolutePath().toString() diff --git a/plugins/nf-validation/src/testResources/nextflow_schema.json b/plugins/nf-validation/src/testResources/nextflow_schema.json index 5f8b637a..7cc70ee1 100644 --- a/plugins/nf-validation/src/testResources/nextflow_schema.json +++ b/plugins/nf-validation/src/testResources/nextflow_schema.json @@ -17,6 +17,7 @@ "format": "file-path", "mimetype": "text/csv", "pattern": "^\\S+\\.(csv|tsv|yaml)$", + "schema": "src/testResources/schema_input.json", "description": "Path to comma-separated file containing information about the samples in the experiment.", "help_text": "You will need to create a design file with information about the samples in your experiment before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 3 columns, and a header row. 
See [usage docs](https://nf-co.re/testpipeline/usage#samplesheet-input).", "fa_icon": "fas fa-file-csv" diff --git a/plugins/nf-validation/src/testResources/wrong.yaml b/plugins/nf-validation/src/testResources/wrong.yaml new file mode 100644 index 00000000..515dad85 --- /dev/null +++ b/plugins/nf-validation/src/testResources/wrong.yaml @@ -0,0 +1,11 @@ +- field_1: fullField + field_2: 10 + field_3: true + field_4: string1 + field_5: string + field_6: 20 + field_7: non_existing_file.tsv + field_8: src/testResources/test.txt + field_9: non_existing_path + field_10: unique1 + field_11: 1 From 4d94572e47af5d96084f6af37d702e8ddbf42951 Mon Sep 17 00:00:00 2001 From: Nicolas Vannieuwkerke Date: Mon, 23 Oct 2023 13:27:01 +0200 Subject: [PATCH 6/6] use the right schema for yaml test --- .../PluginExtensionMethodsTest.groovy | 12 +++++------- .../src/testResources/nextflow_schema.json | 1 - .../nf-validation/src/testResources/wrong.yaml | 17 ++++++----------- 3 files changed, 11 insertions(+), 19 deletions(-) diff --git a/plugins/nf-validation/src/test/nextflow/validation/PluginExtensionMethodsTest.groovy b/plugins/nf-validation/src/test/nextflow/validation/PluginExtensionMethodsTest.groovy index 58dee1c0..247629db 100644 --- a/plugins/nf-validation/src/test/nextflow/validation/PluginExtensionMethodsTest.groovy +++ b/plugins/nf-validation/src/test/nextflow/validation/PluginExtensionMethodsTest.groovy @@ -196,7 +196,7 @@ class PluginExtensionMethodsTest extends Dsl2Spec{ def 'should validate a schema yaml with failures' () { given: - def schema = Path.of('src/testResources/nextflow_schema.json').toAbsolutePath().toString() + def schema = Path.of('src/testResources/nextflow_schema_with_samplesheet.json').toAbsolutePath().toString() def SCRIPT_TEXT = """ params.input = 'src/testResources/wrong.yaml' params.outdir = 'src/testResources/testDir' @@ -216,12 +216,10 @@ class PluginExtensionMethodsTest extends Dsl2Spec{ def error = thrown(SchemaValidationException) def errorMessages 
= error.message.readLines() errorMessages[0] == "\033[0;31mThe following errors have been detected:" - errorMessages[2] == "* -- Entry 1 - field_9: the file or directory 'non_existing_path' does not exist." - errorMessages[3] == "* -- Entry 1 - field_7: the file or directory 'non_existing_file.tsv' does not exist." - errorMessages[4] == '* -- Entry 1 - field_7: string [non_existing_file.tsv] does not match pattern ^.*\\.txt$ (non_existing_file.tsv)' - errorMessages[5] == "* -- Entry 1 - field_8: 'src/testResources/test.txt' is not a directory, but a file (src/testResources/test.txt)" - errorMessages[6] == "* -- Entry 1 - field_5: expected type: Number, found: String (string)" - errorMessages[7] == "* -- Entry 1 - field_6: expected type: Boolean, found: String (20)" + errorMessages[2] == "* -- Entry 1: Missing required value: sample" + errorMessages[3] == "* -- Entry 1 - strandedness: Strandedness must be provided and be one of 'forward', 'reverse' or 'unstranded' (weird)" + errorMessages[4] == "* -- Entry 1 - fastq_2: FastQ file for reads 2 cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz' (test1_fastq2.fasta)" + errorMessages[5] == "* -- Entry 2 - sample: Sample name must be provided and cannot contain spaces (test 2)" !stdout } diff --git a/plugins/nf-validation/src/testResources/nextflow_schema.json b/plugins/nf-validation/src/testResources/nextflow_schema.json index 7cc70ee1..5f8b637a 100644 --- a/plugins/nf-validation/src/testResources/nextflow_schema.json +++ b/plugins/nf-validation/src/testResources/nextflow_schema.json @@ -17,7 +17,6 @@ "format": "file-path", "mimetype": "text/csv", "pattern": "^\\S+\\.(csv|tsv|yaml)$", - "schema": "src/testResources/schema_input.json", "description": "Path to comma-separated file containing information about the samples in the experiment.", "help_text": "You will need to create a design file with information about the samples in your experiment before running the pipeline. 
Use this parameter to specify its location. It has to be a comma-separated file with 3 columns, and a header row. See [usage docs](https://nf-co.re/testpipeline/usage#samplesheet-input).", "fa_icon": "fas fa-file-csv" diff --git a/plugins/nf-validation/src/testResources/wrong.yaml b/plugins/nf-validation/src/testResources/wrong.yaml index 515dad85..c4b45cb9 100644 --- a/plugins/nf-validation/src/testResources/wrong.yaml +++ b/plugins/nf-validation/src/testResources/wrong.yaml @@ -1,11 +1,6 @@ -- field_1: fullField - field_2: 10 - field_3: true - field_4: string1 - field_5: string - field_6: 20 - field_7: non_existing_file.tsv - field_8: src/testResources/test.txt - field_9: non_existing_path - field_10: unique1 - field_11: 1 +- fastq_1: test1_fastq1.fastq.gz + fastq_2: test1_fastq2.fasta + strandedness: weird +- sample: test 2 + fastq_1: test2_fastq1.fastq.gz + strandedness: forward