nextstrain · joverlee521 · Aug 29, 2023 · Aug 17, 2023 · Aug 17, 2023 · Aug 17, 2023
diff --git a/ncbi-virus-url b/ncbi-virus-url
@@ -12,6 +12,7 @@ and observing the network activity at
     https://www.ncbi.nlm.nih.gov/labs/virus/vssi/#/virus?SeqType_s=Nucleotide
 """
 from urllib.parse import urlencode
+from typing import List, Optional
 import argparse
 
 def parse_args():
@@ -20,9 +21,13 @@ def parse_args():
         help="NCBI Taxon ID. Visit NCBI virus at " +
              "https://www.ncbi.nlm.nih.gov/labs/virus/vssi/#/find-data/virus " +
              "to search for supported taxon IDs.")
+    parser.add_argument("--filters", required=False, nargs="*",
+        help="Filter criteria to add as `fq` param values. " +
+             "Apply filters via the NCBI Virus UI and observe the network " +
+             "activity to find the desired filter string.")
     return parser.parse_args()
 
-def build_query_url(ncbi_taxon_id: str):
+def build_query_url(ncbi_taxon_id: str, filters: Optional[List[str]]=None):
     """
     Generate URL to download all viral sequences and their curated metadata
     from GenBank via NCBI Virus.
@@ -33,6 +38,7 @@ def build_query_url(ncbi_taxon_id: str):
         'fq': [
             '{!tag=SeqType_s}SeqType_s:("Nucleotide")', # Nucleotide sequences (as opposed to protein)
             f'VirusLineageId_ss:({ncbi_taxon_id})',
+            *(filters or []),
         ],
 
         # Unclear, but seems necessary.
@@ -80,7 +86,10 @@ def build_query_url(ncbi_taxon_id: str):
 
 def main():
     args = parse_args()
-    build_query_url(args.ncbi_taxon_id)
+    build_query_url(
+        ncbi_taxon_id=args.ncbi_taxon_id,
+        filters=args.filters,  # None when --filters is omitted, [] when given with no values; either adds no extra `fq` entries
+    )
 
 if __name__ == '__main__':
     main()