-
Notifications
You must be signed in to change notification settings - Fork 183
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Triton model versioning #8935
Triton model versioning #8935
Changes from 5 commits
1d3f1bc
baa06ff
525e3c0
c9ddd66
146f4b8
0852476
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,105 @@ | ||
#!/usr/bin/env python3 | ||
|
||
import os, sys | ||
from collections import OrderedDict | ||
from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter | ||
from google.protobuf import text_format, message, descriptor | ||
from tritonclient import grpc | ||
|
||
def get_checksum(filename, chunksize=4096): | ||
import hashlib | ||
with open(filename, 'rb') as f: | ||
file_hash = hashlib.md5() | ||
while chunk := f.read(chunksize): | ||
file_hash.update(chunk) | ||
return file_hash.hexdigest() | ||
|
||
def update_config(args): | ||
# update config path to be output path (in case view is called) | ||
if args.copy: | ||
args.config = "config.pbtxt" | ||
if isinstance(args.copy,str): | ||
args.config = os.path.join(args.copy, args.config) | ||
|
||
with open(args.config,'w') as outfile: | ||
text_format.PrintMessage(args.model, outfile, use_short_repeated_primitives=True) | ||
|
||
def cfg_common(args): | ||
args.model = grpc.model_config_pb2.ModelConfig() | ||
if hasattr(args,'config'): | ||
with open(args.config,'r') as infile: | ||
text_format.Parse(infile.read(), args.model) | ||
|
||
def cfg_checksum(args): | ||
agents = args.model.model_repository_agents.agents | ||
checksum_agent = next((agent for agent in agents if agent.name=="checksum"), None) | ||
if checksum_agent is None: | ||
checksum_agent = agents.add(name="checksum") | ||
|
||
incorrect = [] | ||
missing = [] | ||
|
||
from glob import glob | ||
config_dir = os.path.dirname(args.config) | ||
for filename in glob(os.path.join(config_dir,"*/*")): | ||
if os.path.islink(os.path.dirname(filename)): continue | ||
checksum = get_checksum(filename) | ||
# key = algorithm:[filename relative to config.pbtxt dir] | ||
filename = os.path.relpath(filename, config_dir) | ||
filekey = "MD5:{}".format(filename) | ||
if filekey in checksum_agent.parameters and checksum!=checksum_agent.parameters[filekey]: | ||
incorrect.append(filename) | ||
if args.update and args.force: | ||
checksum_agent.parameters[filekey] = checksum | ||
elif filekey not in checksum_agent.parameters: | ||
missing.append(filename) | ||
if args.update: | ||
checksum_agent.parameters[filekey] = checksum | ||
else: | ||
continue | ||
|
||
needs_update = len(missing)>0 | ||
needs_force_update = len(incorrect)>0 | ||
|
||
if not args.quiet: | ||
if needs_update: | ||
print("\n".join(["Missing checksums:"]+missing)) | ||
if needs_force_update: | ||
print("\n".join(["Incorrect checksums:"]+incorrect)) | ||
|
||
if needs_force_update: | ||
if not (args.update and args.force): | ||
extra_args = [arg for arg in ["--update","--force"] if arg not in sys.argv] | ||
raise RuntimeError("\n".join([ | ||
"Incorrect checksum(s) found, indicating existing model file(s) has been changed.", | ||
"This violates policy. To override, run the following command, and provide a justification in your PR:", | ||
"{} {}".format(" ".join(sys.argv), " ".join(extra_args)) | ||
])) | ||
else: | ||
update_config(args) | ||
elif needs_update: | ||
if not args.update: | ||
extra_args = [arg for arg in ["--update"] if arg not in sys.argv] | ||
raise RuntimeError("\n".join([ | ||
"Missing checksum(s) found, indicating new model file(s).", | ||
"To update, run the following command:", | ||
"{} {}".format(" ".join(sys.argv), " ".join(extra_args)) | ||
])) | ||
else: | ||
update_config(args) | ||
|
||
if __name__=="__main__": | ||
parser = ArgumentParser(formatter_class=ArgumentDefaultsHelpFormatter, description="handle model file checksums") | ||
parser.add_argument("-c", "--config", type=str, default="", required=True, help="path to input config.pbtxt file") | ||
parser.add_argument("--copy", metavar="dir", default=False, const=True, nargs='?', type=str, | ||
help="make a copy of config.pbtxt instead of editing in place ([dir] = output path for copy; if omitted, current directory is used)" | ||
) | ||
parser.add_argument("--update", default=False, action="store_true", help="update missing checksums in config.pbtxt") | ||
parser.add_argument("--force", default=False, action="store_true", help="force update all checksums in config.pbtxt") | ||
parser.add_argument("--quiet", default=False, action="store_true", help="suppress printouts") | ||
|
||
args = parser.parse_args() | ||
|
||
cfg_common(args) | ||
|
||
cfg_checksum(args) |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -359,6 +359,8 @@ toolz==0.7.1 | |
tornado==6.3.3 | ||
tqdm==4.65.0 | ||
traitlets==5.9.0 | ||
# always sync version number with triton-inference-client.spec (C++ version) | ||
tritonclient==2.25.0 | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @kpedro88 , this alone is not going to include python tritonclient in cmssw. Please add |
||
trove-classifiers==2023.3.9 | ||
typed-ast==1.5.4 | ||
typing-extensions==4.5.0 | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
diff --git a/python/google/protobuf/text_format.py b/python/google/protobuf/text_format.py | ||
index a6d8bcf64..24da4cac5 100644 | ||
--- a/python/google/protobuf/text_format.py | ||
+++ b/python/google/protobuf/text_format.py | ||
@@ -470,9 +470,7 @@ class _Printer(object): | ||
entry_submsg = value.GetEntryClass()(key=key, value=value[key]) | ||
self.PrintField(field, entry_submsg) | ||
elif field.label == descriptor.FieldDescriptor.LABEL_REPEATED: | ||
- if (self.use_short_repeated_primitives | ||
- and field.cpp_type != descriptor.FieldDescriptor.CPPTYPE_MESSAGE | ||
- and field.cpp_type != descriptor.FieldDescriptor.CPPTYPE_STRING): | ||
+ if self.use_short_repeated_primitives: | ||
self._PrintShortRepeatedPrimitivesValue(field, value) | ||
else: | ||
for element in value: |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -312,6 +312,9 @@ find external/%cmsplatf -type l | xargs ls -l | |
%{?PatchReleaseSymlinkRelocate:%PatchReleaseSymlinkRelocate} | ||
echo "%{cmsroot}" > %{i}/config/scram_basedir | ||
|
||
# install checksum tool for local use (via scram) | ||
cp data/cmsTritonChecksumTool %i/bin/ | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @smuzaffar is this the right way to install the tool into CMSSW? Running a full build to test this is a bit beyond my current resources... There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @kpedro88 , There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ah ok @kpedro88 , after reading cms-sw/cmssw#43695 I remember where this tool is used. It should be copied here in scram-project-build.file. This tool is needed by cms-data externals. Can you please remind me how to run this for a cms-data repo? All cms-data external use https://github.com/cms-sw/cmsdist/blob/IB/CMSSW_14_0_X/master/cmsswdata-github.file . So for selected cms-data externals (where we want to run it), we need to create a file e.g.
so that There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes, it needs to be here so it can run on cms-data packages during build time, and will then be copied into CMSSW. If there's a better place to execute that copy operation, I'm happy to move it. My untested attempt to execute the tool during build time is here: (not in this PR, because I want to have everything in place before we start enforcing it) This will be a no-op for most cms-data packages that don't have any config.pbtxt files, but using There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. thanks @kpedro88 , about kpedro88@44f13f2, we should not modify the content of cmsswdata-github.file file otherwise it will rebuild all the data externals. We can add
in the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. My concern, though, is that this is an additional manual step that has to be taken any time a Triton model is added to a (new or existing) cms-data package. This build-time check should be automatic. Rebuilding all the data externals would only happen once, right? Is that such a big problem? Should I move the line from the "Prep" step to the "Build" step either way? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. yes rebuilding all data means re-distributing 20GB of data again. Although this is not a concern for CVMFS distribution but for online where cmssw is install on local disk this can cause issues. Adding new data external is automated, bot know how to add a new data external in cmsdist. We can teach bot to create this cmsdist/data/data-.file for new data external if it contains config.pbtxt There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @kpedro88 , see cms-sw/cms-bot#2143 There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @smuzaffar thanks! This looks like a good approach that satisfies both of our concerns. A few questions:
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Yes
No , not by default. |
||
|
||
%post | ||
export SCRAM_ARCH=%cmsplatf | ||
cd $RPM_INSTALL_PREFIX/%pkgrel | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@kpedro88 , I think it is better to add this in to cmssw repo
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
ah OK, this is need for cms-data externals. So yes it sould not go in to cmssw