Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement calibratable XRootD computational overhead #68

Merged
merged 5 commits into from
Jul 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion data/workload-configs/crown_ttbar_copyjob.json
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
},
"flops": {
"type": "gaussian",
"average": 1174794431328,
"average": 79613793103,
"sigma": 0
},
"memory": {
Expand Down
4 changes: 4 additions & 0 deletions src/SimpleSimulator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ bool SimpleSimulator::infile_caching_on = true; // flag to turn
bool SimpleSimulator::prefetching_on = true; // flag to enable prefetching during streaming
bool SimpleSimulator::shuffle_jobs = false; // flag to enable job shuffling during submission
double SimpleSimulator::xrd_block_size = 1. * 1000 * 1000 * 1000;// maximum size of the streamed file blocks in bytes for the XRootD-ish streaming
double SimpleSimulator::xrd_add_flops_per_time = 20000000000;// flops overhead introduced by XRootD streaming per second
// TODO: The initialization below is likely bogus (at compile time?)
std::set<std::string> SimpleSimulator::cache_hosts;
std::set<std::string> SimpleSimulator::storage_hosts;
Expand Down Expand Up @@ -243,6 +244,7 @@ po::variables_map process_program_options(int argc, char **argv) {
bool shuffle_jobs = false;

double xrd_block_size = 1000. * 1000 * 1000;
double xrd_add_flops_per_time = 20000000000;
std::string storage_service_buffer_size = "1048576";// 1MiB

unsigned int seed = 42;
Expand All @@ -256,6 +258,7 @@ po::variables_map process_program_options(int argc, char **argv) {
op("no-caching", po::bool_switch()->default_value(no_caching), "switch to turn on/off the caching of jobs' input-files")("prefetch-off", po::bool_switch()->default_value(prefetch_off), "switch to turn on/off prefetching for streaming of input-files")("shuffle-jobs", po::bool_switch()->default_value(shuffle_jobs), "switch to turn on/off shuffling jobs during submission");
op("output-file,o", po::value<std::string>()->value_name("<out file>")->required(), "path for the CSV file containing output information about the jobs in the simulation");
op("xrd-blocksize,x", po::value<double>()->default_value(xrd_block_size), "size of the blocks XRootD uses for data streaming")("storage-buffer-size,b", po::value<StorageServiceBufferValue>()->default_value(StorageServiceBufferValue(storage_service_buffer_size)), "buffer size used by the storage services when communicating data");
op("xrd-flops-per-time", po::value<double>()->default_value(xrd_add_flops_per_time), "flops overhead introduced by XRootD data streaming per second");
op("cache-scope", po::value<cacheScope>()->default_value(cacheScope("local")), "Set the network scope in which caches can be found:\n local: only caches on same machine\n network: caches in same network zone\n siblingnetwork: also include caches in sibling networks");
op("seed,s", po::value<unsigned int>()->default_value(seed), "Set the seed for the random generator");

Expand Down Expand Up @@ -450,6 +453,7 @@ int main(int argc, char **argv) {

// Set XRootD block size
SimpleSimulator::xrd_block_size = vm["xrd-blocksize"].as<double>();
SimpleSimulator::xrd_add_flops_per_time = vm["xrd-flops-per-time"].as<double>();

// Set StorageService buffer size/type
std::string buffer_size = vm["storage-buffer-size"].as<StorageServiceBufferValue>().get();
Expand Down
12 changes: 8 additions & 4 deletions src/SimpleSimulator.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,18 +21,22 @@ class SimpleSimulator {
static std::set<std::string> network_monitors;// hosts configured to monitor network

static void fillHostsInSiblingZonesMap(bool include_subzones);
static bool local_cache_scope;

static std::map<std::string, std::set<std::string>> hosts_in_zones;// map holding information of all hosts present in network zones
static std::map<std::shared_ptr<wrench::StorageService>, LRU_FileList> global_file_map;// map holding file information

// global simulator settings and parameters
static bool infile_caching_on;
static bool prefetching_on;
static bool local_cache_scope;

static bool shuffle_jobs;
static std::map<std::shared_ptr<wrench::StorageService>, LRU_FileList> global_file_map;

static double xrd_block_size;
static double xrd_add_flops_per_time;
static std::mt19937 gen;

// Cores required
/*// Cores required
static int req_cores;
// Flops distribution
static double mean_flops;
Expand All @@ -49,7 +53,7 @@ class SimpleSimulator {
// Output-file distribution
static double mean_outsize;
static double sigma_outsize;
static std::normal_distribution<double> *outsize_dist;
static std::normal_distribution<double> *outsize_dist;*/

/** @brief Output filestream object to write out dump */
static std::ofstream filedump;
Expand Down
6 changes: 6 additions & 0 deletions src/computation/StreamedComputation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,8 @@ void StreamedComputation::performComputation(std::shared_ptr<wrench::ActionExecu

auto the_action = std::dynamic_pointer_cast<MonitorAction>(action_executor->getAction());// executed action

double job_start_time = wrench::Simulation::getCurrentSimulatedDate();

double infile_transfer_time = 0.;
double compute_time = 0.;

Expand Down Expand Up @@ -67,6 +69,10 @@ void StreamedComputation::performComputation(std::shared_ptr<wrench::ActionExecu
double num_bytes = std::min<double>(SimpleSimulator::xrd_block_size, data_to_process);
double num_flops = determineFlops(num_bytes, total_data_size);
WRENCH_INFO("Chunk: %.2lf bytes / %.2lf flops", num_bytes, num_flops);
// Add XRootD FLOPs overhead that increments with execution time
double xrd_overhead_flops = SimpleSimulator::xrd_add_flops_per_time * (wrench::Simulation::getCurrentSimulatedDate() - job_start_time);
num_flops += xrd_overhead_flops;
WRENCH_DEBUG(" + %.2lf flops XRootD overhead", xrd_overhead_flops);
// Start the computation asynchronously
simgrid::s4u::ExecPtr exec = simgrid::s4u::this_actor::exec_init(num_flops);
double exec_start_time = 0.0;
Expand Down
5 changes: 3 additions & 2 deletions tools/hitratePerformancePlots.py
Original file line number Diff line number Diff line change
Expand Up @@ -224,8 +224,9 @@ def mapHostToSite(test: str, mapping: 'dict[str,str]',):
hue="Site", hue_order=sites,
data=df,
estimator="median", errorbar=("pi",95), # ci = Confidence Interval, pi = Percentile Interval, sd = Standard Deviation, se = Standard Error of Mean
dodge=True, join=False,
markers=markers, capsize=0.5/len(sites), errwidth=1.,
dodge=True,
linestyle='none',
markers=markers, capsize=0.5/len(sites), err_kws={'linewidth': 1.0},
palette=sns.color_palette("colorblind", n_colors=len(sites)),
ax=ax1
)
Expand Down
23 changes: 12 additions & 11 deletions tools/hitrateScan.sh
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,8 @@ action() {
local parent="$( dirname "$base" )"

local PLATFORM="$parent/data/platform-files/sgbatch_validation.xml"
local WORKLOAD="$parent/data/workload-configs/crown_ttbar_validation.json"
local DATASET="$parent/data/dataset-configs/sample.json"
local WORKLOAD="$parent/data/workload-configs/crown_ttbar_copyjob.json"
local DATASET="$parent/data/dataset-configs/crown_ttbar_copyjob.json"

# local NJOBS=1
# local NINFILES=20 #10
Expand All @@ -32,28 +32,29 @@ action() {

local DUPLICATIONS=48

local XRD_BLOCKSIZE=100000000
local STORAGE_BUFFER_SIZE=1048576
local XRD_BLOCKSIZE=10000000000
local STORAGE_BUFFER_SIZE=0

local SCENARIO="fullstream" # further options synchronized with plotting script "copy", "simplifiedstream", "fullstream"
local SCENARIO="fastNetworkfastCache" # further options synchronized with plotting script "copy", "simplifiedstream", "fullstream"

local OUTDIR="$parent/tmp/outputs"
local OUTDIR="$parent/tmp/outputs/copyjobs"
if [ ! -d $OUTDIR ]; then
mkdir -p $OUTDIR
fi

for hitrate in $(LANG=en_UK seq 0.0 0.1 1.0)
do
dc-sim --platform "$PLATFORM" \
--hitrate ${hitrate} \
--workload-configurations "$WORKLOAD" \
--dataset-configurations "$DATASET" \
--duplications $DUPLICATIONS \
--hitrate ${hitrate} \
--xrd-blocksize $XRD_BLOCKSIZE \
--output-file ${OUTDIR}/hitratescaling_${SCENARIO}_xrd${XRD_BLOCKSIZE}_${NJOBS}jobs_hitrate${hitrate}.csv \
--cfg=network/loopback-bw:100000000000000 \
--storage-buffer-size $STORAGE_BUFFER_SIZE \
--cfg=network/loopback-bw:100000000000000 \
--no-caching \
--workload-configurations "$WORKLOAD" \
--dataset-configurations "$DATASET" #\
--seed 42 \
--output-file ${OUTDIR}/hitratescaling_${SCENARIO}_xrd${XRD_BLOCKSIZE}_${NJOBS}jobs_hitrate${hitrate}.csv
# --no-streaming \
# --wrench-full-log
# --log=simple_wms.threshold=debug \
Expand Down
Loading