diff --git a/simulation/.gitignore b/simulation/.gitignore new file mode 100644 index 0000000..e43b0f9 --- /dev/null +++ b/simulation/.gitignore @@ -0,0 +1 @@ +.DS_Store diff --git a/simulation/README.md b/simulation/README.md new file mode 100644 index 0000000..2881d1c --- /dev/null +++ b/simulation/README.md @@ -0,0 +1,52 @@ +# Lightning Simulation Code +The Lightning simulations compare the performance of large DNNs on Lightning against their performance on GPUs and AI accelerators. The code in this folder reproduces results reported in Section 9 of the Lightning paper. + +## Overview +Event-driven simulation occurs in two phases: + +1. Scheduling DNN requests. `build_sim_for_mixed_arrivals()` does this by scheduling a particular number of different inference requests (spaced based on their associated DNN input sizes using a Poisson distribution). + +2. Simulating the schedule. `Simulator.simulate()` will act out the requests at their specified times and return the average completion times per model. + +Supports comparisons between Lightning, NVIDIA's A100 and A100X, and Microsoft Brainwave. + +## Folder structure +| Source Files |Description | +| ----- | ----- | +| `orders/` | Different mixed orders of DNN requests to be converted to schedules | +| `sim_scheds/` | Schedules of DNN request traces for simulation (specific to a network speed) | +| `congestion_plot.py` | Plots the active DNN requests over time of a finished simulation | +| `csv_gen.sh` | Converts the simulator's trial outputs to CSV format (in `results/`) for further analysis | +| `dnn_classes.py` | Foundational class structures for representing deep neural network (DNN) architectures | +| `final_gen.sh` | Bash utility for batching process reading and processing of CSV files using the `read_csv.py` Python script | +| `gen_mixed.py` | Converts DNN request order into a network speed-specific schedule for simulation. 
| +| `make_order.py` | Generates and saves a random order of DNN requests | +| `models.py` | Provides a way to generate and represent the layers for popular DNNs such as BERT-Large, GPT-2 (Extra-Large), LeNet-300-100, AlexNet, ResNet-18, VGG-16, VGG-19, and Meta's DLRM | +| `read_csv.py` | Processes runtime data for DNNs executed on different processors and then stores the results in a TSV-formatted file | +| `README.md` | This file, describing the requirements and instructions to reproduce the experiments. | +| `requirements.txt` | List of all of the Python dependencies | +| `run.sh` | Conducts a series of simulations (via the `sim.py` script) on different types of processors | +| `sched_gen.sh` | Generates multiple DNN request traces for simulations | +| `sim_classes.py` | Useful data structures for simulation | +| `sim.py` | Event-driven simulator code | +| `trial_to_csv.py` | Parses trial file and extracts information about the simulation, specifically the average request completion times, total runtime, and the active request count over time, and stores that in CSV format. | +| `utils.py` | Utility functions for simulator | + +## Usage + +### 1. Install requirements +Verify that you have Python3 set up and then install necessary packages with `python3 -m pip install -r requirements.txt`. + +### 2. Launch simulations +Run simulations in parallel using `bash run.sh` (default configuration: 10 unique DNN traces over a 60Mbps network). Note: `run.sh` launches 40 simulations in parallel. + +If you'd like new schedules, execute `sched_gen.sh` before `run.sh`. + +### 3. Convert logs to CSVs +Once the 40 simulations initiated by `run.sh` are complete, execute `bash csv_gen.sh` to parse the trial logs. + +### 4. Generate average speedups by DNN model +Run `bash final_gen.sh` to generate a TSV-formatted file with the average runtimes of each DNN model over each processor. + +### 5. 
def congestion_plot(lightning_over_t:List[Tuple[int,int]],
                    a100_over_t:List[Tuple[int,int]],
                    dpu_over_t:List[Tuple[int,int]],
                    brainwave_over_t:List[Tuple[int,int]],
                    out_filepath:str,
                    network_speed:float) -> None:
    '''
    Draws one curve per processor (active requests against time) and saves the figure

    Parameters
    ----------
    lightning_over_t: (time, active request count) samples, drawn as "Lightning"
    a100_over_t: (time, active request count) samples, drawn as "A100"
    dpu_over_t: (time, active request count) samples, drawn as "A100X"
    brainwave_over_t: (time, active request count) samples, drawn as "Brainwave"
    out_filepath: where the figure is saved
    network_speed: value shown in the plot title (followed by "Gbps")
    '''
    # (legend label, samples) in the order the curves are drawn
    series = (
        ("A100", a100_over_t),
        ("A100X", dpu_over_t),
        ("Brainwave", brainwave_over_t),
        ("Lightning", lightning_over_t),
    )
    # split every series into its x and y coordinates before touching matplotlib
    curves = []
    for label, samples in series:
        times = [sample[0] for sample in samples]
        counts = [sample[1] for sample in samples]
        curves.append((label, times, counts))

    fig, ax = plt.subplots()
    for label, times, counts in curves:
        ax.plot(times, counts, label=label)

    ax.set_xlabel('Time (ns)')
    ax.set_ylabel('Active Requests')
    ax.set_title(f'Active Requests vs Time for {network_speed}Gbps')
    ax.legend()
    # logarithmic time axis (applied to the current axes)
    plt.xscale("log")

    plt.savefig(out_filepath)
    print(f"Output accessible in {out_filepath}")
requests to simulate") + parser.add_argument('--network_speed', type=str, help="network speed (in Gbps)") + parser.add_argument('--pkl_num', type=int, help="request schedule pickle file identifier") + parser.add_argument('--preemptive', type=str, help='P if preemptive scheduling (NP otherwise)') + opt = parser.parse_known_args()[0] if known else parser.parse_args() + + return opt + +if __name__=="__main__": + opt = ParseOpt() + + lightning_over_t = [] + a100_over_t = [] + dpu_over_t = [] + brainwave_over_t = [] + network_speed= opt.network_speed + with open(f"./results/active_reqs/lightning_{network_speed}_Gbps_l{opt.lightning_core_count}_cores_{opt.num_reqs}_reqs_{opt.batch_size}_BS_{opt.pkl_num}.csv", "r") as file: + lines = file.readlines() + for line in lines: + line_as_list = line.split(',') + lightning_over_t.append((float(line_as_list[0]),int(line_as_list[1]))) + with open(f"./results/active_reqs/a100_{network_speed}_Gbps_l{opt.lightning_core_count}_cores_{opt.num_reqs}_reqs_{opt.batch_size}_BS_{opt.pkl_num}.csv", "r") as file: + lines = file.readlines() + for line in lines: + line_as_list = line.split(',') + a100_over_t.append((float(line_as_list[0]),int(line_as_list[1]))) + with open(f"./results/active_reqs/dpu_{network_speed}_Gbps_l{opt.lightning_core_count}_cores_{opt.num_reqs}_reqs_{opt.batch_size}_BS_{opt.pkl_num}.csv", "r") as file: + lines = file.readlines() + for line in lines: + line_as_list = line.split(',') + dpu_over_t.append((float(line_as_list[0]),int(line_as_list[1]))) + with open(f"./results/active_reqs/brainwave_{network_speed}_Gbps_l{opt.lightning_core_count}_cores_{opt.num_reqs}_reqs_{opt.batch_size}_BS_{opt.pkl_num}.csv", "r") as file: + lines = file.readlines() + for line in lines: + line_as_list = line.split(',') + brainwave_over_t.append((float(line_as_list[0]),int(line_as_list[1]))) + out_filepath = 
class Layer():
    '''
    Layer of DNN

    Stores the layer id together with the ids of the layers it depends on
    (prereqs) and the ids of the layers that follow it (children).
    '''
    def __init__(self, id, prereqs, children):
        self.id = id
        self.prereqs = prereqs
        self.children = children


class ConvLayer(Layer):
    '''
    Convolutional layer of DNN
    '''
    def __init__(self, id:int, input_channels:int, kernel_size:int, output_shape:Tuple[int,int,int,int], prereqs=None, children=None) -> None:
        '''
        Parameters
        ----------
        id: identifier of the layer
        input_channels: number of channels of input to layer
        kernel_size: size of kernel edge used in convolution on input in this layer
        output_shape: tuple (batch_num, output_channels, output_height, output_width)
        prereqs: ids of layers this layer depends on (a fresh empty set when omitted)
        children: ids of layers that follow this layer (a fresh empty list when omitted)
        '''
        # A literal set()/[] default would be created once and shared by every
        # layer built without these arguments, so fresh containers are made per
        # instance. Containers passed in by the caller are stored as-is (not
        # copied) so the caller can keep extending them after construction.
        super().__init__(id,
                         set() if prereqs is None else prereqs,
                         [] if children is None else children)
        self.input_channels = input_channels
        self.kernel_size = kernel_size
        self.output_shape = output_shape


class FCLayer(Layer):
    '''
    Fully-connected layer of DNN
    '''
    def __init__(self, id:int, input_size:int, output_size:int, prereqs=None, children=None) -> None:
        '''
        Parameters
        ----------
        id: identifier of the layer
        input_size: size of vectors being multiplied in layer
        output_size: number of products computed in layer
        prereqs: ids of layers this layer depends on (a fresh empty set when omitted)
        children: ids of layers that follow this layer (a fresh empty list when omitted)
        '''
        # Same reasoning as ConvLayer: never share default containers between
        # instances, never copy containers supplied by the caller.
        super().__init__(id,
                         set() if prereqs is None else prereqs,
                         [] if children is None else children)
        self.input_size = input_size
        self.output_size = output_size


class Model():
    '''
    DNN Architecture
    '''
    def __init__(self, input_dims:Tuple[int,int], input_channels:float, layers:List[Layer]) -> None:
        '''
        Parameters
        ----------
        input_dims: height and width of input
        input_channels: number of bytes the channels occupy
        layers: list of layers that make up DNN
        '''
        self.input_size = input_dims[0] * input_dims[1] * input_channels # in bytes
        self.layers = layers


class ReadableModel():
    '''
    DNN in simulation-friendly format
    '''
    def __init__(self, name:str, layer_index:Dict[int,Tuple[int,int,List[int]]], prereqs:Dict[int,Set[int]], independent_layers:Set[int]) -> None:
        '''
        Parameters
        ----------
        name: name of DNN
        layer_index: table of layer id -> (vector_len, VVPs, children)
        prereqs: table of layer id -> layers they're dependent on (that haven't been computed yet)
        independent_layers: set of layers that don't have any prereqs
        '''
        self.name = name
        self.layer_index = layer_index
        self.prereqs = prereqs
        self.independent_layers = independent_layers
def gen_mixed_arrivals(order:List[str], network_speed:float, poisson=True) -> List[Tuple[str,float]]:
    '''
    Turns an ordered list of model names into a list of timed arrivals

    Parameters
    ----------
    order: names of models in the order to be scheduled
    network_speed: network speed in Gbps
    poisson: draw each gap from an exponential distribution when True,
             otherwise use the mean gap itself (evenly paced arrivals)

    Returns
    -------
    arrivals: (model name, arrival time in ns) pairs, in the same order as `order`
    '''
    arrivals = []
    clock = 0
    for name in order:
        num_bits = MODELS[name].input_size*8 # input size is stored in bytes
        # 1 Gbps moves one bit per ns, so bits / Gbps is a duration in ns
        mean_gap = num_bits / network_speed
        if not poisson:
            gap = mean_gap
        else:
            gap = round(np.random.exponential(mean_gap))
        clock += gap
        arrivals.append((name, clock))
    return arrivals
def ParseOpt(known=False):
    '''
    Reads the command-line options of the order generator

    Parameters
    ----------
    known: when True, unrecognised arguments are ignored instead of
           causing argparse to exit with an error

    Returns
    -------
    opt: namespace with `num_reqs` and `pkl_num` (None when not supplied)
    '''
    parser = argparse.ArgumentParser()
    integer_flags = (
        ('--num_reqs', "exact number of requests to simulate"),
        ('--pkl_num', "request order pickle file identifier"),
    )
    for flag, description in integer_flags:
        parser.add_argument(flag, type=int, help=description)

    if known:
        opt, _unrecognised = parser.parse_known_args()
        return opt
    return parser.parse_args()
def make_gpt2() -> List[FCLayer]:
    '''
    Generates layers for GPT-2 (Extra-Large) model.

    Layer ids are assigned consecutively: ids 1 and 2 are the two embedding
    layers, each of the 48 blocks then consumes 25*5+3 = 128 ids (five layers
    per attention head for 25 heads, followed by three more layers), and one
    final layer is appended after the last block.

    Only matrix multiplications are modelled; the steps marked "not matmul" /
    "no matmul" below produce no layer.

    Returns
    -------
    layers: list of layers for GPT-2 (Extra-Large)
    '''
    # `children` is deliberately shared by reference: the list handed to a
    # layer here keeps being extended further down, after that layer was built.
    children:List[int] = []
    layers = [
        FCLayer(1, 50257, 1600, children=children), # input embeddings
        FCLayer(2, 1024, 1600, children=children) # positional embeddings
    ]
    offset = 2 # id of the most recently created layer
    prereqs = set([1,2]) # what the next block's Q/K/V projections wait for
    # 48 encoders
    for enc_id in range(48):
        concat_prereqs = set() # collects the last layer id of every head
        # MULTI-HEADED ATTENTION (25x parallel)
        for _ in range(25):
            # NOTE: the three projections below receive the very same `prereqs` set object
            layers.append(FCLayer(offset+1, 1600, int(1024*1600/25), prereqs=prereqs, children=[offset+4])) # linear projection of Q (QW_i^Q)
            layers.append(FCLayer(offset+2, 1600, int(1024*1600/25), prereqs=prereqs, children=[offset+4])) # linear projection of K (KW_i^K)
            layers.append(FCLayer(offset+3, 1600, int(1024*1600/25), prereqs=prereqs, children=[offset+5])) # linear projection of V (VW_i^V)
            # register the projections as children of the previous stage
            # (the embeddings, or the previous block's last layer)
            children.extend([offset+1,offset+2,offset+3])
            layers.append(FCLayer(offset+4, int(1600/25), 1024*1024, prereqs=set([offset+1,offset+2]), children=[offset+5])) # QW_i^Q x (KW_i^K)^T
            # scaling (not matmul)
            # softmax (not matmul)
            # the child id below is the "concat x W^O" layer of this block:
            # 2 embedding ids + enc_id full blocks + 25*5 head layers + 1
            layers.append(FCLayer(offset+5, 1024, int(1024*1600/25), prereqs=set([offset+3,offset+4]), children=[enc_id*(25*5+3)+2+25*5+1])) # softmax x VW_i^V
            concat_prereqs.add(offset+5)
            offset += 5
        # concat x W^O
        layers.append(FCLayer(offset+1, 1600, 1024*1600, prereqs=concat_prereqs, children=[offset+2]))
        # add+norm (no matmul)
        # FEED-FORWARD
        # 1. Linear Transformation
        layers.append(FCLayer(offset+2, 1600, 1024*6400, prereqs=set([offset+1]), children=[offset+3]))
        # 2. Activation Function (no matmul)
        # rebind to a fresh list BEFORE building the next layer, so that layer
        # (and not the previous stage) receives the children added later
        children = []
        # 3. Linear Transformation
        layers.append(FCLayer(offset+3, 6400, 1024*1600, prereqs=set([offset+2]), children=children))
        prereqs = set([offset+3])
        # add+norm (no matmul)
        offset += 3
    # final layer, depending on the last layer of the last block ...
    layers.append(FCLayer(offset+1, 1600, 1024*50257, prereqs=set([offset])))
    # ... and registered as that layer's only child through the shared list
    children.append(offset+1)
    return layers
layer 28 + ConvLayer(10, 128, 3, (-1, 128, 28, 28), prereqs=set([9]), children=[11]), # layer 31 + ConvLayer(11, 128, 3, (-1, 256, 14, 14), prereqs=set([10]), children=[12]), # layer 35 + ConvLayer(12, 256, 3, (-1, 256, 14, 14), prereqs=set([11]), children=[13]), # layer 38 + ConvLayer(13, 128, 1, (-1, 256, 14, 14), prereqs=set([12]), children=[14]), # layer 40, projection shortcuts + ConvLayer(14, 256, 3, (-1, 256, 14, 14), prereqs=set([13]), children=[15]), # layer 44 + ConvLayer(15, 256, 3, (-1, 256, 14, 14), prereqs=set([14]), children=[16]), # layer 47 + ConvLayer(16, 256, 3, (-1, 512, 7, 7), prereqs=set([15]), children=[17]), # layer 51 + ConvLayer(17, 512, 3, (-1, 512, 7, 7), prereqs=set([16]), children=[18]), # layer 54 + ConvLayer(18, 256, 1, (-1, 512, 7, 7), prereqs=set([17]), children=[19]), # layer 56, projection shortcuts + ConvLayer(19, 512, 3, (-1, 512, 7, 7), prereqs=set([18]), children=[20]), # layer 60 + ConvLayer(20, 512, 3, (-1, 512, 7, 7), prereqs=set([19]), children=[21]), # layer 63 + FCLayer(21, 512, 1000, prereqs=set([20])) # layer 68 + ]), + "VGG-16": Model((224,224), 3, [ + ConvLayer(1, 3, 3, (-1, 64, 224, 224), children=[2]), # layer 1 + ConvLayer(2, 64, 3, (-1, 64, 224, 224), prereqs=set([1]), children=[3]), # layer 3 + ConvLayer(3, 64, 3, (-1, 128, 112, 112), prereqs=set([2]), children=[4]), # layer 6 + ConvLayer(4, 128, 3, (-1, 128, 112, 112), prereqs=set([3]), children=[5]), # layer 8 + ConvLayer(5, 128, 3, (-1, 256, 56, 56), prereqs=set([4]), children=[6]), # layer 11 + ConvLayer(6, 256, 3, (-1, 256, 56, 56), prereqs=set([5]), children=[7]), # layer 13 + ConvLayer(7, 256, 3, (-1, 256, 56, 56), prereqs=set([6]), children=[8]), # layer 15 + ConvLayer(8, 256, 3, (-1, 512, 28, 28), prereqs=set([7]), children=[9]), # layer 18 + ConvLayer(9, 512, 3, (-1, 512, 28, 28), prereqs=set([8]), children=[10]), # layer 20 + ConvLayer(10, 512, 3, (-1, 512, 28, 28), prereqs=set([9]), children=[11]), # layer 22 + ConvLayer(11, 512, 3, (-1, 512, 14, 
14), prereqs=set([10]), children=[12]), # layer 25 + ConvLayer(12, 512, 3, (-1, 512, 14, 14), prereqs=set([11]), children=[13]), # layer 27 + ConvLayer(13, 512, 3, (-1, 512, 14, 14), prereqs=set([12]), children=[14]), # layer 29 + FCLayer(14, 25088, 4096, prereqs=set([13]), children=[15]), # layer 33 + FCLayer(15, 4096, 4096, prereqs=set([14]), children=[16]), # layer 36 + FCLayer(16, 4096, 1000, prereqs=set([15])) # layer 39 + ]), + "VGG-19": Model((224,224), 3, [ + ConvLayer(1, 3, 3, (-1, 64, 224, 224), children=[2]), # layer 1 + ConvLayer(2, 64, 3, (-1, 64, 224, 224), prereqs=set([1]), children=[3]), # layer 3 + ConvLayer(3, 64, 3, (-1, 128, 112, 112), prereqs=set([2]), children=[4]), # layer 6 + ConvLayer(4, 128, 3, (-1, 128, 112, 112), prereqs=set([3]), children=[5]), # layer 8 + ConvLayer(5, 128, 3, (-1, 256, 56, 56), prereqs=set([4]), children=[6]), # layer 11 + ConvLayer(6, 256, 3, (-1, 256, 56, 56), prereqs=set([5]), children=[7]), # layer 13 + ConvLayer(7, 256, 3, (-1, 256, 56, 56), prereqs=set([6]), children=[8]), # layer 15 + ConvLayer(8, 256, 3, (-1, 256, 56, 56), prereqs=set([7]), children=[9]), # layer 17 + ConvLayer(9, 256, 3, (-1, 512, 28, 28), prereqs=set([8]), children=[10]), # layer 20 + ConvLayer(10, 512, 3, (-1, 512, 28, 28), prereqs=set([9]), children=[11]), # layer 22 + ConvLayer(11, 512, 3, (-1, 512, 28, 28), prereqs=set([10]), children=[12]), # layer 24 + ConvLayer(12, 512, 3, (-1, 512, 28, 28), prereqs=set([11]), children=[13]), # layer 26 + ConvLayer(13, 512, 3, (-1, 512, 14, 14), prereqs=set([12]), children=[14]), # layer 29 + ConvLayer(14, 512, 3, (-1, 512, 14, 14), prereqs=set([13]), children=[15]), # layer 31 + ConvLayer(15, 512, 3, (-1, 512, 14, 14), prereqs=set([14]), children=[16]), # layer 33 + ConvLayer(16, 512, 3, (-1, 512, 14, 14), prereqs=set([15]), children=[17]), # layer 35 + FCLayer(17, 25088, 4096, prereqs=set([16]), children=[18]), # layer 39 + FCLayer(18, 4096, 4096, prereqs=set([17]), children=[19]), # layer 42 + 
def build_model(model_name:str, min_vec_size=1000000) -> ReadableModel:
    '''
    Converts an entry of MODELS into the format consumed by the simulator

    Parameters
    ----------
    model_name: key of the model in MODELS
    min_vec_size: smallest vector length a layer may be expressed with; layers
                  with shorter vectors are re-expressed as
                  ceil(length * count / min_vec_size) products of this length

    Returns
    -------
    model: model in simulation-readable format (see ReadableModel spec)
    '''
    model = MODELS[model_name]
    layer_index = {} # layer id -> (vector_len, num_VVPs, children)
    prereqs = {} # layer id -> ids it still waits for
    independent_layers = set() # ids of layers with nothing to wait for
    for layer in tqdm(model.layers, desc=f'Building {model_name}...'):
        # 1. express the layer as `num_vvps` vector products of length `vec_len`
        if isinstance(layer, ConvLayer):
            _, out_channels, out_height, out_width = layer.output_shape
            workload = (layer.kernel_size ** 2,
                        layer.input_channels * out_channels * out_height * out_width)
        elif isinstance(layer, FCLayer):
            workload = (layer.input_size, layer.output_size)
        else:
            workload = None # other layer kinds get no layer_index entry

        # 2. apply the granularity rule (identical for both layer kinds)
        if workload is not None:
            vec_len, num_vvps = workload
            if vec_len < min_vec_size:
                num_vvps = math.ceil(vec_len*num_vvps/min_vec_size)
                vec_len = min_vec_size
            # children are copied so the simulator never aliases the model's lists
            layer_index[layer.id] = (vec_len, num_vvps, layer.children.copy())

        # 3. record dependencies
        if not layer.prereqs:
            independent_layers.add(layer.id)
        else:
            prereqs[layer.id] = layer.prereqs.copy()
    return ReadableModel(model_name, layer_index, prereqs, independent_layers)
print(total_multiplications) + m_power = total_multiplications*power/cores/freq/(10**9) + data += f"{model}\t{m_power}" + if name == "Lightning": + lightning_power[model] = m_power + else: + data += f"\t{m_power/lightning_power[model]}" + data += "\n" + data += "\n" + with open(filename, "w") as file: + file.write(data) + print(f"Output accessible at {filename}") \ No newline at end of file diff --git a/simulation/orders/order_1.pkl b/simulation/orders/order_1.pkl new file mode 100644 index 0000000..f23be08 Binary files /dev/null and b/simulation/orders/order_1.pkl differ diff --git a/simulation/orders/order_10.pkl b/simulation/orders/order_10.pkl new file mode 100644 index 0000000..4f1522d Binary files /dev/null and b/simulation/orders/order_10.pkl differ diff --git a/simulation/orders/order_2.pkl b/simulation/orders/order_2.pkl new file mode 100644 index 0000000..2711a31 Binary files /dev/null and b/simulation/orders/order_2.pkl differ diff --git a/simulation/orders/order_3.pkl b/simulation/orders/order_3.pkl new file mode 100644 index 0000000..dbeee44 Binary files /dev/null and b/simulation/orders/order_3.pkl differ diff --git a/simulation/orders/order_4.pkl b/simulation/orders/order_4.pkl new file mode 100644 index 0000000..f845364 Binary files /dev/null and b/simulation/orders/order_4.pkl differ diff --git a/simulation/orders/order_5.pkl b/simulation/orders/order_5.pkl new file mode 100644 index 0000000..1af4753 Binary files /dev/null and b/simulation/orders/order_5.pkl differ diff --git a/simulation/orders/order_6.pkl b/simulation/orders/order_6.pkl new file mode 100644 index 0000000..241ddc4 Binary files /dev/null and b/simulation/orders/order_6.pkl differ diff --git a/simulation/orders/order_7.pkl b/simulation/orders/order_7.pkl new file mode 100644 index 0000000..bb79e23 Binary files /dev/null and b/simulation/orders/order_7.pkl differ diff --git a/simulation/orders/order_8.pkl b/simulation/orders/order_8.pkl new file mode 100644 index 0000000..63e4f02 
def read_csv(file_prefix:str, file_suffix:str) -> str:
    '''
    Generates list of runtimes in appropriate display form

    Reads one "<model>,<time>" CSV per processor from
    `file_prefix + processor + file_suffix` and renders a tab-separated table:
    a header, one row per model (the four times followed by each processor's
    time divided by Lightning's), and a final "Max" row holding the largest
    ratio per processor, placed under the ratio columns.

    Parameters
    ----------
    file_prefix: text placed before the processor name in each file path
    file_suffix: text placed after the processor name in each file path

    Returns
    -------
    output: the table as a single string (no trailing newline)

    Raises
    ------
    OSError: a processor file cannot be opened
    KeyError: a model is missing from one of the files
    ValueError / IndexError: a non-blank line is not "<model>,<number>"
    ZeroDivisionError: Lightning's time for a model is 0
    '''
    procs = ["lightning", "a100", "dpu", "brainwave"]
    rivals = procs[1:] # processors compared against Lightning
    models = ["AlexNet", "ResNet-18", "VGG-16", "VGG-19", "BERT", "GPT-2", "DLRM"]

    time_dict = {} # processor name + model name -> runtime
    for proc in procs:
        with open(file_prefix + proc + file_suffix, "r") as file:
            for line in file:
                if not line.strip():
                    continue # tolerate blank lines (e.g. a trailing newline)
                key_val = line.split(",")
                time_dict[proc + key_val[0]] = float(key_val[1])

    rows = ["\tLightning\tA100\tA100X\tBrainwave\tA100/Lightning\tA100X/Lightning\tBrainwave/Lightning"]
    max_ratios = {}
    for model in models:
        times = [time_dict[proc + model] for proc in procs]
        lightning_time = time_dict["lightning" + model]
        ratios = [time_dict[proc + model]/lightning_time for proc in rivals]
        for proc, ratio in zip(rivals, ratios):
            max_ratios[proc] = max(max_ratios[proc], ratio) if proc in max_ratios else ratio
        rows.append("\t".join([model] + [str(value) for value in times + ratios]))

    # One empty cell per time column, so the maxima sit under the ratio
    # headers and the row has exactly as many cells as the header.
    rows.append("\t".join(["Max"] + [""] * len(procs) + [str(max_ratios[proc]) for proc in rivals]))

    return "\n".join(rows)
(in Gbps)") + parser.add_argument('--pkl_num', type=int, help="request schedule pickle file identifier") + parser.add_argument('--preemptive', type=str, help="whether there is a preemptive flag") + opt = parser.parse_known_args()[0] if known else parser.parse_args() + + return opt + +if __name__=="__main__": + opt = ParseOpt() + prefix = f"results/runtimes/" + new_str = "" + for pkl_num in range(1,11): + try: + suffix = f"_{opt.network_speed}_Gbps_l{opt.lightning_core_count}_cores_{opt.num_reqs}_reqs_{opt.batch_size}_BS_{pkl_num}.csv" + new_str += read_csv(prefix, suffix) + "\n" + print(f"SUCCESS: {opt.preemptive}, {opt.network_speed}Gbps, {opt.lightning_core_count} cores, {opt.num_reqs} reqs, {opt.batch_size} BS, {pkl_num} pickle num") + except: + print(f"Result for {opt.preemptive}, {opt.network_speed}Gbps, {opt.lightning_core_count} cores, {opt.num_reqs} reqs, {opt.batch_size} BS, {pkl_num} pickle num failed...") + continue + out_filename = f"results/runtimes/P_" + "final" + f"_{opt.network_speed}_Gbps_l{opt.lightning_core_count}_cores_{opt.num_reqs}_reqs_{opt.batch_size}_BS.tsv" + with open(out_filename, "w") as file: + file.write(new_str) + print(f"Output accessible at {out_filename}.") \ No newline at end of file diff --git a/simulation/requirements.txt b/simulation/requirements.txt new file mode 100644 index 0000000..3839fda --- /dev/null +++ b/simulation/requirements.txt @@ -0,0 +1,24 @@ +certifi==2023.5.7 +charset-normalizer==3.1.0 +contourpy==1.0.7 +cycler==0.11.0 +filelock==3.12.1 +fonttools==4.40.0 +idna==3.4 +Jinja2==3.1.2 +kiwisolver==1.4.4 +MarkupSafe==2.1.3 +matplotlib==3.7.1 +mpmath==1.3.0 +networkx==3.1 +numpy==1.24.3 +packaging==23.1 +Pillow==9.5.0 +pyparsing==3.0.9 +python-dateutil==2.8.2 +requests==2.31.0 +six==1.16.0 +sympy==1.12 +tqdm==4.65.0 +typing_extensions==4.6.3 +urllib3==2.0.3 diff --git a/simulation/run.sh b/simulation/run.sh new file mode 100644 index 0000000..62e6bef --- /dev/null +++ b/simulation/run.sh @@ -0,0 +1,19 @@ 
+NETWORK_SPEED=0.06
+NUM_REQS=100
+LIGHTNING_CORE_COUNT=576
+LIGHTNING_BATCH_SIZE=1
+
+mkdir -p trials
+mkdir -p job_stats
+
+for PKL_NUM in 1 2 3 4 5 6 7 8 9 10
+do
+    python3 sim.py --processor="Lightning-1-200-100" --gran=2048 --network_speed=$NETWORK_SPEED --lightning_core_count=$LIGHTNING_CORE_COUNT --batch_size=$LIGHTNING_BATCH_SIZE --preemptive=True --pkl_num=$PKL_NUM --last_req=$NUM_REQS > trials/lightning_$NETWORK_SPEED\_Gbps_l$LIGHTNING_CORE_COUNT\_cores_$NUM_REQS\_reqs_$LIGHTNING_BATCH_SIZE\_BS_$PKL_NUM.txt &
+    sleep 2;
+    python3 sim.py --processor="A100" --gran=2048 --network_speed=$NETWORK_SPEED --batch_size=1 --preemptive=True --pkl_num=$PKL_NUM --last_req=$NUM_REQS > trials/a100_$NETWORK_SPEED\_Gbps_l$LIGHTNING_CORE_COUNT\_cores_$NUM_REQS\_reqs_$LIGHTNING_BATCH_SIZE\_BS_$PKL_NUM.txt &
+    sleep 2;
+    python3 sim.py --processor="DPU" --gran=2048 --network_speed=$NETWORK_SPEED --batch_size=1 --preemptive=True --pkl_num=$PKL_NUM --last_req=$NUM_REQS > trials/dpu_$NETWORK_SPEED\_Gbps_l$LIGHTNING_CORE_COUNT\_cores_$NUM_REQS\_reqs_$LIGHTNING_BATCH_SIZE\_BS_$PKL_NUM.txt &
+    sleep 2;
+    python3 sim.py --processor="Brainwave" --gran=2048 --network_speed=$NETWORK_SPEED --batch_size=1 --preemptive=True --pkl_num=$PKL_NUM --last_req=$NUM_REQS > trials/brainwave_$NETWORK_SPEED\_Gbps_l$LIGHTNING_CORE_COUNT\_cores_$NUM_REQS\_reqs_$LIGHTNING_BATCH_SIZE\_BS_$PKL_NUM.txt &
+    sleep 2;
+done
diff --git a/simulation/sched_gen.sh b/simulation/sched_gen.sh
new file mode 100644
index 0000000..ef29f87
--- /dev/null
+++ b/simulation/sched_gen.sh
@@ -0,0 +1,12 @@
+NETWORK_SPEEDS=(0.01 0.02 0.03 0.04 0.05 0.06)
+
+for PKL_NUM in 1 2 3 4 5 6 7 8 9 10
+do
+    python3 make_order.py --num_reqs=5000 --pkl_num=$PKL_NUM
+
+    # "${NETWORK_SPEEDS[@]}" expands to every element; a bare $NETWORK_SPEEDS is only element 0
+    for NS in "${NETWORK_SPEEDS[@]}"
+    do
+        python3 gen_mixed.py --network_speed=$NS --pkl_num=$PKL_NUM
+    done
+done
\ No newline at end of file
diff --git a/simulation/sim.py b/simulation/sim.py
new file mode 100644
index 0000000..d15938d
--- /dev/null
+++ b/simulation/sim.py
@@ -0,0 +1,454 @@
+from sim_classes
import Processor, Request, Job, Event, Task, JobEnd
+from dnn_classes import ReadableModel
+from collections import deque
+from heapq import merge
+from typing import List, Dict, Tuple, Set, Union
+import math
+from models import build_model
+from utils import gen_jobs
+import argparse
+import time
+import pickle
+
+PROCESSORS = {
+    'Brainwave': Processor('Brainwave', 96000, 0.25, {
+        "AlexNet": 0,
+        "ResNet-18": 0,
+        "VGG-16": 0,
+        "VGG-19": 0,
+        "BERT": 0,
+        "GPT-2": 0,
+        "ChatGPT": 0,
+        "DLRM": 0
+    }),
+    'A100': Processor('A100', 6912, 1.41, {
+        "AlexNet": 581000,
+        "ResNet-18": 615000,
+        "VGG-16": 607000,
+        "VGG-19": 596000,
+        "BERT": 1176000,
+        "GPT-2": 6605000,
+        "ChatGPT": 6605000*116, # estimated based on model size
+        "DLRM": 6605000*2
+    }),
+    'P4': Processor('P4', 2560, 1.114, {
+        "AlexNet": 1600000,
+        "ResNet-18": 1532000,
+        "VGG-16": 1555000,
+        "VGG-19": 1552000,
+        "BERT": 3702000,
+        "GPT-2": 12761000,
+        "ChatGPT": 12761000*116, # estimated based on model size
+        "DLRM": 12761000*2
+    }),
+    'DPU': Processor('DPU', 6912, 1.41, {
+        "AlexNet": 0,
+        "ResNet-18": 0,
+        "VGG-16": 0,
+        "VGG-19": 0,
+        "BERT": 0,
+        "GPT-2": 0,
+        "ChatGPT": 0,
+        "DLRM": 0
+    })
+}
+
+class Simulator():
+    def __init__(self, processor:Processor, pkl_num:int, last_req:int, batch_size=1, preemptive=False) -> None:
+        self.processor = processor # selected processor for simulation
+        self.last_req = last_req # last req to end simulation on
+        self.dpl = processor.dpl # table of datapath latencies (in ts) before every request
+        self.overhead_factor = processor.overhead_factor # latency factor between layers of request (proportional to input size of next layer)
+        self.cores = [Core(i) for i in range(processor.num_cores)] # cores that the simulator will run on
+        self.batch_size = batch_size # number of similar requests that are batched in cases of congestion
+        self.next_core = 0 # core that follows the last scheduled core (round-robin style)
+        self.queue:deque = deque([]) # holds all scheduled events
+        self.time = 0 # simulator's internal time (in ts)
+        self.preemptive = preemptive # whether simulation employs preemptive (or non-preemptive) scheduling strategy
+        self.req_id = 0 # ID that will be assigned to the next Request
+        self.req_start_times:Dict[int,int] = {} # maps req_ids to their request's scheduled start times
+        self.req_end_times:Dict[int,int] = {} # maps req_ids to latest end times of any VVPs associated with that Request
+        self.req_progress:Dict[int,ReadableModel] = {} # req_ids to Request objects (to track progress)
+        self.req_layers_left:Dict[int,int] = {} # req_ids to integer representing number of layers left
+        self.req_times:Dict[str, List[float]] = {} # completion times of finished requests (at current time) for each model
+        self.curr_req_count = 0 # number of DNN requests currently being served
+        self.reqs_over_time:List[Tuple[int,int]] = [] # list of tuples (time, num_active_reqs)
+        self.batches:Dict[int,List[int]] = {} # maps req_ids of the first request in a batch to other list of up to `batch_size`-1 requests of same type
+        self.congested_reqs_set:Set[int] = set() # set of IDs of Requests that arrived and haven't been scheduled to any cores
+        self.congested_reqs_queue:List[int] = [] # list of IDs of Requests that arrived and haven't been scheduled to any cores (in order of arrival)
+        self.first_job_seen:Set[int] = set() # set of IDs of requests that have been seen (started)
+        self.pkl_filename = f"./job_stats/jobs_{processor.name}_{batch_size}_BS_{pkl_num}_{time.time()}.pkl" # name of file about statistics of simulation's Jobs
+        self.pkl_file = None # object for file about statistics of simulation's Jobs
+        self.earliest_core_avail_t = 0 # time (in ts) of the next core availability (updated real-time in non-preemptive simulation)
+        self.req_absolute_start_times = {} # maps req_ids to their start times (in ns)
+        self.outstanding_reqs = set(range(last_req)) # set of IDs for outstanding requests
+
+    def schedule_request(self, model:ReadableModel, start_t:int) -> None:
+        '''
+        Parameters
+        ----------
+        model: object that represents request's layer dependency graph
+        start_t: time (in ts of simulator) of request's arrival
+        '''
+        self._merge_into_queue([Request(start_t, model, self.req_id)])
+        self.req_id += 1
+
+    def _merge_into_queue(self, events:List[Event]) -> None:
+        '''
+        Parameters
+        ----------
+        events: list of events to be merged into queue (by start_t and then req_id for tiebreaking)
+        '''
+        # added left-handedly to prioritize recents
+        self.queue = deque(merge(events, self.queue, key=lambda event:(event.start_t, event.req_id)))
+
+    def _handle_request(self, req:Request) -> None:
+        '''
+        Processes Request object (updating simulator state)
+
+        Parameters
+        ----------
+        req: Request object corresponding to a DNN inference
+        '''
+        print(f'Request {req.req_id} started on {req.model.name}: {self.time} ts')
+
+        self.req_start_times[req.req_id] = self.time
+        self.req_progress[req.req_id] = req.model
+        self.req_layers_left[req.req_id] = len(req.model.layer_index)
+        self.req_absolute_start_times[req.req_id] = time.time()
+        self.curr_req_count += 1
+        self.reqs_over_time.append((self.time, self.curr_req_count))
+        # first independent layers of DAG
+        first_jobs = req.gen_first_jobs(self.time + self.dpl[req.model.name], self.overhead_factor)
+        self._merge_into_queue(first_jobs)
+
+    def _handle_job(self, job:Job) -> None:
+        '''
+        Processes Job object (updating simulator state) into partial Job or JobEnd
+
+        Parameters
+        ----------
+        job: Job object corresponding to a DNN layer
+        '''
+        # PREEMPTIVE SCHEDULING (doesn't support batching)
+        if self.preemptive:
+            job_end = 0
+            job_start = None
+            idle_times = {}
+            for _ in range(job.vvps):
+                assigned_core_id = self.next_core % self.processor.num_cores
+                core = self.cores[assigned_core_id]
+                task_start = max(core.current_end_time,self.time)
+                task_end, idle_time = core.schedule_vvp(job.task, self.time)
+                if idle_time:
+                    if idle_time in idle_times:
+                        idle_times[idle_time].append(assigned_core_id)
+                    else:
+                        idle_times[idle_time] = [assigned_core_id]
+                if job_start is None: # `not job_start` would also discard a legitimate start time of 0
+                    job_start = task_start
+                else:
+                    job_start = min(job_start, task_start)
+                job_end = max(job_end, task_end)
+                self.next_core += 1
+            num_cores_used = min(job.vvps, len(self.cores))
+            self._merge_into_queue([JobEnd(job_end, job.req_id, job.layer_id)])
+            self.pkl_file.write(f"{self.req_progress[job.req_id].name},{job.req_id},{job.layer_id},{job_start},{job_end},{num_cores_used}\n")
+
+            if len(idle_times) > 0:
+                for idle_time in idle_times:
+                    core_ids = sorted(idle_times[idle_time])
+                    intervals = []
+                    start_interval = core_ids[0]
+                    end_interval = core_ids[0]
+
+                    for i in range(1, len(core_ids)):
+                        if core_ids[i] == end_interval + 1:
+                            end_interval = core_ids[i]
+                        else:
+                            intervals.append((start_interval, end_interval))
+                            start_interval = core_ids[i]
+                            end_interval = core_ids[i]
+
+                    intervals.append((start_interval, end_interval))
+
+                    # self.pkl_file.write(f"{idle_time[0]},{idle_time[1]},{intervals}\n")
+
+        # NON-PREEMPTIVE SCHEDULING (supports batching)
+        else:
+            idle_cores:Set[int] = set()
+            new_earliest_core_avail_t = None
+            if self.earliest_core_avail_t <= self.time:
+                for core in self.cores:
+                    if core.current_end_time <= self.time:
+                        idle_cores.add(core.id)
+                    else:
+                        if new_earliest_core_avail_t != None:
+                            new_earliest_core_avail_t = min(core.current_end_time, new_earliest_core_avail_t)
+                        else:
+                            new_earliest_core_avail_t = core.current_end_time
+
+            job_end = None
+            num_tasks = job.vvps
+            is_first_job = False
+
+            if job.is_first_job and job.req_id not in self.first_job_seen:
+                if len(idle_cores) == 0:
+                    if job.req_id not in self.congested_reqs_set:
+                        self.congested_reqs_set.add(job.req_id)
+                        self.congested_reqs_queue.append(job.req_id)
+                        print("Adding to congested queue:", job.req_id)
+                    is_first_job = True
+                else:
+                    self.first_job_seen.add(job.req_id)
+                    print(f"{job.req_id} off congestion queue")
+                    if job.req_id in self.congested_reqs_set and self.batch_size > 1:
+                        new_congest_reqs_queue = []
+                        curr_batch_size = 0
+                        curr_req_type = self.req_progress[job.req_id].name
+                        dependent_reqs = []
+                        for req_id in self.congested_reqs_queue:
+                            if curr_batch_size >= self.batch_size or self.req_progress[req_id].name != curr_req_type:
+                                new_congest_reqs_queue.append(req_id)
+                            else:
+                                curr_batch_size += 1
+                                if req_id != job.req_id:
+                                    dependent_reqs.append(req_id)
+                                self.congested_reqs_set.remove(req_id)
+                        self.congested_reqs_queue = new_congest_reqs_queue
+                        if len(dependent_reqs) > 0:
+                            print(f"batching {job.req_id} with {dependent_reqs}")
+                            self.batches[job.req_id] = dependent_reqs
+                            dependent_reqs_set = set(dependent_reqs)
+                            new_queue = deque([])
+                            for event in self.queue:
+                                if event.req_id not in dependent_reqs_set:
+                                    new_queue.append(event)
+                            self.queue = new_queue
+
+            for core_id in idle_cores:
+                if num_tasks == 0:
+                    break
+                core = self.cores[core_id]
+                task_end, _ = core.schedule_vvp(job.task, self.time) # schedule_vvp takes (task, sim_time) and returns (task_end, idle_time)
+                num_tasks -= 1
+                if job_end == None:
+                    job_end = task_end
+                    if new_earliest_core_avail_t != None:
+                        new_earliest_core_avail_t = min(job_end, new_earliest_core_avail_t)
+                    else:
+                        new_earliest_core_avail_t = job_end
+
+            if num_tasks > 0:
+                # print(f"partial layer {job.layer_id} of Req {job.req_id} with {num_tasks} task left: {self.time} ts")
+                # Partial Job
+                if new_earliest_core_avail_t:
+                    self.earliest_core_avail_t = new_earliest_core_avail_t
+                future_jobs = [Job(self.earliest_core_avail_t, job.req_id, job.layer_id, num_tasks, job.input_size, job.task, is_first_job)]
+                while len(self.queue) > 0 and self.queue[0].req_id == job.req_id and isinstance(self.queue[0], Job) and self.queue[0].start_t == job.start_t:
+                    unprocessable_job = self.queue.popleft()
+                    future_jobs.append(Job(self.earliest_core_avail_t, unprocessable_job.req_id, unprocessable_job.layer_id, unprocessable_job.vvps, unprocessable_job.input_size, unprocessable_job.task, is_first_job))
+                self._merge_into_queue(future_jobs)
+            else:
+                # print(f"finished layer {job.layer_id} of Req {job.req_id}: {self.time} ts")
+                # JobEnd
+                self._merge_into_queue([JobEnd(job_end, job.req_id, job.layer_id)])
+
+    def _handle_jobend(self, jobend:JobEnd) -> None:
+        '''
+        Processes JobEnd object (updating simulator state)
+
+        Parameters
+        ----------
+        jobend: JobEnd object corresponding to end time of a DNN layer
+        '''
+        req_id = jobend.req_id
+        layer_id = jobend.layer_id
+        vec_len, num_vvps, children = self.req_progress[req_id].layer_index[layer_id]
+        self.req_layers_left[req_id] -= 1
+        if len(children) > 0: # when there are still children layers
+            next_layer_set = []
+            for c_layer_id in children:
+                self.req_progress[req_id].prereqs[c_layer_id].remove(layer_id)
+                if len(self.req_progress[req_id].prereqs[c_layer_id]) == 0:
+                    next_layer_set.append((c_layer_id, *self.req_progress[req_id].layer_index[c_layer_id]))
+            if len(next_layer_set) > 0:
+                next_jobs = gen_jobs(next_layer_set, self.time, req_id, self.overhead_factor)
+                # print(f"Scheduled next jobs: {[job[0] for job in next_layer_set]} for {self.time} ts")
+                self._merge_into_queue(next_jobs)
+        else: # request done
+            self.req_end_times[req_id] = self.time
+            if self.req_layers_left[req_id] == 0:
+                total_req_time = self.req_end_times[req_id] - self.req_start_times[req_id]
+                new_req_times = [total_req_time / self.processor.freq] # ts -> ns
+                if req_id in self.outstanding_reqs:
+                    self.outstanding_reqs.remove(req_id)
+                # self.pkl_file.write(f"REQ_TIME,{self.req_progress[jobend.req_id].name},{jobend.req_id},{time.time()-self.req_absolute_start_times[jobend.req_id]}\n")
+                print(f"Request {req_id} done: {self.time} ts")
+                print(f"Total req time for {req_id}: {total_req_time / self.processor.freq} ns")
+                if req_id in self.batches:
+                    for other_req_id in self.batches[req_id]:
+                        other_total_req_time = self.req_end_times[req_id] - self.req_start_times[other_req_id]
+                        new_req_times.append(other_total_req_time / self.processor.freq)
+                        print(f"Request {other_req_id} done: {self.time} ts")
+                        print(f"Total req time: {other_total_req_time / self.processor.freq} ns")
+
+                if self.req_progress[req_id].name in self.req_times:
+                    self.req_times[self.req_progress[req_id].name].extend(new_req_times)
+                else:
+                    self.req_times[self.req_progress[req_id].name] = new_req_times
+                self.curr_req_count -= len(new_req_times)
+                self.reqs_over_time.append((self.time, self.curr_req_count))
+
+    def simulate(self) -> Dict[str,float]:
+        '''
+        Performs simulation based on scheduled requests
+
+        Returns
+        -------
+        average_request_times: dictionary of average lifetime (in ns) of a request in simulation for each DNN
+        '''
+        self.pkl_file = open(self.pkl_filename, "at")
+        s_time = time.time()
+        try:
+            while self.queue and len(self.outstanding_reqs) > 0:
+                event = self.queue.popleft()
+                self.time = event.start_t
+                if isinstance(event, Request):
+                    self._handle_request(event)
+                elif isinstance(event, Job):
+                    self._handle_job(event)
+                elif isinstance(event, JobEnd):
+                    self._handle_jobend(event)
+        finally:
+            self.pkl_file.close() # release the stats file even if an event handler raises
+
+        avg_req_times = {}
+        for model in self.req_times:
+            avg_req_times[model] = sum(self.req_times[model]) / len(self.req_times[model])
+        print(f"Total runtime: {time.time()-s_time} secs")
+        return avg_req_times
+
+    def get_request_count_vs_time(self) -> List[Tuple[float,int]]:
+        '''
+        Returns
+        -------
+        request_count_vs_time: list of (time_stamp_in_ns, number_of_active_requests)
+        '''
+        request_count_vs_time = [(0.,0)]
+        for time_ts, num_reqs in self.reqs_over_time:
+            request_count_vs_time.append((time_ts / self.processor.freq, num_reqs))
+        return request_count_vs_time
+
+
+class Core():
+    def __init__(self, core_id:int) -> None:
+        self.id = core_id # unique identifier for core
+        self.current_end_time = 0 # time when core is no longer occupied with current task
+
+    def schedule_vvp(self, task:Task, sim_time=0) -> Tuple[int,Union[None,Tuple[float,float]]]:
+        '''
+        Schedules a task to the core (as soon as available)
+
+        Parameters
+        ----------
+        task: a VVP that wants to be processed on this core
+        sim_time: simulator's current time (in ts); the task starts no earlier than this
+
+        Returns
+        -------
+        (task_end, idle_time): end time of the just-scheduled task, and the (idle_start, idle_end) gap the core sat idle before it (None if there was no gap)
+        '''
+        if sim_time > self.current_end_time:
+            idle_time = (self.current_end_time,sim_time)
+        else:
+            idle_time = None
+        self.current_end_time = max(sim_time, self.current_end_time) + task.size
+        return self.current_end_time, idle_time
+
+
+def build_sim_for_mixed_arrivals(processor:Processor, network_speed:float, pkl_num:int, last_req:int, min_vec_len:int, batch_size=1, preemptive=False):
+    '''
+    Parameters
+    ----------
+    processor: see Simulator spec
+    network_speed: network speed in Gbps
+    pkl_num: request schedule pickle file identifier
+    last_req: last req to end simulation on
+    min_vec_len: minimum length of vector multiplication (for granularity)
+    batch_size: maximum batch size for processor
+    preemptive: whether simulator should use preemptive (or non-preemptive) scheduling strategy
+
+    Returns
+    -------
+    simulator: Simulator object which is properly scheduled
+    '''
+    simulator = Simulator(processor, pkl_num, last_req, batch_size, preemptive)
+    with open(f'sim_scheds/mixed_sched_{network_speed}_Gbps_{pkl_num}.pkl', 'rb') as file:
+        schedule = pickle.load(file) # NOTE: pickle is only safe on schedule files generated locally (gen_mixed.py)
+    for model_name, arrival_ns in schedule:
+        model = build_model(model_name, min_vec_len)
+        arrival_ts = math.ceil(arrival_ns * processor.freq) # ns => ts
+        simulator.schedule_request(model, arrival_ts)
+    print(f"Arrival times (in ts): {schedule}")
+    return simulator
+
+def ParseOpt(known=False):
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--processor', type=str, help='processor on which to simulate')
+    parser.add_argument('--num_reqs', type=int, help="exact number of requests to simulate")
+    parser.add_argument('--network_speed', type=float, help="network speed in Gbps")
+    parser.add_argument('--gran', type=float, help="granularity (minimum vector length)")
+    parser.add_argument('--preemptive', type=lambda s: s.strip().lower() in ("true", "t", "yes", "y", "1"), help="preemptive or non-preemptive scheduling strategy") # type=bool would turn "False" into True
+    parser.add_argument('--batch_size', type=int, help="maximum batch size for processor")
+    parser.add_argument('--pkl_num', type=int, help='request schedule pickle file identifier')
+    parser.add_argument('--lightning_core_count', type=int, help='number of cores for lightning')
+    parser.add_argument('--last_req', type=int, help="last req to end simulation on")
+    opt = parser.parse_known_args()[0] if known else parser.parse_args()
+
+    return opt
+
+
+def sim_mixed_arrivals_on_processor(processor_name:str, network_speed:float, pkl_num:int, last_req:int, min_vec_len=1, batch_size=1, preemptive=False) -> Dict[str,float]:
+    '''
+    Simulates multiple DNN models on a processor and prints and returns the average request processing times
+
+    Parameters
+    ----------
+    processor_name: name of processor
+    network_speed: network speed in Gbps
+    pkl_num: request schedule pickle file identifier
+    last_req: last req to end simulation on
+    min_vec_len: minimum length of vector multiplication (for granularity)
+    batch_size: maximum batch size for processor
+    preemptive: preemptive (or non-preemptive) scheduling strategy
+
+    Returns
+    -------
+    average_req_times: dictionary of average lifetime (in ns) of a request in simulation for each DNN
+    '''
+    processor = PROCESSORS[processor_name]
+    print(f"Running mixed arrivals on {processor.name}...")
+    simulator = build_sim_for_mixed_arrivals(processor, network_speed, pkl_num, last_req, min_vec_len, batch_size, preemptive)
+    print("Simulator scheduling complete...")
+    average_req_times = simulator.simulate()
+    print(f'Average request times (in ns): {average_req_times}')
+    request_count_over_time = simulator.get_request_count_vs_time()
+    print(f'Request count over time (in ns): {request_count_over_time}')
+    return average_req_times
+
+if __name__=="__main__":
+    opt = ParseOpt()
+
+    PROCESSORS['Lightning-1-200-100'] = Processor('Lightning-1-200-100', opt.lightning_core_count, 97, {
+        "AlexNet": 115*8,
+        "ResNet-18": 115*21,
+        "VGG-16": 115*16,
+        "VGG-19": 115*19,
+        "BERT": 115*(1+7*24), # Encoder=4 for Self-Attention + 3 for Feed-Forward, 1 FC
+        "GPT-2": 115*(2+7*48), # Encoder=4 for Self-Attention + 3 for Feed-Forward, 2 FC
+        "ChatGPT": 115*24291,
+        "DLRM": 115*8 # Embeddings=1, Bottom MLP=3, Top MLP=4
+    })
+
+    sim_mixed_arrivals_on_processor(opt.processor, opt.network_speed, opt.pkl_num, opt.last_req, opt.gran, opt.batch_size, opt.preemptive)
\ No newline at end of file
diff --git a/simulation/sim_classes.py b/simulation/sim_classes.py
new file mode 100644
index 0000000..f7ad031
--- /dev/null
+++ b/simulation/sim_classes.py
@@ -0,0 +1,134 @@
+from typing import List, Dict
+from dnn_classes import ReadableModel
+import math
+
+class Processor():
+    '''
+    Specifications of processor to be simulated
+    '''
+    def __init__(self, name:str, num_cores:int, freq:float, dpl:Dict[str,int], overhead_factor=0.) -> None:
+        '''
+        Parameters
+        ----------
+        name: label for processor
+        num_cores: number of cores available for simulation
+        freq: clock frequency (in GHz)
+        dpl: table of datapath latencies (in ns); copied, then stored converted to ts
+        overhead_factor: coefficient multiplied by layer input size to determine overhead
+        '''
+        self.name = name
+        self.num_cores = num_cores
+        self.freq = freq
+        self.dpl = dpl.copy()
+        for m in self.dpl:
+            self.dpl[m] = math.ceil(self.dpl[m] * freq) # ns -> ts (rounded up to nearest timeslot)
+        self.overhead_factor = overhead_factor
+
+
+class Event():
+    '''
+    Event for scheduling to simulator
+    '''
+    def __init__(self, start_t:int, req_id:int) -> None:
+        '''
+        Parameters
+        ----------
+        start_t: time (in ts of simulator) of event's arrival (non-negative integer)
+        req_id: identifier of request that the event is associated with (a natural number)
+        '''
+        self.start_t = start_t
+        self.req_id = req_id
+
+
+class Task():
+    '''
+    Represents VVP in a layer of DNN
+    '''
+    def __init__(self, req_id:int, layer_id:int, size:int) -> None:
+        '''
+        Parameters
+        ----------
+        req_id: identifier of request that the event is associated with (a natural number)
+        layer_id: id of layer in model this Task is associated with
+        size: number of element-element multiplication operations in VVP
+        '''
+        self.req_id = req_id
+        self.layer_id = layer_id
+        self.size = size
+
+
+class Job(Event):
+    '''
+    Represents a layer in DNN
+    '''
+    def __init__(self, start_t:int, req_id:int, layer_id: int, vvps:int, input_size:int,task=None,is_first_job=False) -> None:
+        '''
+        Parameters
+        ----------
+        start_t: see Event spec
+        req_id: see Event spec
+        layer_id: id of layer in model this Job represents
+        vvps: number of VVPs in layer
+        input_size: duration of each VVP (in ts); used as the size of the default Task
+        task: Task object associated with VVP (a new Task is built from input_size when omitted)
+        is_first_job: whether this Job is a first layer in its DNN request
+        '''
+        super().__init__(start_t, req_id)
+        self.layer_id = layer_id
+        self.vvps = vvps
+        self.input_size:int = input_size
+        if task is None:
+            self.task = Task(req_id, layer_id, input_size)
+        else:
+            self.task = task
+        self.is_first_job = is_first_job
+
+
+class JobEnd(Event):
+    '''
+    Represents end of a Job (DNN layer)
+    '''
+    def __init__(self, start_t:int, req_id:int, layer_id:int):
+        '''
+        Parameters
+        ----------
+        start_t: see Event spec (the event is 0 cycles long)
+        req_id: see Event spec
+        layer_id: ID of layer in model this Job represents
+        '''
+        super().__init__(start_t, req_id)
+        self.layer_id = layer_id
+
+
+class Request(Event):
+    '''
+    Represents DNN with only fully-connected layers
+    '''
+    def __init__(self, start_t:int, model:ReadableModel, req_id:int) -> None:
+        '''
+        Parameters
+        ----------
+        start_t: see Event spec
+        model: ReadableModel for the requested DNN (rebuilt from its name, layer_index, prereqs and independent_layers)
+        req_id: see Event spec
+        '''
+        super().__init__(start_t, req_id)
+        self.model = ReadableModel(model.name, model.layer_index, model.prereqs, model.independent_layers) # copied to prevent aliasing
+
+    def gen_first_jobs(self, curr_time:int, overhead_factor=0.) -> List[Job]:
+        '''
+        Parameters
+        ----------
+        curr_time: simulator's time (in ts)
+        overhead_factor: coefficient multiplied by layer input size to determine overhead
+
+        Returns
+        -------
+        jobs: Jobs corresponding to first layers of request (that are not dependent on each other/parallel)
+        '''
+        first_layers = [(layer_id, *self.model.layer_index[layer_id]) for layer_id in self.model.independent_layers]
+        jobs = []
+        for layer_id, input_size, vvps, children in first_layers:
+            overhead_time = math.ceil(overhead_factor*input_size)
+            jobs.append(Job(curr_time+overhead_time, self.req_id, layer_id, vvps, input_size, is_first_job=True))
+        return jobs
\ No newline at end of file
diff --git a/simulation/sim_scheds/mixed_sched_0.01_Gbps_1.pkl b/simulation/sim_scheds/mixed_sched_0.01_Gbps_1.pkl
new file mode 100644
index 0000000..ddf259d
Binary files /dev/null and b/simulation/sim_scheds/mixed_sched_0.01_Gbps_1.pkl differ
diff --git a/simulation/sim_scheds/mixed_sched_0.01_Gbps_10.pkl b/simulation/sim_scheds/mixed_sched_0.01_Gbps_10.pkl
new file mode 100644
index 0000000..631cbbb
Binary files /dev/null and b/simulation/sim_scheds/mixed_sched_0.01_Gbps_10.pkl differ
diff --git a/simulation/sim_scheds/mixed_sched_0.01_Gbps_2.pkl b/simulation/sim_scheds/mixed_sched_0.01_Gbps_2.pkl
new file mode 100644
index 0000000..468b0de
Binary files /dev/null and b/simulation/sim_scheds/mixed_sched_0.01_Gbps_2.pkl differ
diff --git a/simulation/sim_scheds/mixed_sched_0.01_Gbps_3.pkl b/simulation/sim_scheds/mixed_sched_0.01_Gbps_3.pkl
new file mode 100644
index 0000000..40eb4c9
Binary files /dev/null and b/simulation/sim_scheds/mixed_sched_0.01_Gbps_3.pkl differ
diff --git a/simulation/sim_scheds/mixed_sched_0.01_Gbps_4.pkl b/simulation/sim_scheds/mixed_sched_0.01_Gbps_4.pkl
new file mode 100644
index 0000000..925d1a1
Binary files /dev/null and b/simulation/sim_scheds/mixed_sched_0.01_Gbps_4.pkl differ
diff --git a/simulation/sim_scheds/mixed_sched_0.01_Gbps_5.pkl
b/simulation/sim_scheds/mixed_sched_0.01_Gbps_5.pkl new file mode 100644 index 0000000..32b2c65 Binary files /dev/null and b/simulation/sim_scheds/mixed_sched_0.01_Gbps_5.pkl differ diff --git a/simulation/sim_scheds/mixed_sched_0.01_Gbps_6.pkl b/simulation/sim_scheds/mixed_sched_0.01_Gbps_6.pkl new file mode 100644 index 0000000..8e3ddb3 Binary files /dev/null and b/simulation/sim_scheds/mixed_sched_0.01_Gbps_6.pkl differ diff --git a/simulation/sim_scheds/mixed_sched_0.01_Gbps_7.pkl b/simulation/sim_scheds/mixed_sched_0.01_Gbps_7.pkl new file mode 100644 index 0000000..41418c6 Binary files /dev/null and b/simulation/sim_scheds/mixed_sched_0.01_Gbps_7.pkl differ diff --git a/simulation/sim_scheds/mixed_sched_0.01_Gbps_8.pkl b/simulation/sim_scheds/mixed_sched_0.01_Gbps_8.pkl new file mode 100644 index 0000000..8c1f3bf Binary files /dev/null and b/simulation/sim_scheds/mixed_sched_0.01_Gbps_8.pkl differ diff --git a/simulation/sim_scheds/mixed_sched_0.01_Gbps_9.pkl b/simulation/sim_scheds/mixed_sched_0.01_Gbps_9.pkl new file mode 100644 index 0000000..7fe400b Binary files /dev/null and b/simulation/sim_scheds/mixed_sched_0.01_Gbps_9.pkl differ diff --git a/simulation/sim_scheds/mixed_sched_0.02_Gbps_1.pkl b/simulation/sim_scheds/mixed_sched_0.02_Gbps_1.pkl new file mode 100644 index 0000000..d5f4e7b Binary files /dev/null and b/simulation/sim_scheds/mixed_sched_0.02_Gbps_1.pkl differ diff --git a/simulation/sim_scheds/mixed_sched_0.02_Gbps_10.pkl b/simulation/sim_scheds/mixed_sched_0.02_Gbps_10.pkl new file mode 100644 index 0000000..5f9a4ef Binary files /dev/null and b/simulation/sim_scheds/mixed_sched_0.02_Gbps_10.pkl differ diff --git a/simulation/sim_scheds/mixed_sched_0.02_Gbps_2.pkl b/simulation/sim_scheds/mixed_sched_0.02_Gbps_2.pkl new file mode 100644 index 0000000..e12e31b Binary files /dev/null and b/simulation/sim_scheds/mixed_sched_0.02_Gbps_2.pkl differ diff --git a/simulation/sim_scheds/mixed_sched_0.02_Gbps_3.pkl 
b/simulation/sim_scheds/mixed_sched_0.02_Gbps_3.pkl new file mode 100644 index 0000000..e645f1b Binary files /dev/null and b/simulation/sim_scheds/mixed_sched_0.02_Gbps_3.pkl differ diff --git a/simulation/sim_scheds/mixed_sched_0.02_Gbps_4.pkl b/simulation/sim_scheds/mixed_sched_0.02_Gbps_4.pkl new file mode 100644 index 0000000..4bbec99 Binary files /dev/null and b/simulation/sim_scheds/mixed_sched_0.02_Gbps_4.pkl differ diff --git a/simulation/sim_scheds/mixed_sched_0.02_Gbps_5.pkl b/simulation/sim_scheds/mixed_sched_0.02_Gbps_5.pkl new file mode 100644 index 0000000..187e046 Binary files /dev/null and b/simulation/sim_scheds/mixed_sched_0.02_Gbps_5.pkl differ diff --git a/simulation/sim_scheds/mixed_sched_0.02_Gbps_6.pkl b/simulation/sim_scheds/mixed_sched_0.02_Gbps_6.pkl new file mode 100644 index 0000000..311818d Binary files /dev/null and b/simulation/sim_scheds/mixed_sched_0.02_Gbps_6.pkl differ diff --git a/simulation/sim_scheds/mixed_sched_0.02_Gbps_7.pkl b/simulation/sim_scheds/mixed_sched_0.02_Gbps_7.pkl new file mode 100644 index 0000000..e68d23c Binary files /dev/null and b/simulation/sim_scheds/mixed_sched_0.02_Gbps_7.pkl differ diff --git a/simulation/sim_scheds/mixed_sched_0.02_Gbps_8.pkl b/simulation/sim_scheds/mixed_sched_0.02_Gbps_8.pkl new file mode 100644 index 0000000..0bb1d67 Binary files /dev/null and b/simulation/sim_scheds/mixed_sched_0.02_Gbps_8.pkl differ diff --git a/simulation/sim_scheds/mixed_sched_0.02_Gbps_9.pkl b/simulation/sim_scheds/mixed_sched_0.02_Gbps_9.pkl new file mode 100644 index 0000000..9a2d53f Binary files /dev/null and b/simulation/sim_scheds/mixed_sched_0.02_Gbps_9.pkl differ diff --git a/simulation/sim_scheds/mixed_sched_0.03_Gbps_1.pkl b/simulation/sim_scheds/mixed_sched_0.03_Gbps_1.pkl new file mode 100644 index 0000000..a210229 Binary files /dev/null and b/simulation/sim_scheds/mixed_sched_0.03_Gbps_1.pkl differ diff --git a/simulation/sim_scheds/mixed_sched_0.03_Gbps_10.pkl 
b/simulation/sim_scheds/mixed_sched_0.03_Gbps_10.pkl new file mode 100644 index 0000000..f51fa20 Binary files /dev/null and b/simulation/sim_scheds/mixed_sched_0.03_Gbps_10.pkl differ diff --git a/simulation/sim_scheds/mixed_sched_0.03_Gbps_2.pkl b/simulation/sim_scheds/mixed_sched_0.03_Gbps_2.pkl new file mode 100644 index 0000000..1ecb504 Binary files /dev/null and b/simulation/sim_scheds/mixed_sched_0.03_Gbps_2.pkl differ diff --git a/simulation/sim_scheds/mixed_sched_0.03_Gbps_3.pkl b/simulation/sim_scheds/mixed_sched_0.03_Gbps_3.pkl new file mode 100644 index 0000000..c0ba418 Binary files /dev/null and b/simulation/sim_scheds/mixed_sched_0.03_Gbps_3.pkl differ diff --git a/simulation/sim_scheds/mixed_sched_0.03_Gbps_4.pkl b/simulation/sim_scheds/mixed_sched_0.03_Gbps_4.pkl new file mode 100644 index 0000000..9f7a3b0 Binary files /dev/null and b/simulation/sim_scheds/mixed_sched_0.03_Gbps_4.pkl differ diff --git a/simulation/sim_scheds/mixed_sched_0.03_Gbps_5.pkl b/simulation/sim_scheds/mixed_sched_0.03_Gbps_5.pkl new file mode 100644 index 0000000..7550a05 Binary files /dev/null and b/simulation/sim_scheds/mixed_sched_0.03_Gbps_5.pkl differ diff --git a/simulation/sim_scheds/mixed_sched_0.03_Gbps_6.pkl b/simulation/sim_scheds/mixed_sched_0.03_Gbps_6.pkl new file mode 100644 index 0000000..0cfc468 Binary files /dev/null and b/simulation/sim_scheds/mixed_sched_0.03_Gbps_6.pkl differ diff --git a/simulation/sim_scheds/mixed_sched_0.03_Gbps_7.pkl b/simulation/sim_scheds/mixed_sched_0.03_Gbps_7.pkl new file mode 100644 index 0000000..683f82a Binary files /dev/null and b/simulation/sim_scheds/mixed_sched_0.03_Gbps_7.pkl differ diff --git a/simulation/sim_scheds/mixed_sched_0.03_Gbps_8.pkl b/simulation/sim_scheds/mixed_sched_0.03_Gbps_8.pkl new file mode 100644 index 0000000..1a4b3c1 Binary files /dev/null and b/simulation/sim_scheds/mixed_sched_0.03_Gbps_8.pkl differ diff --git a/simulation/sim_scheds/mixed_sched_0.03_Gbps_9.pkl 
b/simulation/sim_scheds/mixed_sched_0.03_Gbps_9.pkl new file mode 100644 index 0000000..e8bc77d Binary files /dev/null and b/simulation/sim_scheds/mixed_sched_0.03_Gbps_9.pkl differ diff --git a/simulation/sim_scheds/mixed_sched_0.04_Gbps_1.pkl b/simulation/sim_scheds/mixed_sched_0.04_Gbps_1.pkl new file mode 100644 index 0000000..2caa2c6 Binary files /dev/null and b/simulation/sim_scheds/mixed_sched_0.04_Gbps_1.pkl differ diff --git a/simulation/sim_scheds/mixed_sched_0.04_Gbps_10.pkl b/simulation/sim_scheds/mixed_sched_0.04_Gbps_10.pkl new file mode 100644 index 0000000..3c898fb Binary files /dev/null and b/simulation/sim_scheds/mixed_sched_0.04_Gbps_10.pkl differ diff --git a/simulation/sim_scheds/mixed_sched_0.04_Gbps_2.pkl b/simulation/sim_scheds/mixed_sched_0.04_Gbps_2.pkl new file mode 100644 index 0000000..3f2483e Binary files /dev/null and b/simulation/sim_scheds/mixed_sched_0.04_Gbps_2.pkl differ diff --git a/simulation/sim_scheds/mixed_sched_0.04_Gbps_3.pkl b/simulation/sim_scheds/mixed_sched_0.04_Gbps_3.pkl new file mode 100644 index 0000000..4b51da9 Binary files /dev/null and b/simulation/sim_scheds/mixed_sched_0.04_Gbps_3.pkl differ diff --git a/simulation/sim_scheds/mixed_sched_0.04_Gbps_4.pkl b/simulation/sim_scheds/mixed_sched_0.04_Gbps_4.pkl new file mode 100644 index 0000000..b5e5f6a Binary files /dev/null and b/simulation/sim_scheds/mixed_sched_0.04_Gbps_4.pkl differ diff --git a/simulation/sim_scheds/mixed_sched_0.04_Gbps_5.pkl b/simulation/sim_scheds/mixed_sched_0.04_Gbps_5.pkl new file mode 100644 index 0000000..4d29324 Binary files /dev/null and b/simulation/sim_scheds/mixed_sched_0.04_Gbps_5.pkl differ diff --git a/simulation/sim_scheds/mixed_sched_0.04_Gbps_6.pkl b/simulation/sim_scheds/mixed_sched_0.04_Gbps_6.pkl new file mode 100644 index 0000000..849f10a Binary files /dev/null and b/simulation/sim_scheds/mixed_sched_0.04_Gbps_6.pkl differ diff --git a/simulation/sim_scheds/mixed_sched_0.04_Gbps_7.pkl 
b/simulation/sim_scheds/mixed_sched_0.04_Gbps_7.pkl new file mode 100644 index 0000000..30b425a Binary files /dev/null and b/simulation/sim_scheds/mixed_sched_0.04_Gbps_7.pkl differ diff --git a/simulation/sim_scheds/mixed_sched_0.04_Gbps_8.pkl b/simulation/sim_scheds/mixed_sched_0.04_Gbps_8.pkl new file mode 100644 index 0000000..192f43d Binary files /dev/null and b/simulation/sim_scheds/mixed_sched_0.04_Gbps_8.pkl differ diff --git a/simulation/sim_scheds/mixed_sched_0.04_Gbps_9.pkl b/simulation/sim_scheds/mixed_sched_0.04_Gbps_9.pkl new file mode 100644 index 0000000..6b8399b Binary files /dev/null and b/simulation/sim_scheds/mixed_sched_0.04_Gbps_9.pkl differ diff --git a/simulation/sim_scheds/mixed_sched_0.05_Gbps_1.pkl b/simulation/sim_scheds/mixed_sched_0.05_Gbps_1.pkl new file mode 100644 index 0000000..ca3d6f2 Binary files /dev/null and b/simulation/sim_scheds/mixed_sched_0.05_Gbps_1.pkl differ diff --git a/simulation/sim_scheds/mixed_sched_0.05_Gbps_10.pkl b/simulation/sim_scheds/mixed_sched_0.05_Gbps_10.pkl new file mode 100644 index 0000000..e4206b6 Binary files /dev/null and b/simulation/sim_scheds/mixed_sched_0.05_Gbps_10.pkl differ diff --git a/simulation/sim_scheds/mixed_sched_0.05_Gbps_2.pkl b/simulation/sim_scheds/mixed_sched_0.05_Gbps_2.pkl new file mode 100644 index 0000000..3168299 Binary files /dev/null and b/simulation/sim_scheds/mixed_sched_0.05_Gbps_2.pkl differ diff --git a/simulation/sim_scheds/mixed_sched_0.05_Gbps_3.pkl b/simulation/sim_scheds/mixed_sched_0.05_Gbps_3.pkl new file mode 100644 index 0000000..00268d5 Binary files /dev/null and b/simulation/sim_scheds/mixed_sched_0.05_Gbps_3.pkl differ diff --git a/simulation/sim_scheds/mixed_sched_0.05_Gbps_4.pkl b/simulation/sim_scheds/mixed_sched_0.05_Gbps_4.pkl new file mode 100644 index 0000000..81d8c86 Binary files /dev/null and b/simulation/sim_scheds/mixed_sched_0.05_Gbps_4.pkl differ diff --git a/simulation/sim_scheds/mixed_sched_0.05_Gbps_5.pkl 
b/simulation/sim_scheds/mixed_sched_0.05_Gbps_5.pkl new file mode 100644 index 0000000..90e7df8 Binary files /dev/null and b/simulation/sim_scheds/mixed_sched_0.05_Gbps_5.pkl differ diff --git a/simulation/sim_scheds/mixed_sched_0.05_Gbps_6.pkl b/simulation/sim_scheds/mixed_sched_0.05_Gbps_6.pkl new file mode 100644 index 0000000..1549a50 Binary files /dev/null and b/simulation/sim_scheds/mixed_sched_0.05_Gbps_6.pkl differ diff --git a/simulation/sim_scheds/mixed_sched_0.05_Gbps_7.pkl b/simulation/sim_scheds/mixed_sched_0.05_Gbps_7.pkl new file mode 100644 index 0000000..265bd2b Binary files /dev/null and b/simulation/sim_scheds/mixed_sched_0.05_Gbps_7.pkl differ diff --git a/simulation/sim_scheds/mixed_sched_0.05_Gbps_8.pkl b/simulation/sim_scheds/mixed_sched_0.05_Gbps_8.pkl new file mode 100644 index 0000000..ba32a66 Binary files /dev/null and b/simulation/sim_scheds/mixed_sched_0.05_Gbps_8.pkl differ diff --git a/simulation/sim_scheds/mixed_sched_0.05_Gbps_9.pkl b/simulation/sim_scheds/mixed_sched_0.05_Gbps_9.pkl new file mode 100644 index 0000000..a67f007 Binary files /dev/null and b/simulation/sim_scheds/mixed_sched_0.05_Gbps_9.pkl differ diff --git a/simulation/sim_scheds/mixed_sched_0.06_Gbps_1.pkl b/simulation/sim_scheds/mixed_sched_0.06_Gbps_1.pkl new file mode 100644 index 0000000..b38947e Binary files /dev/null and b/simulation/sim_scheds/mixed_sched_0.06_Gbps_1.pkl differ diff --git a/simulation/sim_scheds/mixed_sched_0.06_Gbps_10.pkl b/simulation/sim_scheds/mixed_sched_0.06_Gbps_10.pkl new file mode 100644 index 0000000..5d68584 Binary files /dev/null and b/simulation/sim_scheds/mixed_sched_0.06_Gbps_10.pkl differ diff --git a/simulation/sim_scheds/mixed_sched_0.06_Gbps_2.pkl b/simulation/sim_scheds/mixed_sched_0.06_Gbps_2.pkl new file mode 100644 index 0000000..6a9c854 Binary files /dev/null and b/simulation/sim_scheds/mixed_sched_0.06_Gbps_2.pkl differ diff --git a/simulation/sim_scheds/mixed_sched_0.06_Gbps_3.pkl 
# b/simulation/sim_scheds/mixed_sched_0.06_Gbps_3.pkl new file mode 100644 index 0000000..e7f18a5 Binary files /dev/null and b/simulation/sim_scheds/mixed_sched_0.06_Gbps_3.pkl differ
# diff --git a/simulation/sim_scheds/mixed_sched_0.06_Gbps_4.pkl b/simulation/sim_scheds/mixed_sched_0.06_Gbps_4.pkl new file mode 100644 index 0000000..0a2808f Binary files /dev/null and b/simulation/sim_scheds/mixed_sched_0.06_Gbps_4.pkl differ
# diff --git a/simulation/sim_scheds/mixed_sched_0.06_Gbps_5.pkl b/simulation/sim_scheds/mixed_sched_0.06_Gbps_5.pkl new file mode 100644 index 0000000..e4b2fd9 Binary files /dev/null and b/simulation/sim_scheds/mixed_sched_0.06_Gbps_5.pkl differ
# diff --git a/simulation/sim_scheds/mixed_sched_0.06_Gbps_6.pkl b/simulation/sim_scheds/mixed_sched_0.06_Gbps_6.pkl new file mode 100644 index 0000000..e63ecd7 Binary files /dev/null and b/simulation/sim_scheds/mixed_sched_0.06_Gbps_6.pkl differ
# diff --git a/simulation/sim_scheds/mixed_sched_0.06_Gbps_7.pkl b/simulation/sim_scheds/mixed_sched_0.06_Gbps_7.pkl new file mode 100644 index 0000000..7b2dca8 Binary files /dev/null and b/simulation/sim_scheds/mixed_sched_0.06_Gbps_7.pkl differ
# diff --git a/simulation/sim_scheds/mixed_sched_0.06_Gbps_8.pkl b/simulation/sim_scheds/mixed_sched_0.06_Gbps_8.pkl new file mode 100644 index 0000000..09d8e13 Binary files /dev/null and b/simulation/sim_scheds/mixed_sched_0.06_Gbps_8.pkl differ
# diff --git a/simulation/sim_scheds/mixed_sched_0.06_Gbps_9.pkl b/simulation/sim_scheds/mixed_sched_0.06_Gbps_9.pkl new file mode 100644 index 0000000..c04e316 Binary files /dev/null and b/simulation/sim_scheds/mixed_sched_0.06_Gbps_9.pkl differ

# ---------------------------------------------------------------------------
# diff --git a/simulation/trial_to_csv.py b/simulation/trial_to_csv.py
# new file mode 100644, index 0000000..4e8017d
# (content of the added file follows, reformatted and revised)
# ---------------------------------------------------------------------------
from typing import Tuple
import re
import ast
import argparse
import os

# Line prefixes recognised in a trial log.
_RUNTIME_PREFIX = "Total runtime:"
_AVG_TIMES_PREFIX = "Average request times (in ns):"
_REQ_COUNT_PREFIX = "Request count over time (in ns):"
_ARRIVALS_PREFIX = "Arrival times (in ts):"
_REQ_TIME_PREFIX = "Total req time for"

# Integer, decimal or exponent notation ("1234", "12.5", "1.5e+16").
_NUMBER_RE = re.compile(r"(?:\d+(?:\.\d*)?|\.\d+)(?:[eE][-+]?\d+)?")


def _literal_after(line: str, prefix: str):
    '''Evaluates the Python literal that follows `prefix` on `line`.'''
    return ast.literal_eval(line[len(prefix):].strip())


def _format_finished(total_runtime, avg_req_completion, active_reqs_v_time) -> Tuple[str, str]:
    '''Renders the summary of a finished trial as (runtime CSV, active-request CSV).'''
    rows = [f"Runtime,{total_runtime}\n"]
    rows.extend(f"{model_name},{avg}\n" for model_name, avg in avg_req_completion.items())
    str2 = '\n'.join(','.join(map(str, tpl)) for tpl in active_reqs_v_time)
    return "".join(rows), str2


def _format_unfinished(lines) -> str:
    '''Averages the per-request times logged so far, grouped by model name.'''
    arrivals = []
    req_completions = {}
    for line in lines:
        if line.startswith(_ARRIVALS_PREFIX):
            arrivals = _literal_after(line, _ARRIVALS_PREFIX)
        elif line.startswith(_REQ_TIME_PREFIX):
            tokens = line.split()
            # tokens[4] is the request id followed by one trailing character;
            # tokens[5] is that request's total time.
            req_id = int(tokens[4][:-1])
            rt = float(tokens[5])
            # Request ids index `arrivals` 1-based; the first field of an
            # arrival entry is used as the model name.
            model_name = arrivals[req_id - 1][0]
            req_completions.setdefault(model_name, []).append(rt)
    return "".join(
        f"{model_name},{sum(times) / len(times)}\n"
        for model_name, times in req_completions.items()
    )


def trial_parser(filename: str) -> Tuple[str, str]:
    '''
    Parses trial file into two strings: one for average request completion times (and runtime) and one for active request count over time

    A trial counts as finished when it contains a "Total runtime:" line. In
    that case the summary lines of the log are used. Otherwise the per-request
    "Total req time for" lines are averaged per model and the second string
    is empty.

    Parameters
    ----------
    filename: name of file where trial stored

    Returns
    -------
    str1: average request completion times (and runtime)
    str2: active request count over time
    '''
    # Read once; both the finished and the unfinished path work on these lines.
    with open(filename, "r") as trial:
        lines = trial.readlines()

    total_runtime = None
    avg_req_completion = None
    active_reqs_v_time = None
    finished = False
    for line in lines:
        if line.startswith(_RUNTIME_PREFIX):
            finished = True
            # Search only after the prefix and accept integer / exponent
            # notation, so "1234" is not dropped and "1.5e+16" is not
            # truncated to "1.5".
            match = _NUMBER_RE.search(line, len(_RUNTIME_PREFIX))
            if match:
                total_runtime = float(match.group())
        elif line.startswith(_AVG_TIMES_PREFIX):
            avg_req_completion = _literal_after(line, _AVG_TIMES_PREFIX)
        elif line.startswith(_REQ_COUNT_PREFIX):
            active_reqs_v_time = _literal_after(line, _REQ_COUNT_PREFIX)

    if finished:
        # A summary line may be missing from a truncated log; emit what is
        # available instead of failing on None.
        return _format_finished(
            total_runtime,
            avg_req_completion or {},
            active_reqs_v_time or [],
        )
    return _format_unfinished(lines), ""


def ParseOpt(known=False):
    '''
    Parses the command-line options that identify a trial

    Parameters
    ----------
    known: if True, unrecognised arguments are ignored (parse_known_args) instead of being rejected

    Returns
    -------
    opt: argparse namespace with batch_size, lightning_core_count, num_reqs, network_speed, pkl_num and processor
    '''
    parser = argparse.ArgumentParser()
    parser.add_argument('--batch_size', type=int, help="maximum batch size for processor")
    parser.add_argument('--lightning_core_count', type=int, help="number of cores for Lightning")
    parser.add_argument('--num_reqs', type=int, help="exact number of requests to simulate")
    parser.add_argument('--network_speed', type=str, help="network speed (in Gbps)")
    parser.add_argument('--pkl_num', type=int, help="request schedule pickle file identifier")
    parser.add_argument('--processor', type=str, help="name of processor")
    opt = parser.parse_known_args()[0] if known else parser.parse_args()

    return opt


def _trial_stem(opt) -> str:
    '''Builds the file-name stem shared by a trial log and its two CSV outputs.'''
    return (
        f"{opt.processor}_{opt.network_speed}_Gbps_l{opt.lightning_core_count}_cores_"
        f"{opt.num_reqs}_reqs_{opt.batch_size}_BS_{opt.pkl_num}"
    )


def main() -> None:
    '''Converts the trial log selected on the command line into its two CSV files.'''
    opt = ParseOpt()
    stem = _trial_stem(opt)
    str1, str2 = trial_parser(f"trials/{stem}.txt")
    for out_dir, payload in (("results/runtimes", str1), ("results/active_reqs", str2)):
        # Create the output directory on first use so a fresh checkout works.
        os.makedirs(out_dir, exist_ok=True)
        with open(f"{out_dir}/{stem}.csv", "w") as file:
            file.write(payload)


if __name__ == "__main__":
    main()

# ---------------------------------------------------------------------------
# diff --git a/simulation/utils.py b/simulation/utils.py
# new file mode 100644, index 0000000..b4be3ca
# (content of the added file follows, reformatted and revised)
# ---------------------------------------------------------------------------
from typing import List, Tuple
from sim_classes import Job
import math


def gen_jobs(layer_set: List[Tuple[int, int, int, List[int]]], curr_time: int, req_id: int, overhead_factor=0.):
    '''
    Generates Jobs given a list of parallel layers

    Parameters
    ----------
    layer_set: list of layer dimensions (layer_id, input_size, num_vvps, children) to be executed in parallel (no dependencies)
    curr_time: simulator's current time (in ts)
    req_id: id of associated Request
    overhead_factor: see spec for Simulator.__init__()

    Returns
    -------
    jobs: list of appropriately-scheduled Jobs corresponding to layer information
    '''
    # Each Job starts at curr_time plus ceil(overhead_factor * input_size);
    # the children field of a layer tuple is not used here.
    return [
        Job(curr_time + math.ceil(overhead_factor * input_size), req_id, layer_id, vvps, input_size)
        for layer_id, input_size, vvps, _children in layer_set
    ]