-
Notifications
You must be signed in to change notification settings - Fork 1
/
process_datasets.job
99 lines (83 loc) · 2.63 KB
/
process_datasets.job
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
#!/bin/bash
#SBATCH --job-name=data_process
#SBATCH --time=06:00:00
#SBATCH --array=1-4

# Preprocess datasets for the meta-learning-gnns pipeline.
# Runs as a SLURM array job: array task N reads line N of
# $PARAMETERS_FILE and passes it to preprocess.py as extra
# Hydra-style "key=value" overrides.
#
# Required environment: DATA_DIR (processed-data output directory).
set -e -o pipefail

module purge
# LOAD ANACONDA MODULE
# Activate your environment
source activate meta-learning-gnns
# Enable -u only after conda activation: activation scripts commonly
# reference unset variables and would abort under `set -u`.
set -u

cd "$HOME/meta-learning-gnns/main"

PARAMETERS_FILE=../job_parameters/default_datasets_process.txt

# Select this array task's override line ONCE (the original re-ran
# `head | tail` before every srun call) and keep it word-split into an
# array so each override is passed as its own argument.
read -r -a PARAM_OVERRIDES <<< "$(sed -n "${SLURM_ARRAY_TASK_ID}p" "$PARAMETERS_FILE")"
echo "${PARAM_OVERRIDES[@]}"

declare -i BATCH_SIZE
declare -i NODES_BUDGET
declare -i PARTITION_BUDGET
declare -i NUM_WORKERS
BATCH_SIZE=32
NODES_BUDGET=2048
PARTITION_BUDGET=256
NUM_WORKERS=0

# Define the variable $DATA_DIR before submitting.
# Could be a temporary directory for fast copy.
: "${DATA_DIR:?DATA_DIR must be set to the processed-data directory}"

# First process the features
srun python -u preprocess.py \
    print_config=false \
    data.processed_data_dir="$DATA_DIR" \
    skip_data_processing=false \
    skip_graph_processing=false \
    skip_feature_extraction=false \
    skip_structure=true \
    structure=full \
    "${PARAM_OVERRIDES[@]}"

# Loop-invariant: the episodic support budget scales the per-partition
# budget by the batch size. Hoisted out of the fold loop.
SUPPORT_PARTITION_BUDGET=$((BATCH_SIZE * PARTITION_BUDGET))

for FOLD in 0 1 2 3 4; do
    # Then do all the structuring
    echo -e "\n\n##### Fold ${FOLD} #####\n\n"

    echo -e "\n\n>>> Full Graph <<<\n\n"
    srun python -u preprocess.py \
        data.processed_data_dir="$DATA_DIR" \
        print_config=false \
        skip_data_processing=true \
        skip_graph_processing=true \
        skip_feature_extraction=true \
        fold="$FOLD" \
        structure=full \
        "${PARAM_OVERRIDES[@]}"

    echo -e "\n\n>>> Subgraphs <<<\n\n"
    srun python -u preprocess.py \
        data.processed_data_dir="$DATA_DIR" \
        print_config=false \
        skip_data_processing=true \
        skip_graph_processing=true \
        skip_feature_extraction=true \
        fold="$FOLD" \
        structure=khop \
        structure.batch_size="$BATCH_SIZE" \
        structure.max_nodes_per_subgraph="$NODES_BUDGET" \
        structure.max_samples_per_partition="$PARTITION_BUDGET" \
        structure.num_workers="$NUM_WORKERS" \
        structure.node_weights_dist=uniform \
        structure.label_dist=frequency \
        "${PARAM_OVERRIDES[@]}"

    echo -e "\n\n>>> Episodic Subgraphs <<<\n\n"
    # BUG FIX: the original had no space before the line-continuation
    # backslash after $SUPPORT_PARTITION_BUDGET, which fused that
    # override with the next argument into one malformed word (the
    # shell parsed "$SUPPORT_PARTITION_BUDGETstructure" as a single,
    # unset variable name), silently dropping the support budget.
    srun python -u preprocess.py \
        data.processed_data_dir="$DATA_DIR" \
        print_config=false \
        skip_data_processing=true \
        skip_graph_processing=true \
        skip_feature_extraction=true \
        fold="$FOLD" \
        structure=episodic_khop \
        structure.batch_size="$BATCH_SIZE" \
        structure.max_nodes_per_subgraph="$NODES_BUDGET" \
        structure.max_samples_per_partition="$SUPPORT_PARTITION_BUDGET" \
        structure.max_samples_per_eval_partition="$PARTITION_BUDGET" \
        structure.num_workers="$NUM_WORKERS" \
        structure.node_weights_dist=uniform \
        structure.label_dist=frequency \
        structure.prop_query=0.5 \
        "${PARAM_OVERRIDES[@]}"
done

echo -e "\nDone."