
no exception throwing within omp parallel; set nthread in Learner (#1421)
khotilov authored and tqchen committed Jul 29, 2016
1 parent 89c4f67 commit 75f4014
Showing 4 changed files with 35 additions and 19 deletions.
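
The common thread in the diffs below is that CHECK-style assertions, which report failure by throwing when XGBoost is built to raise errors rather than abort, are moved out of OpenMP parallel regions: an exception cannot safely propagate out of a worker thread, so the loops now record a plain integer flag and the check runs once after the loop has joined. A minimal standalone sketch of that pattern (identifiers here are illustrative, not XGBoost's):

#include <cstdio>
#include <cstdlib>
#include <vector>

// Sketch of the deferred-error pattern adopted in this commit: record the
// failure in a shared int flag inside the parallel loop (every failing
// thread writes the same value, 1) and test it only after the loop joins,
// so no exception is thrown from inside the parallel region.
int main() {
  std::vector<int> data = {1, 2, 3, -1, 5};
  int range_error = 0;
  #pragma omp parallel for schedule(static)
  for (int i = 0; i < static_cast<int>(data.size()); ++i) {
    if (data[i] < 0) {   // previously a CHECK_GE(data[i], 0) inside the loop
      range_error = 1;   // remember the failure instead of throwing
      continue;          // skip the rest of this iteration
    }
    // ... normal per-element work would go here ...
  }
  if (range_error) {     // single check outside the parallel region
    std::fprintf(stderr, "negative value encountered\n");
    return EXIT_FAILURE;
  }
  return EXIT_SUCCESS;
}
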
27 changes: 17 additions & 10 deletions src/gbm/gbtree.cc
@@ -26,8 +26,6 @@ DMLC_REGISTRY_FILE_TAG(gbtree);

/*! \brief training parameters */
struct GBTreeTrainParam : public dmlc::Parameter<GBTreeTrainParam> {
/*! \brief number of threads */
int nthread;
/*!
* \brief number of parallel trees constructed each iteration
* use this option to support boosted random forest
@@ -37,8 +35,6 @@ struct GBTreeTrainParam : public dmlc::Parameter<GBTreeTrainParam> {
std::string updater_seq;
// declare parameters
DMLC_DECLARE_PARAMETER(GBTreeTrainParam) {
DMLC_DECLARE_FIELD(nthread).set_lower_bound(0).set_default(0)
.describe("Number of threads used for training.");
DMLC_DECLARE_FIELD(num_parallel_tree).set_lower_bound(1).set_default(1)
.describe("Number of parallel trees constructed during each iteration."\
" This option is used to support boosted random forest");
@@ -145,9 +141,6 @@ class GBTree : public GradientBooster {
for (const auto& up : updaters) {
up->Init(cfg);
}
if (tparam.nthread != 0) {
omp_set_num_threads(tparam.nthread);
}
}

void Load(dmlc::Stream* fi) override {
@@ -247,12 +240,16 @@
const RowBatch &batch = iter->Value();
// parallel over local batch
const bst_omp_uint nsize = static_cast<bst_omp_uint>(batch.size);
int ridx_error = 0;
#pragma omp parallel for schedule(static)
for (bst_omp_uint i = 0; i < nsize; ++i) {
const int tid = omp_get_thread_num();
RegTree::FVec &feats = thread_temp[tid];
int64_t ridx = static_cast<int64_t>(batch.base_rowid + i);
CHECK_LT(static_cast<size_t>(ridx), info.num_row);
if (static_cast<size_t>(ridx) >= info.num_row) {
ridx_error = 1;
continue;
}
// loop over output groups
for (int gid = 0; gid < mparam.num_output_group; ++gid) {
this->Pred(batch[i],
@@ -262,6 +259,7 @@
ntree_limit);
}
}
CHECK(!ridx_error) << "ridx out of bounds";
}
}

@@ -368,19 +366,28 @@
const int* leaf_position) {
const RowSet& rowset = p_fmat->buffered_rowset();
const bst_omp_uint ndata = static_cast<bst_omp_uint>(rowset.size());
int pred_counter_error = 0, tid_error = 0;
#pragma omp parallel for schedule(static)
for (bst_omp_uint i = 0; i < ndata; ++i) {
const bst_uint ridx = rowset[i];
const int64_t bid = this->BufferOffset(buffer_offset + ridx, bst_group);
const int tid = leaf_position[ridx];
CHECK_EQ(pred_counter[bid], trees.size());
CHECK_GE(tid, 0);
if (pred_counter[bid] != trees.size()) {
pred_counter_error = 1;
continue;
}
if (tid < 0) {
tid_error = 1;
continue;
}
pred_buffer[bid] += new_tree[tid].leaf_value();
for (int i = 0; i < mparam.size_leaf_vector; ++i) {
pred_buffer[bid + i + 1] += new_tree.leafvec(tid)[i];
}
pred_counter[bid] += tparam.num_parallel_tree;
}
CHECK(!pred_counter_error) << "incorrect pred_counter[bid]";
CHECK(!tid_error) << "tid cannot be negative";
}
// make a prediction for a single instance
inline void Pred(const RowBatch::Inst &inst,
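The prediction loop above also relies on per-thread scratch buffers: each iteration grabs thread_temp[omp_get_thread_num()], so concurrent threads never share a temporary. A self-contained sketch of that idiom, with a plain vector standing in for RegTree::FVec and made-up per-row work:

#include <omp.h>
#include <vector>

// One scratch buffer per OpenMP thread, indexed by omp_get_thread_num(),
// so parallel iterations never touch the same temporary storage.
void ProcessRows(const std::vector<double>& rows) {
  std::vector<std::vector<double>> thread_temp(omp_get_max_threads());
  #pragma omp parallel for schedule(static)
  for (int i = 0; i < static_cast<int>(rows.size()); ++i) {
    std::vector<double>& scratch = thread_temp[omp_get_thread_num()];
    scratch.assign(1, rows[i] * 2.0);  // stand-in for the real per-row work
    // ... read results from scratch ...
  }
}
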
11 changes: 10 additions & 1 deletion src/learner.cc
@@ -78,6 +78,9 @@ struct LearnerTrainParam
float prob_buffer_row;
// maximum row per batch.
size_t max_row_perbatch;
// number of threads to use if OpenMP is enabled
// if equals 0, use system default
int nthread;
// declare parameters
DMLC_DECLARE_PARAMETER(LearnerTrainParam) {
DMLC_DECLARE_FIELD(seed).set_default(0)
@@ -101,6 +104,8 @@ struct LearnerTrainParam
.describe("Maximum buffered row portion");
DMLC_DECLARE_FIELD(max_row_perbatch).set_default(std::numeric_limits<size_t>::max())
.describe("maximum row per batch.");
DMLC_DECLARE_FIELD(nthread).set_default(0)
.describe("Number of threads to use.");
}
};

@@ -149,7 +154,11 @@ class LearnerImpl : public Learner {
cfg_[kv.first] = kv.second;
}
}
// add additional parameter
if (tparam.nthread != 0) {
omp_set_num_threads(tparam.nthread);
}

// add additional parameters
// These are constraints that need to be satisfied.
if (tparam.dsplit == 0 && rabit::IsDistributed()) {
tparam.dsplit = 2;
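With this change the thread-count knob lives in LearnerTrainParam and is applied once during the Learner's configuration step: 0 leaves the OpenMP default untouched, any positive value is passed to omp_set_num_threads. A tiny sketch of that convention (the free function is illustrative, not part of XGBoost):

#include <omp.h>

// Sketch of the nthread convention introduced in learner.cc:
//   nthread == 0 -> keep the OpenMP runtime default (typically all cores)
//   nthread  > 0 -> fix the thread count for subsequent parallel regions
void ConfigureThreads(int nthread) {
  if (nthread != 0) {
    omp_set_num_threads(nthread);
  }
}
// Usage: ConfigureThreads(4); later parallel regions then run on 4 threads.
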
9 changes: 7 additions & 2 deletions src/metric/rank_metric.cc
@@ -97,6 +97,7 @@ struct EvalAuc : public Metric {
const bst_omp_uint ngroup = static_cast<bst_omp_uint>(gptr.size() - 1);
// sum statistics
double sum_auc = 0.0f;
int auc_error = 0;
#pragma omp parallel reduction(+:sum_auc)
{
// each thread takes a local rec
@@ -128,12 +129,16 @@
sum_npos += buf_pos;
sum_nneg += buf_neg;
// check weird conditions
CHECK(sum_npos > 0.0 && sum_nneg > 0.0)
<< "AUC: the dataset only contains pos or neg samples";
if (sum_npos <= 0.0 || sum_nneg <= 0.0) {
auc_error = 1;
continue;
}
// this is the AUC
sum_auc += sum_pospair / (sum_npos*sum_nneg);
}
}
CHECK(!auc_error)
<< "AUC: the dataset only contains pos or neg samples";
if (distributed) {
float dat[2];
dat[0] = static_cast<float>(sum_auc);
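In EvalAuc the deferred-error flag is combined with an OpenMP reduction: each thread accumulates its own partial sum_auc and the partials are added when the region ends, while the "only positive or only negative samples" condition is recorded in auc_error and verified once afterwards. A compact sketch of that combination (the data layout and function name are made up for illustration):

#include <cstdio>
#include <vector>

// Accumulate a sum via reduction(+:sum) while deferring the error for
// invalid elements to a single check after the parallel region.
double SumValidGroups(const std::vector<double>& group_scores) {
  double sum = 0.0;
  int group_error = 0;
  #pragma omp parallel for reduction(+:sum) schedule(static)
  for (int i = 0; i < static_cast<int>(group_scores.size()); ++i) {
    if (group_scores[i] <= 0.0) {  // previously a CHECK(...) inside the region
      group_error = 1;
      continue;
    }
    sum += group_scores[i];        // added to this thread's private copy
  }
  if (group_error) {
    std::fprintf(stderr, "encountered a group with no valid score\n");
  }
  return sum;
}
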
7 changes: 1 addition & 6 deletions src/tree/param.h
@@ -53,9 +53,6 @@ struct TrainParam : public dmlc::Parameter<TrainParam> {
int parallel_option;
// option to open cacheline optimization
bool cache_opt;
// number of threads to be used for tree construction,
// if OpenMP is enabled, if equals 0, use system default
int nthread;
// whether to not print info during training.
bool silent;
// declare the parameters
@@ -98,10 +95,8 @@ struct TrainParam : public dmlc::Parameter<TrainParam> {
.describe("Different types of parallelization algorithm.");
DMLC_DECLARE_FIELD(cache_opt).set_default(true)
.describe("EXP Param: Cache aware optimization.");
DMLC_DECLARE_FIELD(nthread).set_default(0)
.describe("Number of threads used for training.");
DMLC_DECLARE_FIELD(silent).set_default(false)
.describe("Not print information during training.");
.describe("Do not print information during training.");
// add alias of parameters
DMLC_DECLARE_ALIAS(reg_lambda, lambda);
DMLC_DECLARE_ALIAS(reg_alpha, alpha);
