Fix port up/bfd sessions bringup notification delay issue. #3269

Open · wants to merge 24 commits into master

Changes from 20 commits
14 changes: 6 additions & 8 deletions orchagent/orch.cpp
@@ -13,6 +13,8 @@
#include "zmqconsumerstatetable.h"
#include "sai_serialize.h"

#define CONSUMER_POP_MAX_BATCH_COUNT 50

using namespace swss;

int gBatchSize = 0;
@@ -242,14 +244,10 @@ void Consumer::execute()
// ConsumerBase::execute_impl<swss::ConsumerTableBase>();
SWSS_LOG_ENTER();

size_t update_size = 0;
auto table = static_cast<swss::ConsumerTableBase *>(getSelectable());
do
{
std::deque<KeyOpFieldsValuesTuple> entries;
table->pops(entries);
update_size = addToSync(entries);
} while (update_size != 0);
@liuh-80 (Contributor, Author) commented on Sep 4, 2024:

Multiple test cases failed because the expected data was not received within 20 seconds.
Possible reasons:

  1. Some types of notifications depend on each other, so all entries need to be popped in the same loop (see the sketch below).
  2. This PR makes orchagent process some data more slowly than before.
    I will try a different solution that only limits route processing in another POC PR:
    [POC] Improve routeorch to stop process routes when high priority notification coming. #3278
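
A minimal sketch of one possible middle ground (not the code in this PR): keep draining in a loop so that interdependent notifications still accumulate in m_toSync within a single execute() call, while each individual pops() stays bounded by the batch size configured on the table.

// Sketch only: bounded drain loop inside Consumer::execute(), assuming
// pops() appends at most the table's configured batch size per call.
auto table = static_cast<swss::ConsumerTableBase *>(getSelectable());
std::deque<KeyOpFieldsValuesTuple> entries;
do
{
    entries.clear();
    table->pops(entries);   // bounded by the configured batch size
    addToSync(entries);     // dependent notifications accumulate in m_toSync
} while (!entries.empty()); // keep popping until the table is drained

drain();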

std::deque<KeyOpFieldsValuesTuple> entries;
table->pops(entries);
addToSync(entries);

drain();
}
@@ -789,7 +787,7 @@ void Orch::addConsumer(DBConnector *db, string tableName, int pri)
}
else
{
addExecutor(new Consumer(new ConsumerStateTable(db, tableName, gBatchSize, pri), this, tableName));
addExecutor(new Consumer(new ConsumerStateTable(db, tableName, gBatchSize * CONSUMER_POP_MAX_BATCH_COUNT, pri), this, tableName));
@liuh-80 (Contributor, Author) commented:

This change will cause an issue:

sonic-net/sonic-buildimage@286ec3e

A background-running Lua script can keep redis-server quite busy when the batch size is 8192.
If the handling time exceeds the default 5 seconds, redis-server stops responding to other processes, which causes syncd to crash.

So that is also part of the reason for adding a loop in execute().
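
One way to keep a single pop from reaching the size reported as problematic would be to cap the multiplied batch. A minimal sketch, not the code in this PR; MAX_SAFE_POP_BATCH is a hypothetical constant chosen below the 8192 figure mentioned above (requires <algorithm> for std::min):

// Hypothetical cap; 8192 was reported as large enough to keep redis-server
// busy for more than the default 5-second limit.
#define MAX_SAFE_POP_BATCH 4096

int popBatchSize = std::min(gBatchSize * CONSUMER_POP_MAX_BATCH_COUNT, MAX_SAFE_POP_BATCH);
addExecutor(new Consumer(new ConsumerStateTable(db, tableName, popBatchSize, pri), this, tableName));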

}
}

46 changes: 46 additions & 0 deletions tests/mock_tests/consumer_ut.cpp
@@ -10,6 +10,25 @@ namespace consumer_test
{
using namespace std;

class TestOrch : public Orch
{
public:
TestOrch(swss::DBConnector *db, string tableName)
:Orch(db, tableName),
m_notification_count(0)
{
}

void doTask(Consumer& consumer)
{
std::cout << "TestOrch::doTask " << consumer.m_toSync.size() << std::endl;
m_notification_count += consumer.m_toSync.size();
consumer.m_toSync.clear();
}

long m_notification_count;
};

struct ConsumerTest : public ::testing::Test
{
shared_ptr<swss::DBConnector> m_app_db;
@@ -322,4 +341,31 @@ namespace consumer_test
validate_syncmap(consumer->m_toSync, 1, key, exp_kofv);

}

TEST_F(ConsumerTest, ConsumerPops_notification_count)
{
int consumer_pops_batch_size = 10;
TestOrch test_orch(m_config_db.get(), "CFG_TEST_TABLE");
Consumer test_consumer(
new swss::ConsumerStateTable(m_config_db.get(), "CFG_TEST_TABLE", consumer_pops_batch_size, 1), &test_orch, "CFG_TEST_TABLE");
swss::ProducerStateTable producer_table(m_config_db.get(), "CFG_TEST_TABLE");

m_config_db->flushdb();
for (int notification_count = 0; notification_count < consumer_pops_batch_size * 2; notification_count++)
{
std::vector<FieldValueTuple> fields;
FieldValueTuple t("test_field", "test_value");
fields.push_back(t);
producer_table.set(std::to_string(notification_count), fields);

cout << "ConsumerPops_notification_count:: add key: " << notification_count << endl;
}

// consumer should pop consumer_pops_batch_size notifications per execute() call
test_consumer.execute();
ASSERT_EQ(test_orch.m_notification_count, consumer_pops_batch_size);

test_consumer.execute();
ASSERT_EQ(test_orch.m_notification_count, consumer_pops_batch_size*2);
}
}
30 changes: 30 additions & 0 deletions tests/mock_tests/mock_consumerstatetable.cpp
@@ -7,4 +7,34 @@ namespace swss
TableName_KeySet(tableName)
{
}

void ConsumerStateTable::pops(std::deque<KeyOpFieldsValuesTuple> &vkco, const std::string& /*prefix*/)
{
int count = 0;
swss::Table table(getDbConnector(), getTableName());
std::vector<std::string> keys;
table.getKeys(keys);
for (const auto &key: keys)
{
// pop with batch size
if (count < POP_BATCH_SIZE)
{
count++;
}
else
{
break;
}

KeyOpFieldsValuesTuple kco;
kfvKey(kco) = key;
kfvOp(kco) = SET_COMMAND;
if (!table.get(key, kfvFieldsValues(kco)))
{
continue;
}
table.del(key);
vkco.push_back(kco);
}
}
}