Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

PropagateDownload: Skip identical files more #6168

Merged
merged 2 commits into from
Nov 16, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 5 additions & 17 deletions src/csync/csync_reconcile.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -63,18 +63,6 @@ static csync_file_stat_t *_csync_check_ignored(csync_s::FileMap *tree, const Byt
}
}

/* Returns true if we're reasonably certain that hash equality
* for the header means content equality.
*
* Cryptographic safety is not required - this is mainly
* intended to rule out checksums like Adler32 that are not intended for
* hashing and have high likelihood of collision with particular inputs.
*/
static bool _csync_is_collision_safe_hash(const char *checksum_header)
{
return strncmp(checksum_header, "SHA1:", 5) == 0
|| strncmp(checksum_header, "MD5:", 4) == 0;
}

/**
* The main function in the reconcile pass.
Expand Down Expand Up @@ -297,14 +285,14 @@ static int _csync_merge_algorithm_visitor(csync_file_stat_t *cur, CSYNC * ctx) {
//
// In older client versions we always treated these cases as a
// non-conflict. This behavior is preserved in case the server
// doesn't provide a suitable content hash.
// doesn't provide a content checksum.
//
// When it does have one, however, we do create a job, but the job
// will compare hashes and avoid the download if they are equal.
const char *remoteChecksumHeader =
// will compare hashes and avoid the download if possible.
QByteArray remoteChecksumHeader =
(ctx->current == REMOTE_REPLICA ? cur->checksumHeader : other->checksumHeader);
if (remoteChecksumHeader) {
is_conflict |= _csync_is_collision_safe_hash(remoteChecksumHeader);
if (!remoteChecksumHeader.isEmpty()) {
is_conflict = true;
}

// SO: If there is no checksum, we can have !is_conflict here
Expand Down
7 changes: 7 additions & 0 deletions src/csync/csync_util.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -211,3 +211,10 @@ time_t oc_httpdate_parse( const char *date ) {
result = timegm(&gmt);
return result;
}


bool csync_is_collision_safe_hash(const QByteArray &checksum_header)
{
return checksum_header.startsWith("SHA1:")
|| checksum_header.startsWith("MD5:");
}
10 changes: 10 additions & 0 deletions src/csync/csync_util.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,4 +31,14 @@ const char OCSYNC_EXPORT *csync_instruction_str(enum csync_instructions_e instr)
void OCSYNC_EXPORT csync_memstat_check(void);

bool OCSYNC_EXPORT csync_file_locked_or_open( const char *dir, const char *fname);

/* Returns true if we're reasonably certain that hash equality
* for the header means content equality.
*
* Cryptographic safety is not required - this is mainly
* intended to rule out checksums like Adler32 that are not intended for
* hashing and have high likelihood of collision with particular inputs.
*/
bool OCSYNC_EXPORT csync_is_collision_safe_hash(const QByteArray &checksum_header);

#endif /* _CSYNC_UTIL_H */
20 changes: 16 additions & 4 deletions src/libsync/propagatedownload.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -357,13 +357,16 @@ void PropagateDownloadFile::start()
}
}

// If we have a conflict where size and mtime are identical,
// If we have a conflict where size of the file is unchanged,
// compare the remote checksum to the local one.
// Maybe it's not a real conflict and no download is necessary!
// If the hashes are collision safe and identical, we assume the content is too.
// For weak checksums, we only do that if the mtimes are also identical.
if (_item->_instruction == CSYNC_INSTRUCTION_CONFLICT
&& _item->_size == _item->_previousSize
&& _item->_modtime == _item->_previousModtime
&& !_item->_checksumHeader.isEmpty()) {
&& !_item->_checksumHeader.isEmpty()
&& (csync_is_collision_safe_hash(_item->_checksumHeader)
|| _item->_modtime == _item->_previousModtime)) {
qCDebug(lcPropagateDownload) << _item->_file << "may not need download, computing checksum";
auto computeChecksum = new ComputeChecksum(this);
computeChecksum->setChecksumType(parseChecksumHeaderType(_item->_checksumHeader));
Expand All @@ -379,8 +382,17 @@ void PropagateDownloadFile::start()
void PropagateDownloadFile::conflictChecksumComputed(const QByteArray &checksumType, const QByteArray &checksum)
{
if (makeChecksumHeader(checksumType, checksum) == _item->_checksumHeader) {
// No download necessary, just update fs and journal metadata
qCDebug(lcPropagateDownload) << _item->_file << "remote and local checksum match";
// No download necessary, just update metadata

// Apply the server mtime locally if necessary, ensuring the journal
// and local mtimes end up identical
auto fn = propagator()->getFilePath(_item->_file);
if (_item->_modtime != _item->_previousModtime) {
FileSystem::setModTime(fn, _item->_modtime);
emit propagator()->touchedFile(fn);
}
_item->_modtime = FileSystem::getModTime(fn);
updateMetadata(/*isConflict=*/false);
return;
}
Expand Down
91 changes: 55 additions & 36 deletions test/testsyncengine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -251,8 +251,53 @@ private slots:
}
}

void testFakeConflict_data()
{
QTest::addColumn<bool>("sameMtime");
QTest::addColumn<QByteArray>("checksums");

QTest::addColumn<int>("expectedGET");

QTest::newRow("Same mtime, but no server checksum -> ignored in reconcile")
<< true << QByteArray()
<< 0;

QTest::newRow("Same mtime, weak server checksum differ -> downloaded")
<< true << QByteArray("Adler32:bad")
<< 1;

QTest::newRow("Same mtime, matching weak checksum -> skipped")
<< true << QByteArray("Adler32:2a2010d")
<< 0;

QTest::newRow("Same mtime, strong server checksum differ -> downloaded")
<< true << QByteArray("SHA1:bad")
<< 1;

QTest::newRow("Same mtime, matching strong checksum -> skipped")
<< true << QByteArray("SHA1:56900fb1d337cf7237ff766276b9c1e8ce507427")
<< 0;


QTest::newRow("mtime changed, but no server checksum -> download")
<< false << QByteArray()
<< 1;

QTest::newRow("mtime changed, weak checksum match -> download anyway")
<< false << QByteArray("Adler32:2a2010d")
<< 1;

QTest::newRow("mtime changed, strong checksum match -> skip")
<< false << QByteArray("SHA1:56900fb1d337cf7237ff766276b9c1e8ce507427")
<< 0;
}

void testFakeConflict()
{
QFETCH(bool, sameMtime);
QFETCH(QByteArray, checksums);
QFETCH(int, expectedGET);

FakeFolder fakeFolder{ FileInfo::A12_B12_C12_S12() };

int nGET = 0;
Expand All @@ -269,51 +314,25 @@ private slots:
auto mtime = QDateTime::currentDateTimeUtc().addDays(-4);
mtime.setMSecsSinceEpoch(mtime.toMSecsSinceEpoch() / 1000 * 1000);

// Conflict: Same content, mtime, but no server checksum
// -> ignored in reconcile
fakeFolder.localModifier().setContents("A/a1", 'C');
fakeFolder.localModifier().setModTime("A/a1", mtime);
fakeFolder.remoteModifier().setContents("A/a1", 'C');
if (!sameMtime)
mtime = mtime.addDays(1);
fakeFolder.remoteModifier().setModTime("A/a1", mtime);
remoteInfo.find("A/a1")->checksums = checksums;
QVERIFY(fakeFolder.syncOnce());
QCOMPARE(nGET, 0);
QCOMPARE(nGET, expectedGET);

// Conflict: Same content, mtime, but weak server checksum
// -> ignored in reconcile
mtime = mtime.addDays(1);
fakeFolder.localModifier().setContents("A/a1", 'D');
fakeFolder.localModifier().setModTime("A/a1", mtime);
fakeFolder.remoteModifier().setContents("A/a1", 'D');
fakeFolder.remoteModifier().setModTime("A/a1", mtime);
remoteInfo.find("A/a1")->checksums = "Adler32:bad";
QVERIFY(fakeFolder.syncOnce());
QCOMPARE(nGET, 0);

// Conflict: Same content, mtime, but server checksum differs
// -> downloaded
mtime = mtime.addDays(1);
fakeFolder.localModifier().setContents("A/a1", 'W');
fakeFolder.localModifier().setModTime("A/a1", mtime);
fakeFolder.remoteModifier().setContents("A/a1", 'W');
fakeFolder.remoteModifier().setModTime("A/a1", mtime);
remoteInfo.find("A/a1")->checksums = "SHA1:bad";
QVERIFY(fakeFolder.syncOnce());
QCOMPARE(nGET, 1);

// Conflict: Same content, mtime, matching checksums
// -> PropagateDownload, but it skips the download
mtime = mtime.addDays(1);
fakeFolder.localModifier().setContents("A/a1", 'C');
fakeFolder.localModifier().setModTime("A/a1", mtime);
fakeFolder.remoteModifier().setContents("A/a1", 'C');
fakeFolder.remoteModifier().setModTime("A/a1", mtime);
remoteInfo.find("A/a1")->checksums = "SHA1:56900fb1d337cf7237ff766276b9c1e8ce507427";
QVERIFY(fakeFolder.syncOnce());
QCOMPARE(nGET, 1);
// check that mtime in journal and filesystem agree
QString a1path = fakeFolder.localPath() + "A/a1";
SyncJournalFileRecord a1record;
fakeFolder.syncJournal().getFileRecord(QByteArray("A/a1"), &a1record);
QCOMPARE(a1record._modtime, (qint64)FileSystem::getModTime(a1path));

// Extra sync reads from db, no difference
QVERIFY(fakeFolder.syncOnce());
QCOMPARE(nGET, 1);
QCOMPARE(nGET, expectedGET);
}

/**
Expand Down