Skip to content

Commit

Permalink
Preserve en/em/non-breaking/hair space etc. while minifying (#849)
Browse files Browse the repository at this point in the history
  • Loading branch information
papandreou authored and alexlamsl committed Sep 11, 2017
1 parent 5241da9 commit 7683fbc
Show file tree
Hide file tree
Showing 2 changed files with 50 additions and 14 deletions.
26 changes: 12 additions & 14 deletions src/htmlminifier.js
Original file line number Diff line number Diff line change
Expand Up @@ -8,20 +8,16 @@ var TokenChain = require('./tokenchain');
var UglifyJS = require('uglify-js');
var utils = require('./utils');

var trimWhitespace = String.prototype.trim ? function(str) {
function trimWhitespace(str) {
if (typeof str !== 'string') {
return str;
}
return str.trim();
} : function(str) {
if (typeof str !== 'string') {
return str;
}
return str.replace(/^\s+/, '').replace(/\s+$/, '');
};
return str.replace(/^[ \n\r\t\f]+/, '').replace(/[ \n\r\t\f]+$/, '');
}

function collapseWhitespaceAll(str) {
return str && str.replace(/\s+/g, function(spaces) {
// Non-breaking space is specifically handled inside the replacer function here:
return str && str.replace(/[ \n\r\t\f\xA0]+/g, function(spaces) {
return spaces === '\t' ? '\t' : spaces.replace(/(^|\xA0+)[^\xA0]+/g, '$1 ');
});
}
Expand All @@ -30,17 +26,18 @@ function collapseWhitespace(str, options, trimLeft, trimRight, collapseAll) {
var lineBreakBefore = '', lineBreakAfter = '';

if (options.preserveLineBreaks) {
str = str.replace(/^\s*?[\n\r]\s*/, function() {
str = str.replace(/^[ \n\r\t\f]*?[\n\r][ \n\r\t\f]*/, function() {
lineBreakBefore = '\n';
return '';
}).replace(/\s*?[\n\r]\s*$/, function() {
}).replace(/[ \n\r\t\f]*?[\n\r][ \n\r\t\f]*$/, function() {
lineBreakAfter = '\n';
return '';
});
}

if (trimLeft) {
str = str.replace(/^\s+/, function(spaces) {
// Non-breaking space is specifically handled inside the replacer function here:
str = str.replace(/^[ \n\r\t\f\xA0]+/, function(spaces) {
var conservative = !lineBreakBefore && options.conservativeCollapse;
if (conservative && spaces === '\t') {
return '\t';
Expand All @@ -50,7 +47,8 @@ function collapseWhitespace(str, options, trimLeft, trimRight, collapseAll) {
}

if (trimRight) {
str = str.replace(/\s+$/, function(spaces) {
// Non-breaking space is specifically handled inside the replacer function here:
str = str.replace(/[ \n\r\t\f\xA0]+$/, function(spaces) {
var conservative = !lineBreakAfter && options.conservativeCollapse;
if (conservative && spaces === '\t') {
return '\t';
Expand Down Expand Up @@ -1253,7 +1251,7 @@ function minify(value, options, partialMarkup) {
return collapseWhitespace(chunk, {
preserveLineBreaks: options.preserveLineBreaks,
conservativeCollapse: !options.trimCustomFragments
}, /^\s/.test(chunk), /\s$/.test(chunk));
}, /^[ \n\r\t\f]/.test(chunk), /[ \n\r\t\f]$/.test(chunk));
}
return chunk;
});
Expand Down
38 changes: 38 additions & 0 deletions tests/minifier.js
Original file line number Diff line number Diff line change
Expand Up @@ -347,6 +347,44 @@ QUnit.test('space normalization around text', function(assert) {
assert.equal(minify(input, { collapseWhitespace: true }), output);
});

// Checks that hair space (U+200A) and non-breaking space (U+00A0) pass through
// minification untouched, whether they appear as raw characters or as HTML
// entities, in text content, between tags, and inside attribute values.
QUnit.test('types of whitespace that should always be preserved', function(assert) {
  // Each helper builds a fresh options literal per call so no options object
  // is shared between successive minify() invocations.
  function collapse(html) {
    return minify(html, { collapseWhitespace: true });
  }

  function collapseAndDecode(html) {
    return minify(html, { collapseWhitespace: true, decodeEntities: true });
  }

  // Pairs of (raw character form, entity form) of the same markup.
  var textCases = [
    // Hair space:
    { raw: '<div>\u200afo\u200ao\u200a</div>', entities: '<div>&#8202;fo&#8202;o&#8202;</div>' },
    // Non-breaking space:
    { raw: '<div>\xa0fo\xa0o\xa0</div>', entities: '<div>&nbsp;fo&nbsp;o&nbsp;</div>' }
  ];

  textCases.forEach(function(sample) {
    // Raw character is left as-is:
    assert.equal(collapse(sample.raw), sample.raw);

    // Entity form is left as-is:
    assert.equal(collapse(sample.entities), sample.entities);

    // Entity form decodes to the raw character in decodeEntities:true mode:
    assert.equal(collapseAndDecode(sample.entities), sample.raw);
  });

  // Do not remove hair space when preserving line breaks between tags:
  var betweenTags = '<p></p>\u200a\n<p></p>\n';
  assert.equal(minify(betweenTags, { collapseWhitespace: true, preserveLineBreaks: true }), betweenTags);

  // Preserve hair space in attributes:
  var attributeValue = '<p class="foo\u200abar"></p>';
  assert.equal(collapse(attributeValue), attributeValue);

  // Preserve hair space in class names when deduplicating and reordering:
  var classList = '<a class="0 1\u200a3 2 3"></a>';
  assert.equal(minify(classList, { sortClassName: false }), classList);
  assert.equal(minify(classList, { sortClassName: true }), classList);
});

QUnit.test('doctype normalization', function(assert) {
var input;

Expand Down

0 comments on commit 7683fbc

Please sign in to comment.