Skip to content

Commit

Permalink
url: use SafeSet to filter known special protocols
Browse files Browse the repository at this point in the history
Avoids a maintenance hazard when reviewers assume that
`hostlessProtocol` and `slashedProtocol` are disjoint.

The following may be counter-intuitive:

```js
// These objects seem to have no keys in common
const hostlessProtocol = { 'javascript': true };
const slashedProtocol = { 'http': true };
// A reasonable reviewer may assumes bothTrue is never truthy
function bothTrue(lowerProto) {
  return hostlessProtocol[lowerProto] && slashedProtocol[lowerProto];
}
// But
console.log(Boolean(bothTrue('constructor')));  // true
```

This change uses SafeSet instead of plain-old objects.

----

Rejected alternative:

We could have used object with a `null` prototype as lookup tables
so that `lowerProto` is never treated as a key into `Object.prototype`.

```js
const hostlessProtocol = { __proto__: null, 'javascript': true };
const slashedProtocol = { __proto__: null, 'http': true };

function bothTrue(lowerProto) {
  return hostlessProtocol[lowerProto] && slashedProtocol[lowerProto];
}

console.log(Boolean(bothTrue('constructor')));  // false
```

PR-URL: #24703
Reviewed-By: Joyee Cheung <joyeec9h3@gmail.com>
Reviewed-By: Colin Ihrig <cjihrig@gmail.com>
Reviewed-By: James M Snell <jasnell@gmail.com>
  • Loading branch information
mikesamuel authored and BridgeAR committed Dec 5, 2018
1 parent 9e5a79a commit be54dc0
Showing 1 changed file with 33 additions and 30 deletions.
63 changes: 33 additions & 30 deletions lib/url.js
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@ const { toASCII } = process.binding('config').hasIntl ?

const { hexTable } = require('internal/querystring');

const { SafeSet } = require('internal/safe_globals');

const {
ERR_INVALID_ARG_TYPE
} = require('internal/errors').codes;
Expand Down Expand Up @@ -76,28 +78,28 @@ const simplePathPattern = /^(\/\/?(?!\/)[^?\s]*)(\?[^\s]*)?$/;

const hostnameMaxLen = 255;
// protocols that can allow "unsafe" and "unwise" chars.
const unsafeProtocol = {
'javascript': true,
'javascript:': true
};
const unsafeProtocol = new SafeSet([
'javascript',
'javascript:'
]);
// protocols that never have a hostname.
const hostlessProtocol = {
'javascript': true,
'javascript:': true
};
const hostlessProtocol = new SafeSet([
'javascript',
'javascript:'
]);
// protocols that always contain a // bit.
const slashedProtocol = {
'http': true,
'http:': true,
'https': true,
'https:': true,
'ftp': true,
'ftp:': true,
'gopher': true,
'gopher:': true,
'file': true,
'file:': true
};
const slashedProtocol = new SafeSet([
'http',
'http:',
'https',
'https:',
'ftp',
'ftp:',
'gopher',
'gopher:',
'file',
'file:'
]);
const {
CHAR_SPACE,
CHAR_TAB,
Expand Down Expand Up @@ -267,14 +269,14 @@ Url.prototype.parse = function parse(url, parseQueryString, slashesDenoteHost) {
if (slashesDenoteHost || proto || hostPattern.test(rest)) {
var slashes = rest.charCodeAt(0) === CHAR_FORWARD_SLASH &&
rest.charCodeAt(1) === CHAR_FORWARD_SLASH;
if (slashes && !(proto && hostlessProtocol[lowerProto])) {
if (slashes && !(proto && hostlessProtocol.has(lowerProto))) {
rest = rest.slice(2);
this.slashes = true;
}
}

if (!hostlessProtocol[lowerProto] &&
(slashes || (proto && !slashedProtocol[proto]))) {
if (!hostlessProtocol.has(lowerProto) &&
(slashes || (proto && !slashedProtocol.has(proto)))) {

// there's a hostname.
// the first instance of /, ?, ;, or # ends the host.
Expand Down Expand Up @@ -400,7 +402,7 @@ Url.prototype.parse = function parse(url, parseQueryString, slashesDenoteHost) {

// now rest is set to the post-host stuff.
// chop off any delim chars.
if (!unsafeProtocol[lowerProto]) {
if (!unsafeProtocol.has(lowerProto)) {
// First, make 100% sure that any "autoEscape" chars get
// escaped, even if encodeURIComponent doesn't think they
// need to be.
Expand Down Expand Up @@ -447,7 +449,7 @@ Url.prototype.parse = function parse(url, parseQueryString, slashesDenoteHost) {
} else if (firstIdx > 0) {
this.pathname = rest.slice(0, firstIdx);
}
if (slashedProtocol[lowerProto] &&
if (slashedProtocol.has(lowerProto) &&
this.hostname && !this.pathname) {
this.pathname = '/';
}
Expand Down Expand Up @@ -629,7 +631,7 @@ Url.prototype.format = function format() {

// only the slashedProtocols get the //. Not mailto:, xmpp:, etc.
// unless they had them to begin with.
if (this.slashes || slashedProtocol[protocol]) {
if (this.slashes || slashedProtocol.has(protocol)) {
if (this.slashes || host) {
if (pathname && pathname.charCodeAt(0) !== CHAR_FORWARD_SLASH)
pathname = '/' + pathname;
Expand Down Expand Up @@ -701,7 +703,7 @@ Url.prototype.resolveObject = function resolveObject(relative) {
}

// urlParse appends trailing / to urls like http://www.example.com
if (slashedProtocol[result.protocol] &&
if (slashedProtocol.has(result.protocol) &&
result.hostname && !result.pathname) {
result.path = result.pathname = '/';
}
Expand All @@ -719,7 +721,7 @@ Url.prototype.resolveObject = function resolveObject(relative) {
// if it is file:, then the host is dropped,
// because that's known to be hostless.
// anything else is assumed to be absolute.
if (!slashedProtocol[relative.protocol]) {
if (!slashedProtocol.has(relative.protocol)) {
var keys = Object.keys(relative);
for (var v = 0; v < keys.length; v++) {
var k = keys[v];
Expand All @@ -732,7 +734,7 @@ Url.prototype.resolveObject = function resolveObject(relative) {
result.protocol = relative.protocol;
if (!relative.host &&
!/^file:?$/.test(relative.protocol) &&
!hostlessProtocol[relative.protocol]) {
!hostlessProtocol.has(relative.protocol)) {
const relPath = (relative.pathname || '').split('/');
while (relPath.length && !(relative.host = relPath.shift()));
if (!relative.host) relative.host = '';
Expand Down Expand Up @@ -769,7 +771,8 @@ Url.prototype.resolveObject = function resolveObject(relative) {
var removeAllDots = mustEndAbs;
var srcPath = result.pathname && result.pathname.split('/') || [];
var relPath = relative.pathname && relative.pathname.split('/') || [];
var noLeadingSlashes = result.protocol && !slashedProtocol[result.protocol];
var noLeadingSlashes = result.protocol &&
!slashedProtocol.has(result.protocol);

// if the url is a non-slashed url, then relative
// links like ../.. should be able
Expand Down

0 comments on commit be54dc0

Please sign in to comment.