diff --git a/Tmain/broken-tagname.d/stderr-expected.txt b/Tmain/broken-tagname.d/stderr-expected.txt
index 5b1e1fcd8e..226e6dc4fd 100644
--- a/Tmain/broken-tagname.d/stderr-expected.txt
+++ b/Tmain/broken-tagname.d/stderr-expected.txt
@@ -3,8 +3,11 @@ Reading command line arguments
CTagsSelfTest requires a memory stream for input
OPENING input.cst as CTagsSelfTest language file [new,required]
Initialize parser: CTagsSelfTest
-Unexpected character (0 < *c && *c < 0x20) included in a tagEntryInfo: one
+Unexpected character 0x0a included in a tagEntryInfo: one
of
broken name
File: input.cst, Line: 1, Lang: CTagsSelfTest, Kind: b
Escape the character
+Unexpected character 0x09 included in a tagEntryInfo: \Broken Context
+File: input.cst, Line: 1, Lang: CTagsSelfTest, Kind: b
+Escape the character
sorting tag file
diff --git a/Units/parser-html.r/whitespace-prefixed.html.d/expected.tags b/Units/parser-html.r/whitespace-prefixed.html.d/expected.tags
deleted file mode 100644
index 9e54554063..0000000000
--- a/Units/parser-html.r/whitespace-prefixed.html.d/expected.tags
+++ /dev/null
@@ -1 +0,0 @@
-heading1 input.html /^<\/h1>$/;" h
diff --git a/Units/parser-html.r/whitespace-prefixed.html.d/input.html b/Units/parser-html.r/whitespace-prefixed.html.d/input.html
deleted file mode 100644
index c3727d4d0b..0000000000
--- a/Units/parser-html.r/whitespace-prefixed.html.d/input.html
+++ /dev/null
@@ -1,3 +0,0 @@
-
- heading1
-
diff --git a/Units/parser-html.r/whitespaces.html.d/args.ctags b/Units/parser-html.r/whitespaces.html.d/args.ctags
new file mode 100644
index 0000000000..5ee5f79f70
--- /dev/null
+++ b/Units/parser-html.r/whitespaces.html.d/args.ctags
@@ -0,0 +1 @@
+--sort=no
diff --git a/Units/parser-html.r/whitespaces.html.d/expected.tags b/Units/parser-html.r/whitespaces.html.d/expected.tags
new file mode 100644
index 0000000000..3c1f42b370
--- /dev/null
+++ b/Units/parser-html.r/whitespaces.html.d/expected.tags
@@ -0,0 +1,3 @@
+heading1 input.html /^<\/h1>$/;" h
+heading2 input.html /^ heading2 <\/h1>$/;" h
+heading 3 input.html /^heading 3 <\/h1>$/;" h
diff --git a/Units/parser-html.r/whitespaces.html.d/input.html b/Units/parser-html.r/whitespaces.html.d/input.html
new file mode 100644
index 0000000000..6bc7f04a41
--- /dev/null
+++ b/Units/parser-html.r/whitespaces.html.d/input.html
@@ -0,0 +1,5 @@
+
+ heading1
+
+ heading2
+heading 3
diff --git a/Units/parser-javascript.r/js-broken-template.d/expected.tags b/Units/parser-javascript.r/js-broken-template.d/expected.tags
index e69de29bb2..3dec97c22e 100644
--- a/Units/parser-javascript.r/js-broken-template.d/expected.tags
+++ b/Units/parser-javascript.r/js-broken-template.d/expected.tags
@@ -0,0 +1 @@
+\n input.js /^let`$/;" v
diff --git a/Units/parser-javascript.r/js-odd-method-names.d/args.ctags b/Units/parser-javascript.r/js-odd-method-names.d/args.ctags
new file mode 100644
index 0000000000..5ee5f79f70
--- /dev/null
+++ b/Units/parser-javascript.r/js-odd-method-names.d/args.ctags
@@ -0,0 +1 @@
+--sort=no
diff --git a/Units/parser-javascript.r/js-odd-method-names.d/expected.tags b/Units/parser-javascript.r/js-odd-method-names.d/expected.tags
new file mode 100644
index 0000000000..80f3fc9de9
--- /dev/null
+++ b/Units/parser-javascript.r/js-odd-method-names.d/expected.tags
@@ -0,0 +1,12 @@
+\x21hello input.js /^ '!hello': function(){},$/;" m class:object
+\x20hello input.js /^ ' hello': function(){},$/;" m class:object
+hello input.js /^ '>hello': function(){},$/;" m class:object
+\thello input.js /^ ' hello': function(){},$/;" m class:object
+\\hello input.js /^ '\\\\hello': function(){},$/;" m class:object
+;"hello input.js /^ ';"hello': function(){},$/;" m class:object
+"hello input.js /^ '"hello': function(){},$/;" m class:object
+'hello input.js /^ "'hello": function(){},$/;" m class:object
+hello! input.js /^ 'hello!': function(){},$/;" m class:object
+hello input.js /^ 'hello ': function(){},$/;" m class:object
+object input.js /^var object = {$/;" c
diff --git a/Units/parser-javascript.r/js-odd-method-names.d/input.js b/Units/parser-javascript.r/js-odd-method-names.d/input.js
new file mode 100644
index 0000000000..eb68acf38b
--- /dev/null
+++ b/Units/parser-javascript.r/js-odd-method-names.d/input.js
@@ -0,0 +1,13 @@
+var object = {
+ '!hello': function(){},
+ ' hello': function(){},
+ 'hello': function(){},
+ ' hello': function(){},
+ '\\hello': function(){},
+ ';"hello': function(){},
+ '"hello': function(){},
+ "'hello": function(){},
+ 'hello!': function(){},
+ 'hello ': function(){},
+};
diff --git a/docs/format.rst b/docs/format.rst
index e045eda879..aec95fb053 100644
--- a/docs/format.rst
+++ b/docs/format.rst
@@ -269,9 +269,14 @@ A tagfield has a name, a colon, and a value: "name:value".
must be doubled!
EXCEPTION: Universal ctags introduces more conversion rules.
- The characters in range 0 to 0x20 and 0x7F is converted
- to \x prefixed hexadecimal number if the characters are not handled
- in the abouve "value" rules.
+
+ - When a value contains a "\\a", this stands for a <BEL> (0x07).
+ - When a value contains a "\\b", this stands for a <BS> (0x08).
+ - When a value contains a "\\v", this stands for a <VT> (0x0b).
+ - When a value contains a "\\f", this stands for a <FF> (0x0c).
+ - Characters in the range 0x01 to 0x1F inclusive, 0x7F, and a leading space
+ (0x20) or '!' (0x21) are converted to \x-prefixed hexadecimal numbers if
+ they are not handled by the above "value" rules.
Proposed tagfield names:
@@ -468,9 +473,10 @@ Exceptions
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#. {tagname} in tags file generated by Universal ctags may contain
- spaces. Parsers for documents like Tex and reStructuredText need
- this exceptions. See {tagname} of Proposal section for more detail
- about the conversion.
+ spaces and several escape sequences. Parsers for documents like TeX and
+ reStructuredText, or liberal languages such as JavaScript, need these
+ exceptions. See {tagname} in the Proposal section for more detail about the
+ conversion.
.. _compat-output:
diff --git a/main/entry.c b/main/entry.c
index ef4202a789..b6f0efce3d 100644
--- a/main/entry.c
+++ b/main/entry.c
@@ -1294,25 +1294,8 @@ static void makeTagEntriesForSubwords (tagEntryInfo *const subtag)
stringListDelete (list);
}
-static char *trimPrefixedWhitespaces (const char *name)
+extern int makeTagEntry (const tagEntryInfo *const tag)
{
- const char *start;
-
- for (start = name; isspace(*start); start++)
- ;
-
- if (start != name)
- return eStrdup (start);
-
- return NULL;
-}
-
-extern int makeTagEntry (const tagEntryInfo *const tag_const)
-{
- char *trimmed_name;
- const tagEntryInfo *tag = tag_const;
- tagEntryInfo tag_backingstore;
-
int r = CORK_NIL;
Assert (tag->name != NULL);
@@ -1329,14 +1312,6 @@ extern int makeTagEntry (const tagEntryInfo *const tag_const)
return CORK_NIL;
}
- trimmed_name = trimPrefixedWhitespaces (tag->name);
- if (trimmed_name)
- {
- tag_backingstore = *tag_const;
- tag = &tag_backingstore;
- tag_backingstore.name = trimmed_name;
- }
-
if (tag->name [0] == '\0' && (!tag->placeholder))
{
if (!doesInputLanguageAllowNullTag())
@@ -1356,8 +1331,6 @@ extern int makeTagEntry (const tagEntryInfo *const tag_const)
makeTagEntriesForSubwords (&subtag);
}
out:
- if (trimmed_name)
- eFree (trimmed_name);
return r;
}
diff --git a/main/field.c b/main/field.c
index d33dc536dc..252f7248ed 100644
--- a/main/field.c
+++ b/main/field.c
@@ -382,34 +382,44 @@ static const char *renderEscapedString (const char *s,
return vStringValue (b);
}
-static const char *renderEscapedName (const char* s,
+static const char *renderEscapedName (const bool isTagName,
+ const char* s,
const tagEntryInfo *const tag,
vString* b)
{
- const char* base = s;
+ int unexpected_byte = 0;
- for (; *s; s++)
+ if (isTagName && (*s == ' ' || *s == '!'))
{
- int c = *s;
- if ((c > 0x00 && c <= 0x1F) || c == 0x7F)
+ /* Don't allow a leading space or exclamation mark as it conflicts with
+ * pseudo-tags when sorting. Anything with a lower byte value is
+ * escaped by renderEscapedString() already. */
+ unexpected_byte = *s;
+ switch (*s)
{
- const kindDefinition *kdef = getTagKind (tag);
- verbose ("Unexpected character (0 < *c && *c < 0x20) included in a tagEntryInfo: %s\n", base);
- verbose ("File: %s, Line: %lu, Lang: %s, Kind: %c\n",
- tag->inputFileName, tag->lineNumber, getLanguageName(tag->langType), kdef->letter);
- verbose ("Escape the character\n");
- break;
+ case ' ': vStringCatS (b, "\\x20"); s++; break;
+ case '!': vStringCatS (b, "\\x21"); s++; break;
+ default: AssertNotReached();
}
- else if (c == '\\')
- break;
- else
- continue;
}
+ else
+ {
+ /* Find the first byte needing escaping for the warning message; compare as unsigned char so bytes >= 0x80 (e.g. UTF-8) are not mistaken for control characters where char is signed */
+ const char *p = s;
- if (*s == '\0')
- return base;
+ while ((unsigned char) *p > 0x1F && *p != 0x7F)
+ p++;
+ unexpected_byte = (unsigned char) *p;
+ }
- vStringNCatS (b, base, s - base);
+ if (unexpected_byte)
+ {
+ const kindDefinition *kdef = getTagKind (tag);
+ verbose ("Unexpected character %#04x included in a tagEntryInfo: %s\n", unexpected_byte, s);
+ verbose ("File: %s, Line: %lu, Lang: %s, Kind: %c\n",
+ tag->inputFileName, tag->lineNumber, getLanguageName(tag->langType), kdef->letter);
+ verbose ("Escape the character\n");
+ }
return renderEscapedString (s, tag, b);
}
@@ -417,7 +427,7 @@ static const char *renderEscapedName (const char* s,
static const char *renderFieldName (const tagEntryInfo *const tag, const char *value CTAGS_ATTR_UNUSED, vString* b,
bool *rejected CTAGS_ATTR_UNUSED)
{
- return renderEscapedName (tag->name, tag, b);
+ return renderEscapedName (true, tag->name, tag, b);
}
static const char *renderFieldNameNoEscape (const tagEntryInfo *const tag, const char *value CTAGS_ATTR_UNUSED, vString* b,
@@ -471,7 +481,7 @@ static const char *renderFieldScope (const tagEntryInfo *const tag, const char *
const char* scope;
getTagScopeInformation ((tagEntryInfo *const)tag, NULL, &scope);
- return scope? renderEscapedName (scope, tag, b): NULL;
+ return scope? renderEscapedName (false, scope, tag, b): NULL;
}
static const char *renderFieldScopeNoEscape (const tagEntryInfo *const tag, const char *value CTAGS_ATTR_UNUSED, vString* b,
@@ -499,7 +509,7 @@ static const char *renderFieldInherits (const tagEntryInfo *const tag, const cha
static const char *renderFieldTyperef (const tagEntryInfo *const tag, const char *value CTAGS_ATTR_UNUSED, vString* b,
bool *rejected CTAGS_ATTR_UNUSED)
{
- return renderEscapedName (WITH_DEFUALT_VALUE (tag->extensionFields.typeRef [1]), tag, b);
+ return renderEscapedName (false, WITH_DEFUALT_VALUE (tag->extensionFields.typeRef [1]), tag, b);
}
diff --git a/parsers/html.c b/parsers/html.c
index 78c8dbbba0..8d2a4abfef 100644
--- a/parsers/html.c
+++ b/parsers/html.c
@@ -178,8 +178,7 @@ static void readTokenText (tokenInfo *const token, bool collectText)
c = ' ';
if (c != ' ' || lastC != ' ')
{
- if (collectText)
- vStringPut (token->string, c);
+ vStringPut (token->string, c);
lastC = c;
}
}
@@ -460,6 +459,7 @@ static void readTag (tokenInfo *token, vString *text, int depth)
else
headingKind = K_HEADING3;
+ vStringStripLeading (text);
vStringStripTrailing (text);
makeSimpleTag (text, headingKind);
}