diff --git a/Tmain/broken-tagname.d/stderr-expected.txt b/Tmain/broken-tagname.d/stderr-expected.txt index 5b1e1fcd8e..226e6dc4fd 100644 --- a/Tmain/broken-tagname.d/stderr-expected.txt +++ b/Tmain/broken-tagname.d/stderr-expected.txt @@ -3,8 +3,11 @@ Reading command line arguments CTagsSelfTest requires a memory stream for input OPENING input.cst as CTagsSelfTest language file [new,required] Initialize parser: CTagsSelfTest -Unexpected character (0 < *c && *c < 0x20) included in a tagEntryInfo: one +Unexpected character 0x0a included in a tagEntryInfo: one of broken name File: input.cst, Line: 1, Lang: CTagsSelfTest, Kind: b Escape the character +Unexpected character 0x09 included in a tagEntryInfo: \Broken Context +File: input.cst, Line: 1, Lang: CTagsSelfTest, Kind: b +Escape the character sorting tag file diff --git a/Units/parser-html.r/whitespace-prefixed.html.d/expected.tags b/Units/parser-html.r/whitespace-prefixed.html.d/expected.tags deleted file mode 100644 index 9e54554063..0000000000 --- a/Units/parser-html.r/whitespace-prefixed.html.d/expected.tags +++ /dev/null @@ -1 +0,0 @@ -heading1 input.html /^<\/h1>$/;" h diff --git a/Units/parser-html.r/whitespace-prefixed.html.d/input.html b/Units/parser-html.r/whitespace-prefixed.html.d/input.html deleted file mode 100644 index c3727d4d0b..0000000000 --- a/Units/parser-html.r/whitespace-prefixed.html.d/input.html +++ /dev/null @@ -1,3 +0,0 @@ -

- heading1 -

diff --git a/Units/parser-html.r/whitespaces.html.d/args.ctags b/Units/parser-html.r/whitespaces.html.d/args.ctags new file mode 100644 index 0000000000..5ee5f79f70 --- /dev/null +++ b/Units/parser-html.r/whitespaces.html.d/args.ctags @@ -0,0 +1 @@ +--sort=no diff --git a/Units/parser-html.r/whitespaces.html.d/expected.tags b/Units/parser-html.r/whitespaces.html.d/expected.tags new file mode 100644 index 0000000000..3c1f42b370 --- /dev/null +++ b/Units/parser-html.r/whitespaces.html.d/expected.tags @@ -0,0 +1,3 @@ +heading1 input.html /^<\/h1>$/;" h +heading2 input.html /^

heading2 <\/h1>$/;" h +heading 3 input.html /^

heading 3 <\/h1>$/;" h diff --git a/Units/parser-html.r/whitespaces.html.d/input.html b/Units/parser-html.r/whitespaces.html.d/input.html new file mode 100644 index 0000000000..6bc7f04a41 --- /dev/null +++ b/Units/parser-html.r/whitespaces.html.d/input.html @@ -0,0 +1,5 @@ +

+ heading1 +

+

heading2

+

heading 3

diff --git a/Units/parser-javascript.r/js-broken-template.d/expected.tags b/Units/parser-javascript.r/js-broken-template.d/expected.tags index e69de29bb2..3dec97c22e 100644 --- a/Units/parser-javascript.r/js-broken-template.d/expected.tags +++ b/Units/parser-javascript.r/js-broken-template.d/expected.tags @@ -0,0 +1 @@ +\n input.js /^let`$/;" v diff --git a/Units/parser-javascript.r/js-odd-method-names.d/args.ctags b/Units/parser-javascript.r/js-odd-method-names.d/args.ctags new file mode 100644 index 0000000000..5ee5f79f70 --- /dev/null +++ b/Units/parser-javascript.r/js-odd-method-names.d/args.ctags @@ -0,0 +1 @@ +--sort=no diff --git a/Units/parser-javascript.r/js-odd-method-names.d/expected.tags b/Units/parser-javascript.r/js-odd-method-names.d/expected.tags new file mode 100644 index 0000000000..80f3fc9de9 --- /dev/null +++ b/Units/parser-javascript.r/js-odd-method-names.d/expected.tags @@ -0,0 +1,12 @@ +\x21hello input.js /^ '!hello': function(){},$/;" m class:object +\x20hello input.js /^ ' hello': function(){},$/;" m class:object +<hello input.js /^ '<hello': function(){},$/;" m class:object +>hello input.js /^ '>hello': function(){},$/;" m class:object +\thello input.js /^ ' hello': function(){},$/;" m class:object +\\hello input.js /^ '\\\\hello': function(){},$/;" m class:object +;"hello input.js /^ ';"hello': function(){},$/;" m class:object +"hello input.js /^ '"hello': function(){},$/;" m class:object +'hello input.js /^ "'hello": function(){},$/;" m class:object +hello! 
input.js /^ 'hello!': function(){},$/;" m class:object +hello input.js /^ 'hello ': function(){},$/;" m class:object +object input.js /^var object = {$/;" c diff --git a/Units/parser-javascript.r/js-odd-method-names.d/input.js b/Units/parser-javascript.r/js-odd-method-names.d/input.js new file mode 100644 index 0000000000..eb68acf38b --- /dev/null +++ b/Units/parser-javascript.r/js-odd-method-names.d/input.js @@ -0,0 +1,13 @@ +var object = { + '!hello': function(){}, + ' hello': function(){}, + '<hello': function(){}, + '>hello': function(){}, + ' hello': function(){}, + '\\hello': function(){}, + ';"hello': function(){}, + '"hello': function(){}, + "'hello": function(){}, + 'hello!': function(){}, + 'hello ': function(){}, +}; diff --git a/docs/format.rst b/docs/format.rst index e045eda879..aec95fb053 100644 --- a/docs/format.rst +++ b/docs/format.rst @@ -269,9 +269,14 @@ A tagfield has a name, a colon, and a value: "name:value". must be doubled! EXCEPTION: Universal ctags introduces more conversion rules. - The characters in range 0 to 0x20 and 0x7F is converted - to \x prefixed hexadecimal number if the characters are not handled - in the abouve "value" rules. + + - When a value contains a "\\a", this stands for a <BEL> (0x07). + - When a value contains a "\\b", this stands for a <BS> (0x08). + - When a value contains a "\\v", this stands for a <VT> (0x0b). + - When a value contains a "\\f", this stands for a <FF> (0x0c). + - The characters in range 0x01 to 0x1F included, 0x7F, and leading space + (0x20) and '!' (0x21) are converted to \x prefixed hexadecimal number if + the characters are not handled in the above "value" rules. Proposed tagfield names: @@ -468,9 +473,10 @@ Exceptions ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #. {tagname} in tags file generated by Universal ctags may contain - spaces. Parsers for documents like Tex and reStructuredText need - this exceptions. See {tagname} of Proposal section for more detail - about the conversion. + spaces and several escape sequences. 
Parsers for documents like Tex and + reStructuredText, or liberal languages such as JavaScript need these + exceptions. See {tagname} of Proposal section for more detail about the + conversion. .. _compat-output: diff --git a/main/entry.c b/main/entry.c index ef4202a789..b6f0efce3d 100644 --- a/main/entry.c +++ b/main/entry.c @@ -1294,25 +1294,8 @@ static void makeTagEntriesForSubwords (tagEntryInfo *const subtag) stringListDelete (list); } -static char *trimPrefixedWhitespaces (const char *name) +extern int makeTagEntry (const tagEntryInfo *const tag) { - const char *start; - - for (start = name; isspace(*start); start++) - ; - - if (start != name) - return eStrdup (start); - - return NULL; -} - -extern int makeTagEntry (const tagEntryInfo *const tag_const) -{ - char *trimmed_name; - const tagEntryInfo *tag = tag_const; - tagEntryInfo tag_backingstore; - int r = CORK_NIL; Assert (tag->name != NULL); @@ -1329,14 +1312,6 @@ extern int makeTagEntry (const tagEntryInfo *const tag_const) return CORK_NIL; } - trimmed_name = trimPrefixedWhitespaces (tag->name); - if (trimmed_name) - { - tag_backingstore = *tag_const; - tag = &tag_backingstore; - tag_backingstore.name = trimmed_name; - } - if (tag->name [0] == '\0' && (!tag->placeholder)) { if (!doesInputLanguageAllowNullTag()) @@ -1356,8 +1331,6 @@ extern int makeTagEntry (const tagEntryInfo *const tag_const) makeTagEntriesForSubwords (&subtag); } out: - if (trimmed_name) - eFree (trimmed_name); return r; } diff --git a/main/field.c b/main/field.c index d33dc536dc..252f7248ed 100644 --- a/main/field.c +++ b/main/field.c @@ -382,34 +382,44 @@ static const char *renderEscapedString (const char *s, return vStringValue (b); } -static const char *renderEscapedName (const char* s, +static const char *renderEscapedName (const bool isTagName, + const char* s, const tagEntryInfo *const tag, vString* b) { - const char* base = s; + int unexpected_byte = 0; - for (; *s; s++) + if (isTagName && (*s == ' ' || *s == '!')) { - int c = 
*s; - if ((c > 0x00 && c <= 0x1F) || c == 0x7F) + /* Don't allow a leading space or exclamation mark as it conflicts with + * pseudo-tags when sorting. Anything with a lower byte value is + * escaped by renderEscapedString() already. */ + unexpected_byte = *s; + switch (*s) { - const kindDefinition *kdef = getTagKind (tag); - verbose ("Unexpected character (0 < *c && *c < 0x20) included in a tagEntryInfo: %s\n", base); - verbose ("File: %s, Line: %lu, Lang: %s, Kind: %c\n", - tag->inputFileName, tag->lineNumber, getLanguageName(tag->langType), kdef->letter); - verbose ("Escape the character\n"); - break; + case ' ': vStringCatS (b, "\\x20"); s++; break; + case '!': vStringCatS (b, "\\x21"); s++; break; + default: AssertNotReached(); } - else if (c == '\\') - break; - else - continue; } + else + { + /* Find the first byte needing escaping for the warning message */ + const char *p = s; - if (*s == '\0') - return base; + while (*p > 0x1F && *p != 0x7F) + p++; + unexpected_byte = *p; + } - vStringNCatS (b, base, s - base); + if (unexpected_byte) + { + const kindDefinition *kdef = getTagKind (tag); + verbose ("Unexpected character %#04x included in a tagEntryInfo: %s\n", unexpected_byte, s); + verbose ("File: %s, Line: %lu, Lang: %s, Kind: %c\n", + tag->inputFileName, tag->lineNumber, getLanguageName(tag->langType), kdef->letter); + verbose ("Escape the character\n"); + } return renderEscapedString (s, tag, b); } @@ -417,7 +427,7 @@ static const char *renderEscapedName (const char* s, static const char *renderFieldName (const tagEntryInfo *const tag, const char *value CTAGS_ATTR_UNUSED, vString* b, bool *rejected CTAGS_ATTR_UNUSED) { - return renderEscapedName (tag->name, tag, b); + return renderEscapedName (true, tag->name, tag, b); } static const char *renderFieldNameNoEscape (const tagEntryInfo *const tag, const char *value CTAGS_ATTR_UNUSED, vString* b, @@ -471,7 +481,7 @@ static const char *renderFieldScope (const tagEntryInfo *const tag, const char * const char* 
scope; getTagScopeInformation ((tagEntryInfo *const)tag, NULL, &scope); - return scope? renderEscapedName (scope, tag, b): NULL; + return scope? renderEscapedName (false, scope, tag, b): NULL; } static const char *renderFieldScopeNoEscape (const tagEntryInfo *const tag, const char *value CTAGS_ATTR_UNUSED, vString* b, @@ -499,7 +509,7 @@ static const char *renderFieldInherits (const tagEntryInfo *const tag, const cha static const char *renderFieldTyperef (const tagEntryInfo *const tag, const char *value CTAGS_ATTR_UNUSED, vString* b, bool *rejected CTAGS_ATTR_UNUSED) { - return renderEscapedName (WITH_DEFUALT_VALUE (tag->extensionFields.typeRef [1]), tag, b); + return renderEscapedName (false, WITH_DEFUALT_VALUE (tag->extensionFields.typeRef [1]), tag, b); } diff --git a/parsers/html.c b/parsers/html.c index 78c8dbbba0..8d2a4abfef 100644 --- a/parsers/html.c +++ b/parsers/html.c @@ -178,8 +178,7 @@ static void readTokenText (tokenInfo *const token, bool collectText) c = ' '; if (c != ' ' || lastC != ' ') { - if (collectText) - vStringPut (token->string, c); + vStringPut (token->string, c); lastC = c; } } @@ -460,6 +459,7 @@ static void readTag (tokenInfo *token, vString *text, int depth) else headingKind = K_HEADING3; + vStringStripLeading (text); vStringStripTrailing (text); makeSimpleTag (text, headingKind); }