Skip to content

Commit

Permalink
Merge pull request #1580 from b4n/name-escape
Browse files Browse the repository at this point in the history
Escape leading spaces in tag names instead of stripping them
  • Loading branch information
b4n committed Nov 28, 2017
2 parents 157918c + 7195e6a commit 248cffc
Show file tree
Hide file tree
Showing 14 changed files with 86 additions and 62 deletions.
5 changes: 4 additions & 1 deletion Tmain/broken-tagname.d/stderr-expected.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,11 @@ Reading command line arguments
CTagsSelfTest requires a memory stream for input
OPENING input.cst as CTagsSelfTest language file [new,required]
Initialize parser: CTagsSelfTest
Unexpected character (0 < *c && *c < 0x20) included in a tagEntryInfo: one
Unexpected character 0x0a included in a tagEntryInfo: one
ofbroken name
File: input.cst, Line: 1, Lang: CTagsSelfTest, Kind: b
Escape the character
Unexpected character 0x09 included in a tagEntryInfo: \Broken Context
File: input.cst, Line: 1, Lang: CTagsSelfTest, Kind: b
Escape the character
sorting tag file
Expand Down

This file was deleted.

3 changes: 0 additions & 3 deletions Units/parser-html.r/whitespace-prefixed.html.d/input.html

This file was deleted.

1 change: 1 addition & 0 deletions Units/parser-html.r/whitespaces.html.d/args.ctags
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
--sort=no
3 changes: 3 additions & 0 deletions Units/parser-html.r/whitespaces.html.d/expected.tags
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
heading1 input.html /^<\/h1>$/;" h
heading2 input.html /^<h1> heading2 <\/h1>$/;" h
heading 3 input.html /^<h1>heading 3 <\/h1>$/;" h
5 changes: 5 additions & 0 deletions Units/parser-html.r/whitespaces.html.d/input.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
<h1>
heading1
</h1>
<h1> heading2 </h1>
<h1>heading 3 </h1>
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
\n input.js /^let`$/;" v
1 change: 1 addition & 0 deletions Units/parser-javascript.r/js-odd-method-names.d/args.ctags
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
--sort=no
12 changes: 12 additions & 0 deletions Units/parser-javascript.r/js-odd-method-names.d/expected.tags
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
\x21hello input.js /^ '!hello': function(){},$/;" m class:object
\x20hello input.js /^ ' hello': function(){},$/;" m class:object
<hello input.js /^ '<hello': function(){},$/;" m class:object
>hello input.js /^ '>hello': function(){},$/;" m class:object
\thello input.js /^ ' hello': function(){},$/;" m class:object
\\hello input.js /^ '\\\\hello': function(){},$/;" m class:object
;"hello input.js /^ ';"hello': function(){},$/;" m class:object
"hello input.js /^ '"hello': function(){},$/;" m class:object
'hello input.js /^ "'hello": function(){},$/;" m class:object
hello! input.js /^ 'hello!': function(){},$/;" m class:object
hello input.js /^ 'hello ': function(){},$/;" m class:object
object input.js /^var object = {$/;" c
13 changes: 13 additions & 0 deletions Units/parser-javascript.r/js-odd-method-names.d/input.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
var object = {
'!hello': function(){},
' hello': function(){},
'<hello': function(){},
'>hello': function(){},
' hello': function(){},
'\\hello': function(){},
';"hello': function(){},
'"hello': function(){},
"'hello": function(){},
'hello!': function(){},
'hello ': function(){},
};
18 changes: 12 additions & 6 deletions docs/format.rst
Original file line number Diff line number Diff line change
Expand Up @@ -269,9 +269,14 @@ A tagfield has a name, a colon, and a value: "name:value".
must be doubled!

EXCEPTION: Universal ctags introduces more conversion rules.
The characters in range 0 to 0x20 and 0x7F is converted
to \x prefixed hexadecimal number if the characters are not handled
in the abouve "value" rules.

- When a value contains a "\\a", this stands for a <BEL> (0x07).
- When a value contains a "\\b", this stands for a <BS> (0x08).
- When a value contains a "\\v", this stands for a <VT> (0x0b).
- When a value contains a "\\f", this stands for a <FF> (0x0c).
- Characters in the range 0x01 to 0x1F inclusive, 0x7F, and a leading space
(0x20) or leading '!' (0x21) are converted to \x-prefixed hexadecimal numbers
if they are not handled by the above "value" rules.

Proposed tagfield names:

Expand Down Expand Up @@ -468,9 +473,10 @@ Exceptions
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

#. {tagname} in tags file generated by Universal ctags may contain
spaces. Parsers for documents like Tex and reStructuredText need
this exceptions. See {tagname} of Proposal section for more detail
about the conversion.
spaces and several escape sequences. Parsers for documents like TeX and
reStructuredText, or for liberal languages such as JavaScript, need these
exceptions. See {tagname} in the Proposal section for more detail about the
conversion.

.. _compat-output:

Expand Down
29 changes: 1 addition & 28 deletions main/entry.c
Original file line number Diff line number Diff line change
Expand Up @@ -1294,25 +1294,8 @@ static void makeTagEntriesForSubwords (tagEntryInfo *const subtag)
stringListDelete (list);
}

static char *trimPrefixedWhitespaces (const char *name)
extern int makeTagEntry (const tagEntryInfo *const tag)
{
const char *start;

for (start = name; isspace(*start); start++)
;

if (start != name)
return eStrdup (start);

return NULL;
}

extern int makeTagEntry (const tagEntryInfo *const tag_const)
{
char *trimmed_name;
const tagEntryInfo *tag = tag_const;
tagEntryInfo tag_backingstore;

int r = CORK_NIL;
Assert (tag->name != NULL);

Expand All @@ -1329,14 +1312,6 @@ extern int makeTagEntry (const tagEntryInfo *const tag_const)
return CORK_NIL;
}

trimmed_name = trimPrefixedWhitespaces (tag->name);
if (trimmed_name)
{
tag_backingstore = *tag_const;
tag = &tag_backingstore;
tag_backingstore.name = trimmed_name;
}

if (tag->name [0] == '\0' && (!tag->placeholder))
{
if (!doesInputLanguageAllowNullTag())
Expand All @@ -1356,8 +1331,6 @@ extern int makeTagEntry (const tagEntryInfo *const tag_const)
makeTagEntriesForSubwords (&subtag);
}
out:
if (trimmed_name)
eFree (trimmed_name);
return r;
}

Expand Down
52 changes: 31 additions & 21 deletions main/field.c
Original file line number Diff line number Diff line change
Expand Up @@ -382,42 +382,52 @@ static const char *renderEscapedString (const char *s,
return vStringValue (b);
}

static const char *renderEscapedName (const char* s,
static const char *renderEscapedName (const bool isTagName,
const char* s,
const tagEntryInfo *const tag,
vString* b)
{
const char* base = s;
int unexpected_byte = 0;

for (; *s; s++)
if (isTagName && (*s == ' ' || *s == '!'))
{
int c = *s;
if ((c > 0x00 && c <= 0x1F) || c == 0x7F)
/* Don't allow a leading space or exclamation mark as it conflicts with
* pseudo-tags when sorting. Anything with a lower byte value is
* escaped by renderEscapedString() already. */
unexpected_byte = *s;
switch (*s)
{
const kindDefinition *kdef = getTagKind (tag);
verbose ("Unexpected character (0 < *c && *c < 0x20) included in a tagEntryInfo: %s\n", base);
verbose ("File: %s, Line: %lu, Lang: %s, Kind: %c\n",
tag->inputFileName, tag->lineNumber, getLanguageName(tag->langType), kdef->letter);
verbose ("Escape the character\n");
break;
case ' ': vStringCatS (b, "\\x20"); s++; break;
case '!': vStringCatS (b, "\\x21"); s++; break;
default: AssertNotReached();
}
else if (c == '\\')
break;
else
continue;
}
else
{
/* Find the first byte needing escaping for the warning message */
const char *p = s;

if (*s == '\0')
return base;
while (*p > 0x1F && *p != 0x7F)
p++;
unexpected_byte = *p;
}

vStringNCatS (b, base, s - base);
if (unexpected_byte)
{
const kindDefinition *kdef = getTagKind (tag);
verbose ("Unexpected character %#04x included in a tagEntryInfo: %s\n", unexpected_byte, s);
verbose ("File: %s, Line: %lu, Lang: %s, Kind: %c\n",
tag->inputFileName, tag->lineNumber, getLanguageName(tag->langType), kdef->letter);
verbose ("Escape the character\n");
}

return renderEscapedString (s, tag, b);
}

static const char *renderFieldName (const tagEntryInfo *const tag, const char *value CTAGS_ATTR_UNUSED, vString* b,
bool *rejected CTAGS_ATTR_UNUSED)
{
return renderEscapedName (tag->name, tag, b);
return renderEscapedName (true, tag->name, tag, b);
}

static const char *renderFieldNameNoEscape (const tagEntryInfo *const tag, const char *value CTAGS_ATTR_UNUSED, vString* b,
Expand Down Expand Up @@ -471,7 +481,7 @@ static const char *renderFieldScope (const tagEntryInfo *const tag, const char *
const char* scope;

getTagScopeInformation ((tagEntryInfo *const)tag, NULL, &scope);
return scope? renderEscapedName (scope, tag, b): NULL;
return scope? renderEscapedName (false, scope, tag, b): NULL;
}

static const char *renderFieldScopeNoEscape (const tagEntryInfo *const tag, const char *value CTAGS_ATTR_UNUSED, vString* b,
Expand Down Expand Up @@ -499,7 +509,7 @@ static const char *renderFieldInherits (const tagEntryInfo *const tag, const cha
static const char *renderFieldTyperef (const tagEntryInfo *const tag, const char *value CTAGS_ATTR_UNUSED, vString* b,
bool *rejected CTAGS_ATTR_UNUSED)
{
return renderEscapedName (WITH_DEFUALT_VALUE (tag->extensionFields.typeRef [1]), tag, b);
return renderEscapedName (false, WITH_DEFUALT_VALUE (tag->extensionFields.typeRef [1]), tag, b);
}


Expand Down
4 changes: 2 additions & 2 deletions parsers/html.c
Original file line number Diff line number Diff line change
Expand Up @@ -178,8 +178,7 @@ static void readTokenText (tokenInfo *const token, bool collectText)
c = ' ';
if (c != ' ' || lastC != ' ')
{
if (collectText)
vStringPut (token->string, c);
vStringPut (token->string, c);
lastC = c;
}
}
Expand Down Expand Up @@ -460,6 +459,7 @@ static void readTag (tokenInfo *token, vString *text, int depth)
else
headingKind = K_HEADING3;

vStringStripLeading (text);
vStringStripTrailing (text);
makeSimpleTag (text, headingKind);
}
Expand Down

0 comments on commit 248cffc

Please sign in to comment.