Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

syntax: add a Quote func #727

Merged
merged 1 commit into from
Sep 12, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion expand/param.go
Original file line number Diff line number Diff line change
Expand Up @@ -263,7 +263,12 @@ func (cfg *Config) paramExp(pe *syntax.ParamExp) (string, error) {
case syntax.OtherParamOps:
switch arg {
case "Q":
str = strconv.Quote(str)
var ok bool
str, ok = syntax.Quote(str)
if !ok {
// Variables can't contain null bytes.
panic("syntax.Quote should never fail on a variable")
}
case "E":
tail := str
var rns []rune
Expand Down
35 changes: 35 additions & 0 deletions syntax/example_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,41 @@ func ExampleNewParser_options() {
// for ((i = 0; i < 5; i++)); do echo $i > f; done
}

// Keep in sync with FuzzQuote.

// ExampleQuote prints the quoted form chosen by syntax.Quote for a set of
// representative inputs: a plain word, strings containing shell
// metacharacters, a reserved word, invalid UTF-8, and non-printable bytes.
func ExampleQuote() {
	for _, s := range []string{
		"foo",
		"bar $baz",
		`"won't"`,
		"~/home",
		"#1304",
		"name=value",
		"for",
		"glob-*",
		"invalid-\xe2'",
		"nonprint-\x0b\x1b",
	} {
		quoted, ok := syntax.Quote(s)
		if !ok {
			// Terminate the line so that a failure does not run into
			// the output for the next input.
			fmt.Printf("%q cannot be quoted\n", s)
			continue
		}
		// %17q right-aligns the Go-quoted input so the results line up.
		fmt.Printf("Quote(%17q): %s\n", s, quoted)
	}
	// Output:
	// Quote(            "foo"): foo
	// Quote(       "bar $baz"): 'bar $baz'
	// Quote(      "\"won't\""): "\"won't\""
	// Quote(         "~/home"): '~/home'
	// Quote(          "#1304"): '#1304'
	// Quote(     "name=value"): 'name=value'
	// Quote(            "for"): 'for'
	// Quote(         "glob-*"): 'glob-*'
	// Quote(  "invalid-\xe2'"): $'invalid-\xe2\''
	// Quote("nonprint-\v\x1b"): $'nonprint-\v\x1b'
}

func ExampleWalk() {
in := strings.NewReader(`echo $foo "and $bar"`)
f, err := syntax.NewParser().Parse(in, "")
Expand Down
48 changes: 48 additions & 0 deletions syntax/fuzz_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
//go:build dev.fuzz
// +build dev.fuzz

package syntax_test

import (
"os/exec"
"testing"

"mvdan.cc/sh/v3/syntax"
)

// FuzzQuote checks that any string accepted by syntax.Quote round-trips
// through bash: the quoted form, passed as an argument to printf,
// must print exactly the original string.
func FuzzQuote(f *testing.F) {
	// The oracle is a real bash process; without it there is nothing to verify.
	if _, err := exec.LookPath("bash"); err != nil {
		f.Skipf("requires bash to verify quoted strings")
	}

	// Keep in sync with ExampleQuote.
	f.Add("foo")
	f.Add("bar $baz")
	f.Add(`"won't"`)
	f.Add(`~/home`)
	f.Add("#1304")
	f.Add("name=value")
	f.Add("for")
	f.Add(`glob-*`)
	f.Add("invalid-\xe2'")
	f.Add("nonprint-\x0b\x1b")
	f.Fuzz(func(t *testing.T, s string) {
		quoted, ok := syntax.Quote(s)
		if !ok {
			// Contains a null byte; not interesting.
			return
		}
		// Beware that this might run arbitrary code
		// if Quote is too naive and allows ';' or '$'.
		//
		// Also note that this fuzzing would not catch '=',
		// as we don't use the quoted string as a first argument
		// to avoid running random commands.
		//
		// We could consider ways to fully sandbox the bash process,
		// but for now that feels overkill.
		out, err := exec.Command("bash", "-c", "printf %s "+quoted).CombinedOutput()
		if err != nil {
			t.Fatalf("bash error on %q quoted as %s: %v: %s", s, quoted, err, out)
		}
		want, got := s, string(out)
		if want != got {
			t.Fatalf("output mismatch on %q quoted as %s: got %q (len=%d)", want, quoted, got, len(got))
		}
	})
}
108 changes: 108 additions & 0 deletions syntax/lexer.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,11 @@ package syntax

import (
"bytes"
"fmt"
"io"
"strconv"
"strings"
"unicode"
"unicode/utf8"
)

Expand Down Expand Up @@ -1143,3 +1147,107 @@ func testBinaryOp(val string) BinTestOperator {
return 0
}
}

// Quote returns a quoted version of the input string,
// so that the quoted version is always expanded or interpreted
// as the original string.
//
// When the boolean result is false,
// the input string cannot be quoted to satisfy the rule above.
// For example, an expanded shell string can't contain a null byte.
//
// Quoting is necessary when using arbitrary literal strings
// as words in a shell script or command.
// Without quoting, one could run into syntax errors,
// as well as the possibility of running unintended code.
//
// The quoting strategy is chosen on a best-effort basis,
// to minimize the amount of extra bytes necessary:
//
//   - Non-empty strings with no special characters, which are not
//     shell keywords, are returned unchanged.
//   - Strings with invalid UTF-8 or non-printable runes use $'...'.
//   - Strings without single quotes use '...'.
//   - Any other string uses "..." with backslash escapes.
//
// The empty string is always quoted as '',
// since an unquoted empty word would expand to zero fields.
func Quote(s string) (_ string, ok bool) {
	if s == "" {
		// Returning the input unchanged would make the word vanish
		// entirely when expanded, rather than yield one empty field.
		return "''", true
	}

	shellChars := false
	nonPrintable := false
	for _, r := range s {
		switch r {
		// Like regOps; token characters.
		case ';', '"', '\'', '(', ')', '$', '|', '&', '>', '<', '`',
			// Whitespace; might result in multiple fields.
			' ', '\t', '\r', '\n',
			// Escape sequences would be expanded.
			'\\',
			// Would start a comment unless quoted.
			'#',
			// Might result in brace expansion.
			'{',
			// Might result in tilde expansion.
			'~',
			// Might result in globbing.
			'*', '?', '[',
			// Might result in an assignment.
			'=':
			shellChars = true
		case '\x00':
			// We can't quote null bytes.
			return "", false
		}
		// Ranging over a string yields utf8.RuneError for invalid bytes.
		if r == utf8.RuneError || !unicode.IsPrint(r) {
			nonPrintable = true
		}
	}
	if !shellChars && !nonPrintable && !IsKeyword(s) {
		// Nothing to quote; avoid allocating.
		return s, true
	}

	// Single quotes are usually best,
	// as they don't require any escaping of characters.
	// If we have any invalid utf8 or non-printable runes,
	// use $'' so that we can escape them.
	// Note that we can't use double quotes for those.
	var b strings.Builder
	if nonPrintable {
		b.WriteString("$'")
		quoteBuf := make([]byte, 0, 16)
		for rem := s; len(rem) > 0; {
			r, size := utf8.DecodeRuneInString(rem)
			switch {
			case r == utf8.RuneError && size == 1:
				// An invalid UTF-8 byte; emit it verbatim as \xHH.
				// Such bytes are never ASCII, so %x always yields
				// exactly two hexadecimal digits here.
				fmt.Fprintf(&b, "\\x%x", rem[0])
			case !unicode.IsPrint(r):
				quoteBuf = quoteBuf[:0]
				quoteBuf = strconv.AppendQuoteRuneToASCII(quoteBuf, r)
				// We don't want the single quotes from strconv.
				b.Write(quoteBuf[1 : len(quoteBuf)-1])
			case r == '\'', r == '\\':
				// Both are special inside $'...' and need a backslash.
				b.WriteByte('\\')
				b.WriteRune(r)
			default:
				b.WriteRune(r)
			}
			rem = rem[size:]
		}
		b.WriteString("'")
		return b.String(), true
	}

	// Single quotes without any need for escaping.
	if !strings.Contains(s, "'") {
		return "'" + s + "'", true
	}

	// The string contains single quotes,
	// so fall back to double quotes.
	b.WriteByte('"')
	for _, r := range s {
		switch r {
		// The only characters that remain special inside double quotes.
		case '"', '\\', '`', '$':
			b.WriteByte('\\')
		}
		b.WriteRune(r)
	}
	b.WriteByte('"')
	return b.String(), true
}