From 1fd840623d1cbb8817b6b37f14d593ac40477e85 Mon Sep 17 00:00:00 2001 From: Roger Peppe Date: Mon, 22 Jan 2024 15:10:47 +0000 Subject: [PATCH] cue/token: deprecate base offset The base offset seems to be a hangover from the origins of the package in [`go/token`](https://pkg.go.dev/go/token#FileSet.AddFile). In that package, it signifies the overall offset inside a set of line information shared among many files. In `cue/token`, by contrast, it doesn't signify anything, witnessed by the fact that when we _do_ try to use it, it [doesn't have the desired effect](https://github.com/cue-lang/cue/issues/2776). In effect, the base is added to the internal index stored in a `token.Pos` and then removed before it's made visible to the API. Because it's confusing, and because in general we can't say that we're starting at an arbitrary point in a file and hope line and column information will be able to work, we deprecate the API, maintaining all user-visible behavior. Signed-off-by: Roger Peppe Change-Id: I57d0f54722bf4aa872ef1f36e2d2a8e2e43a5810 Dispatch-Trailer: {"type":"trybot","CL":1175779,"patchset":4,"ref":"refs/changes/79/1175779/4","targetBranch":"master"} --- cue/parser/error_test.go | 2 +- cue/parser/interface.go | 4 ++- cue/parser/parser.go | 4 +-- cue/scanner/fuzz.go | 2 +- cue/scanner/scanner_test.go | 24 ++++++++-------- cue/token/position.go | 39 +++++++++++++++++--------- encoding/json/json.go | 21 ++++++-------- encoding/protobuf/parse.go | 2 +- encoding/protobuf/textproto/decoder.go | 2 +- internal/attrs.go | 2 +- 10 files changed, 55 insertions(+), 47 deletions(-) diff --git a/cue/parser/error_test.go b/cue/parser/error_test.go index 1f25b43ad..694bb7916 100644 --- a/cue/parser/error_test.go +++ b/cue/parser/error_test.go @@ -154,7 +154,7 @@ func checkErrors(t *testing.T, filename string, input interface{}) { // we are expecting the following errors // (collect these after parsing a file so that it is found in the file set) if file == nil { - 
t.Fatal("") + t.Fatal("no token.File for ast.File") } expected := expectedErrors(t, file, src) diff --git a/cue/parser/interface.go b/cue/parser/interface.go index af498773c..6afca039a 100644 --- a/cue/parser/interface.go +++ b/cue/parser/interface.go @@ -118,8 +118,10 @@ const ( ) // FileOffset specifies the File position info to use. +// +// Deprecated: this has no effect. func FileOffset(pos int) Option { - return func(p *parser) { p.offset = pos } + return func(p *parser) {} } // A mode value is a set of flags (or 0). diff --git a/cue/parser/parser.go b/cue/parser/parser.go index 5a3e336b5..c10e28e48 100644 --- a/cue/parser/parser.go +++ b/cue/parser/parser.go @@ -33,7 +33,6 @@ var debugStr = astinternal.DebugStr // The parser structure holds the parser's internal state. type parser struct { file *token.File - offset int errors errors.Error scanner scanner.Scanner @@ -68,11 +67,10 @@ type parser struct { } func (p *parser) init(filename string, src []byte, mode []Option) { - p.offset = -1 for _, f := range mode { f(p) } - p.file = token.NewFile(filename, p.offset, len(src)) + p.file = token.NewFile(filename, -1, len(src)) var m scanner.Mode if p.mode&parseCommentsMode != 0 { diff --git a/cue/scanner/fuzz.go b/cue/scanner/fuzz.go index a8b560e4b..3ad29c54e 100644 --- a/cue/scanner/fuzz.go +++ b/cue/scanner/fuzz.go @@ -27,7 +27,7 @@ func Fuzz(b []byte) int { } var s Scanner - s.Init(token.NewFile("", 1, len(b)), b, eh, ScanComments) + s.Init(token.NewFile("", -1, len(b)), b, eh, ScanComments) for { _, tok, _ := s.Scan() diff --git a/cue/scanner/scanner_test.go b/cue/scanner/scanner_test.go index 44bc3672e..d6456f09f 100644 --- a/cue/scanner/scanner_test.go +++ b/cue/scanner/scanner_test.go @@ -242,7 +242,7 @@ func TestScan(t *testing.T) { // verify scan var s Scanner - s.Init(token.NewFile("", 1, len(source)), source, eh, ScanComments|DontInsertCommas) + s.Init(token.NewFile("", -1, len(source)), source, eh, ScanComments|DontInsertCommas) // set up expected 
position epos := token.Position{ @@ -327,7 +327,7 @@ func TestScan(t *testing.T) { func checkComma(t *testing.T, line string, mode Mode) { var S Scanner - file := token.NewFile("TestCommas", 1, len(line)) + file := token.NewFile("TestCommas", -1, len(line)) S.Init(file, []byte(line), nil, mode) pos, tok, lit := S.Scan() for tok != token.EOF { @@ -496,7 +496,7 @@ func TestRelative(t *testing.T) { "elided , \n", } var S Scanner - f := token.NewFile("TestCommas", 1, len(test)) + f := token.NewFile("TestCommas", -1, len(test)) S.Init(f, []byte(test), nil, ScanComments) pos, tok, lit := S.Scan() got := []string{} @@ -558,7 +558,7 @@ func TestLineComments(t *testing.T) { // verify scan var S Scanner - f := token.NewFile(filepath.Join("dir", "TestLineComments"), 1, len(src)) + f := token.NewFile(filepath.Join("dir", "TestLineComments"), -1, len(src)) S.Init(f, []byte(src), nil, DontInsertCommas) for _, s := range segs { p, _, lit := S.Scan() @@ -582,7 +582,7 @@ func TestInit(t *testing.T) { // 1st init src1 := "false true { }" - f1 := token.NewFile("src1", 1, len(src1)) + f1 := token.NewFile("src1", -1, len(src1)) s.Init(f1, []byte(src1), nil, DontInsertCommas) if f1.Size() != len(src1) { t.Errorf("bad file size: got %d, expected %d", f1.Size(), len(src1)) @@ -596,7 +596,7 @@ func TestInit(t *testing.T) { // 2nd init src2 := "null true { ]" - f2 := token.NewFile("src2", 1, len(src2)) + f2 := token.NewFile("src2", -1, len(src2)) s.Init(f2, []byte(src2), nil, DontInsertCommas) if f2.Size() != len(src2) { t.Errorf("bad file size: got %d, expected %d", f2.Size(), len(src2)) @@ -629,7 +629,7 @@ func TestScanInterpolation(t *testing.T) { for i, src := range sources { name := fmt.Sprintf("tsrc%d", i) t.Run(name, func(t *testing.T) { - f := token.NewFile(name, 1, len(src)) + f := token.NewFile(name, -1, len(src)) // verify scan var s Scanner @@ -674,7 +674,7 @@ func TestStdErrorHander(t *testing.T) { } var s Scanner - s.Init(token.NewFile("File1", 1, len(src)), []byte(src), eh, 
DontInsertCommas) + s.Init(token.NewFile("File1", -1, len(src)), []byte(src), eh, DontInsertCommas) for { if _, tok, _ := s.Scan(); tok == token.EOF { break @@ -713,7 +713,7 @@ func checkError(t *testing.T, src string, tok token.Token, pos int, lit, err str h.msg = fmt.Sprintf(msg, args...) h.pos = pos } - s.Init(token.NewFile("", 1, len(src)), []byte(src), eh, ScanComments|DontInsertCommas) + s.Init(token.NewFile("", -1, len(src)), []byte(src), eh, ScanComments|DontInsertCommas) _, tok0, lit0 := s.Scan() if tok0 != tok { t.Errorf("%q: got %s, expected %s", src, tok0, tok) @@ -861,7 +861,7 @@ func TestNoLiteralComments(t *testing.T) { } ` var s Scanner - s.Init(token.NewFile("", 1, len(src)), []byte(src), nil, 0) + s.Init(token.NewFile("", -1, len(src)), []byte(src), nil, 0) for { pos, tok, lit := s.Scan() class := tokenclass(tok) @@ -876,7 +876,7 @@ func TestNoLiteralComments(t *testing.T) { func BenchmarkScan(b *testing.B) { b.StopTimer() - file := token.NewFile("", 1, len(source)) + file := token.NewFile("", -1, len(source)) var s Scanner b.StartTimer() for i := 0; i < b.N; i++ { @@ -897,7 +897,7 @@ func BenchmarkScanFile(b *testing.B) { if err != nil { panic(err) } - file := token.NewFile(filename, 1, len(src)) + file := token.NewFile(filename, -1, len(src)) b.SetBytes(int64(len(src))) var s Scanner b.StartTimer() diff --git a/cue/token/position.go b/cue/token/position.go index 577254f82..675f70b11 100644 --- a/cue/token/position.go +++ b/cue/token/position.go @@ -199,26 +199,35 @@ func toPos(x index) int { // ----------------------------------------------------------------------------- // File +// index represents an offset into the file. +// It's 1-based rather than zero-based so that +// we can distinguish the zero Pos from a Pos that +// just has a zero offset. type index int // A File has a name, size, and line offset table. 
type File struct { mutex sync.RWMutex name string // file name as provided to AddFile - base index // Pos index range for this file is [base...base+size] - size index // file size as provided to AddFile + // base is deprecated and stored only so that [File.Base] + // can continue to return the same value passed to [NewFile]. + base index + size index // file size as provided to AddFile // lines and infos are protected by set.mutex lines []index // lines contains the offset of the first character for each line (the first entry is always 0) infos []lineInfo } -// NewFile returns a new file. -func NewFile(filename string, base, size int) *File { - if base < 0 { - base = 1 +// NewFile returns a new file with the given OS file name. The size provides the +// size of the whole file. +// +// The second argument is deprecated. It has no effect. +func NewFile(filename string, deprecatedBase, size int) *File { + if deprecatedBase < 0 { + deprecatedBase = 1 } - return &File{sync.RWMutex{}, filename, index(base), index(size), []index{0}, nil} + return &File{sync.RWMutex{}, filename, index(deprecatedBase), index(size), []index{0}, nil} } // Name returns the file name of file f as registered with AddFile. @@ -226,12 +235,14 @@ func (f *File) Name() string { return f.name } -// Base returns the base offset of file f as registered with AddFile. +// Base returns the base offset of file f as passed to NewFile. +// +// Deprecated: this method just returns the (deprecated) second argument passed to NewFile. func (f *File) Base() int { return int(f.base) } -// Size returns the size of file f as registered with AddFile. +// Size returns the size of file f as passed to NewFile. 
func (f *File) Size() int { return int(f.size) } @@ -359,7 +370,7 @@ func (f *File) Pos(offset int, rel RelPos) Pos { if index(offset) > f.size { panic("illegal file offset") } - return Pos{f, toPos(f.base+index(offset)) + int(rel)} + return Pos{f, toPos(1+index(offset)) + int(rel)} } // Offset returns the offset for the given file position p; @@ -367,10 +378,10 @@ func (f *File) Pos(offset int, rel RelPos) Pos { // f.Offset(f.Pos(offset)) == offset. func (f *File) Offset(p Pos) int { x := p.index() - if x < f.base || x > f.base+index(f.size) { + if x < 1 || x > 1+index(f.size) { panic("illegal Pos value") } - return int(x - f.base) + return int(x - 1) } // Line returns the line number for the given file position p; @@ -405,7 +416,7 @@ func (f *File) unpack(offset index, adjusted bool) (filename string, line, colum } func (f *File) position(p Pos, adjusted bool) (pos Position) { - offset := p.index() - f.base + offset := p.index() - 1 pos.Offset = int(offset) pos.Filename, pos.Line, pos.Column = f.unpack(offset, adjusted) return @@ -418,7 +429,7 @@ func (f *File) position(p Pos, adjusted bool) (pos Position) { func (f *File) PositionFor(p Pos, adjusted bool) (pos Position) { x := p.index() if p != NoPos { - if x < f.base || x > f.base+f.size { + if x < 1 || x > 1+f.size { panic("illegal Pos value") } pos = f.position(p, adjusted) diff --git a/encoding/json/json.go b/encoding/json/json.go index 69c898651..51c1e71a7 100644 --- a/encoding/json/json.go +++ b/encoding/json/json.go @@ -99,19 +99,17 @@ func extract(path string, b []byte) (ast.Expr, error) { // The runtime may be nil if Decode isn't used. func NewDecoder(r *cue.Runtime, path string, src io.Reader) *Decoder { return &Decoder{ - r: r, - path: path, - dec: json.NewDecoder(src), - offset: 1, + r: r, + path: path, + dec: json.NewDecoder(src), } } // A Decoder converts JSON values to CUE. 
type Decoder struct { - r *cue.Runtime - path string - dec *json.Decoder - offset int + r *cue.Runtime + path string + dec *json.Decoder } // Extract converts the current JSON value to a CUE ast. It returns io.EOF @@ -131,13 +129,12 @@ func (d *Decoder) extract() (ast.Expr, error) { if err == io.EOF { return nil, err } - offset := d.offset - d.offset += len(raw) if err != nil { - pos := token.NewFile(d.path, offset, len(raw)).Pos(0, 0) + pos := token.NewFile(d.path, -1, len(raw)).Pos(0, 0) return nil, errors.Wrapf(err, pos, "invalid JSON for file %q", d.path) } - expr, err := parser.ParseExpr(d.path, []byte(raw), parser.FileOffset(offset)) + expr, err := parser.ParseExpr(d.path, []byte(raw)) + if err != nil { return nil, err } diff --git a/encoding/protobuf/parse.go b/encoding/protobuf/parse.go index 8881a031d..93e15cb70 100644 --- a/encoding/protobuf/parse.go +++ b/encoding/protobuf/parse.go @@ -61,7 +61,7 @@ func (s *Extractor) parse(filename string, src interface{}) (p *protoConverter, return nil, errors.Newf(token.NoPos, "protobuf: %v", err) } - tfile := token.NewFile(filename, 0, len(b)) + tfile := token.NewFile(filename, -1, len(b)) tfile.SetLinesForContent(b) p = &protoConverter{ diff --git a/encoding/protobuf/textproto/decoder.go b/encoding/protobuf/textproto/decoder.go index d5f95472d..e917342d3 100644 --- a/encoding/protobuf/textproto/decoder.go +++ b/encoding/protobuf/textproto/decoder.go @@ -77,7 +77,7 @@ func (d *Decoder) Parse(schema cue.Value, filename string, b []byte) (ast.Expr, // dec.errs = nil - f := token.NewFile(filename, 0, len(b)) + f := token.NewFile(filename, -1, len(b)) f.SetLinesForContent(b) dec.file = f diff --git a/internal/attrs.go b/internal/attrs.go index 44707bb1e..6e50e3f1d 100644 --- a/internal/attrs.go +++ b/internal/attrs.go @@ -148,7 +148,7 @@ func ParseAttrBody(pos token.Pos, s string) (a Attr) { // Create temporary token.File so that scanner has something // to work with. 
// TODO it's probably possible to do this without allocations. - tmpFile := token.NewFile("", 0, len(s)) + tmpFile := token.NewFile("", -1, len(s)) if len(s) > 0 { tmpFile.AddLine(len(s) - 1) }