From 81ac50a267b202000b76166099f4c917626389f2 Mon Sep 17 00:00:00 2001 From: getzze Date: Fri, 2 Oct 2020 23:42:11 +0100 Subject: [PATCH 1/2] Add julia parser --- .../corner_cases.d/expected.tags | 30 + Units/parser-julia.r/corner_cases.d/input.jl | 409 +++++ .../infinite_loop.d/expected.tags | 1 + Units/parser-julia.r/infinite_loop.d/input.jl | 3 + Units/parser-julia.r/julia_test.d/args.ctags | 1 + .../parser-julia.r/julia_test.d/expected.tags | 22 + Units/parser-julia.r/julia_test.d/input.jl | 48 + docs/news.rst | 1 + main/parsers_p.h | 1 + parsers/julia.c | 1364 +++++++++++++++++ source.mak | 1 + win32/ctags_vs2013.vcxproj | 1 + win32/ctags_vs2013.vcxproj.filters | 3 + 13 files changed, 1885 insertions(+) create mode 100644 Units/parser-julia.r/corner_cases.d/expected.tags create mode 100644 Units/parser-julia.r/corner_cases.d/input.jl create mode 100644 Units/parser-julia.r/infinite_loop.d/expected.tags create mode 100644 Units/parser-julia.r/infinite_loop.d/input.jl create mode 100644 Units/parser-julia.r/julia_test.d/args.ctags create mode 100644 Units/parser-julia.r/julia_test.d/expected.tags create mode 100644 Units/parser-julia.r/julia_test.d/input.jl create mode 100644 parsers/julia.c diff --git a/Units/parser-julia.r/corner_cases.d/expected.tags b/Units/parser-julia.r/corner_cases.d/expected.tags new file mode 100644 index 0000000000..3d6aa13a18 --- /dev/null +++ b/Units/parser-julia.r/corner_cases.d/expected.tags @@ -0,0 +1,30 @@ +!_TAG_FILE_FORMAT 2 /extended format; --format=1 will not append ;" to lines/ +!_TAG_FILE_SORTED 1 /0=unsorted, 1=sorted, 2=foldcase/ +!_TAG_OUTPUT_MODE u-ctags /u-ctags or e-ctags/ +Baz.foo input.jl /^Baz.foo(x) = 1$/;" f +Foo input.jl /^abstract type Foo <: Bar end$/;" t +Foo input.jl /^mutable struct Foo$/;" s +Foo input.jl /^struct Foo$/;" s +Foo.bar input.jl /^function Foo.bar(x, y)$/;" f +Mod1 input.jl /^baremodule Mod1$/;" n +Mod2 input.jl /^module Mod2$/;" n +Point input.jl /^struct Point{T} <: Pointy{T}$/;" s +Pointy input.jl /^abstract type Pointy{T} end$/;" t +cell input.jl /^cell(dims::(Integer...)) = Array(Any, convert((Int...), dims))$/;" f +elsize input.jl /^elsize(::AbstractArray{T}) where {T} = sizeof(T)$/;" f +elsize input.jl /^function elsize(::AbstractArray{T}) where T$/;" f +f input.jl /^f(x::FooBar) = x$/;" f +foo input.jl /^foo(x::(Int,)) = 1$/;" f +foo input.jl /^function foo()$/;" f +foo_bar! input.jl /^foo_bar!(x,y) = x + y$/;" f +foo_bar! input.jl /^function foo_bar!(x,y)$/;" f +g input.jl /^function g(x, y)::Int8$/;" f +myfun input.jl /^@inline myfun() = println("myfun")$/;" f +norm input.jl /^function norm(p::Point{<:Real})$/;" f +same_type_numeric input.jl /^same_type_numeric(x::T, y::T) where T = false$/;" f +same_type_numeric input.jl /^same_type_numeric(x::T, y::T) where {T <: Number} = true$/;" f +test input.jl /^function test(x)$/;" f +x input.jl /^ x::Bar$/;" g struct:Foo +x input.jl /^ x::T$/;" g struct:Point +y input.jl /^ y::T$/;" g struct:Point +y input.jl /^const y = "hello world"$/;" c diff --git a/Units/parser-julia.r/corner_cases.d/input.jl b/Units/parser-julia.r/corner_cases.d/input.jl new file mode 100644 index 0000000000..99957b81ca --- /dev/null +++ b/Units/parser-julia.r/corner_cases.d/input.jl @@ -0,0 +1,409 @@ +#= Julia syntax highlighting test. + +Modified from https://github.com/JuliaEditorSupport/julia-syntax-test-cases + +This file derives from https://gist.github.com/Wilfred/f1aca44c61ed6e1df603 +whose author is [@Wilfred](https://github.com/Wilfred). @Wilfred has put it in +the public domain: + https://gist.github.com/Wilfred/f1aca44c61ed6e1df603#gistcomment-2948526 + +Changes from the original are governed by the license of the repository in +which this file is found. + +This file is designed to test various corner cases of Julia +syntax highlighting. +=# + +baremodule Mod1 + # Nothing here +end + +module Mod2 + # Here neither +end + +## Simple function definitions. +# Expected: `function` should be highlighted, as should `foo_bar!`. +function foo_bar!(x,y) + x + y + 1 +end + +# Expected: `foo_bar!` should be highlighted. +foo_bar!(x,y) = x + y + +# Expected: `foo` should be highlighted. +Baz.foo(x) = 1 + +# Expected: `foo` should be highlighted. +foo(x::(Int,)) = 1 + +# Expected: `foo` should be highlighted. +foo(x, y=length(x)) + +## Function definitions in namespaces. +# Expected: `bar` should be highlighted. +function Foo.bar(x, y) + x + 1 +end + +## Function definitions with type variables. +# Expected: `elsize` should be highlighted. +elsize(::AbstractArray{T}) where {T} = sizeof(T) + +function elsize(::AbstractArray{T}) where T + sizeof(T) +end + +## Nested brackets in function definitions. +# Expected: `cell` should be highlighted. +cell(dims::(Integer...)) = Array(Any, convert((Int...), dims)) + +# TODO: find an example with a nested type expression. + +## Macro usage +# Expected: `@hello_world!` should be highlighted. +@hello_world! foo + +# Expected: highlight `myfun` +@inline myfun() = println("myfun") + +## Builtin functions. +# Expected: `throw`, `error` and `super` should not be highlighted. There are +# too many built-in functions for this to be useful. +# https://github.com/JuliaLang/julia/commit/134867c69096fcb52afa5d5a7433892b5127e981 +# https://github.com/JuliaLang/julia/pull/7963#issuecomment-52586261 +throw(foo) +error("foo", bar, "baz") +super(Int) + +## Strings +# Expected: highlight the string. +x = "foo \"bar\" baz" + +# Expected: highlight the whole string. +x = "foo +bar" + +# Expected: highlight the whole triple-quoted string. +x = """hello "world" foobar""" +y = """foo\\""" +z = """bar\"""" +w = """"bar""" + +# Expected: highlight `$user` +x = "hello $user" + +# Expected: don't highlight `$user` +x = "hello \$user" + +# Expected: highlight `$val` +x = """(a="$val")""" + +# Expected: treat r as part of the string, so `r"a"` is highlighted. +x = r"0.1" + +# Expected: treat ismx as part of the string, so `r"a"ismx` is highlighted. +x = r"a"ismx + +# Expected: highlight `r"""a "b" c"""` +x = r"""a "b" c""" + +# Expected: treat v as part of the string, so `v"0.1"` is highlighted. +x = v"0.1" + +# Expected: treat b as part of the string, so `b"a"` is highlighted. +x = b"a" + +# Bonus points: +# Expected: highlight the interpolation brackets `$(` and `)` +x = "hello $(user * user)" + +# Bonus points: +# Expected: highlight regexp metacharacters `[` and `]` +x = r"[abc]" + +# Bonus points: +# Expected: highlight escape sequences `\xff` and `\u2200` +x = b"DATA\xff\u2200" + +# Bonus points: +# Expected: don't highlight `$user` +x = raw"hello $user" + +## Characters +# Expected: highlight the character. +x = 'a' +y = '\u0' +z = '\U10ffff' +w = '\x41' +a = ' ' +b = '"' +c = ''' +d = '\'' +e = '\\' + +# Expected: don't highlight, as ' is an operator here, not a character delimiter. +a = b' + c' +A''' + +# Bonus points: +# Expected: don't highlight the character +x = 'way too long so not a character' +x = '' + +## Comments +# Expected: highlight `# foo` +# foo + +# Expected: highlight `#= foo\n bar =#` +#= foo +bar =# + +# Expected: highlight `#= #= =# =#` (comments can nest). +#= #= =# =# + +# Expected: highlight `'` as adjoint operator +A#==#' +(A)#==#' +A[1]#==#' + +## Type declarations + +# Expected highlight `Foo` and `Bar` +mutable struct Foo + x::Bar +end + +# Expected highlight `Foo` and `Bar` +struct Foo + x::Bar +end + +# Expected: highlight `Foo` and `Bar` +abstract type Foo <: Bar end + +# Expected: don't highlight x or y +x <: y + +## Type annotations + +# Expected: highlight `FooBar` +f(x::FooBar) = x + +# Expected: highlight `Int8` +function foo() + local x::Int8 = 5 + x +end + +# Expected: highlight `Point` and `Real` as types +function norm(p::Point{<:Real}) + sqrt(p.x^2 + p.y^2) +end + +# Expected: highlight `g` as function and `Int8` as type +function g(x, y)::Int8 + return x * y +end + +# Expected: highlight `T` and `Number` +same_type_numeric(x::T, y::T) where {T <: Number} = true +same_type_numeric(x::T, y::T) where T = false + +## Parametric type declaration + +# Expected: highlight `Pointy` and `T` +abstract type Pointy{T} end + +# Expected: highlight `Point`, `Pointy` and `T` +struct Point{T} <: Pointy{T} + x::T + y::T +end + +## Variable declarations + +# Expected: highlight `x` and `y` +global x = "foo, bar = 2", y = 3 + +# Expected: highlight `x` and `y` +global x = foo(a, b), y = 3 + +# Expected: highlight `y` +const y = "hello world" + +# Expected: highlight `x` and `y` +function foo() + local x = f(1, 2), y = f(3, 4) + x + y +end + +# Expected: highlight `x` and `y` +let x = f(1, 2), y = f(3, 4) + x + y +end + +## Colons and end + +# Expected: highlight `:foo`, `:end` and `:function` +:foo +x = :foo +y = :function +z = :end + +# Expected: highlight index `[end]` differently to block delimiter `end` +if foo[end] +end + +# Expected: highlight as index `end` +foo[bar:end] + +# Expected: highlight as index `begin` +foo[begin:4] + +# Expected: don't highlight `:123` +x = :123 + +# Expected: don't highlight `:baz` +foo[bar:baz] + +# Expected: highlight `:baz` +foo[:baz] + +# Expected: highlight both `:baz` +foo(:baz,:baz) + +# Note that `: foo` is currently a valid quoted symbol, this will hopefully +# change in 0.4: https://github.com/JuliaLang/julia/issues/5997 + +# Expected: highlight `:foo` +[1 :foo] + +# Expected: highlight `:end` +[1 :end] + +# Expected: highlight `:two` +@eval :one+:two + +# Expected: don't highlight `:end` but `end` as index +[(1+1):end] + +# Expected: don't highlight `:end` but `end` as index +[a[1]:end] + +# Expected: don't highlight `:foo` +for x=1:foo + print(x) +end + +## Range detection + +# Bonus points: +# Expected: don't highlight `:s2` +push!(a, s1 :s2) + +# Bonus points: +# Expected: don't highlight `:end` +a[begin :end] + +## Expression evaluation + +# Expected: highlight `:` as operator +a = :(x = 2) + +# Expected: highlight `:call` and `:b` as symbols +# Debatable: highlight `:+` as operator +ex = Expr(:call, :+, a, :b) + +## Number highlighting + +# Expected: highlight all these as numbers +x = 123 +x = 1.1 +x = .5 +x = 0x123abcdef +x = 0o7 +x = 0b1011 +x = 2.5e-4 +x = 2.5E-4 +x = 1e+00 +x = 2.5f-4 +x = 0x.4p-1 +x = 1_000 + +# Expected: highlight these as numbers or built-ins +x = Inf +x = NaN + +# Expected: highlight `123`, not the letter +y = 123x +y = 123e + +# Expected: highlight `1e+1` and `1e-1` +1e+1+1e-1 + +# Expected: highlight `1.` and `.1` +1. +.1 +# Note that `1.+1` is currently ambiguous and gives an error + +# Bonus points: +# Expected: don't highlight `..` +x = 1..3 + +# Bonus points: +# Debatable: highlight the first two digits, not the following digits +# or show an error +x = 0o1291 +x = 0b1091 + +# Debatable: highlight `π` as a number or built-in +# (note that `πx` is a single symbol, not `π * x`) +x = π + +## List comprehension +# Expected: highlight `for` and `if` without the `end` keyword +[i for i in 1:5 if i%2==0] + +## Broadcasting +# Expected: highlight `.+` as operator +a.+1 + +## Command +# Expected: highlight "`echo 1`" +c = `echo 1` + +# Expected: highlight "```echo `hello 1` ```" +c = ```echo `hello 1` ``` + +# Expected: highlight "raw`echo $1`" +c = raw`echo $1` + +## Non-standard identifiers +# Bonus points: +# Expected: highlight `var"##"` as a function +function var"##"(x) + println(x) +end + +# Bonus points: +# Expected: highlight `var"%%"` as a function +var"%%"(x) = println(x) + +# Bonus points: +# Expected: highlight `$var` as string and `##""` as comment +"$var"##"" + +# Bonus points: +# Expected: highlight `$(var")(")` as string interpolation +"$(var")(")" + +# Bonus points: +# Expected: highlight `'` as adjoint operator +var"##mat"' + +## Code folding: for and if in list comprehension +# Expected: fold between function and last end +function test(x) + a = (if x; 0 else 1 end) + println(a) +end diff --git a/Units/parser-julia.r/infinite_loop.d/expected.tags b/Units/parser-julia.r/infinite_loop.d/expected.tags new file mode 100644 index 0000000000..45c637dbff --- /dev/null +++ b/Units/parser-julia.r/infinite_loop.d/expected.tags @@ -0,0 +1 @@ +X input.jl /^struct X$/;" s diff --git a/Units/parser-julia.r/infinite_loop.d/input.jl b/Units/parser-julia.r/infinite_loop.d/input.jl new file mode 100644 index 0000000000..c2193d5367 --- /dev/null +++ b/Units/parser-julia.r/infinite_loop.d/input.jl @@ -0,0 +1,3 @@ +struct X + X +end diff --git a/Units/parser-julia.r/julia_test.d/args.ctags b/Units/parser-julia.r/julia_test.d/args.ctags new file mode 100644 index 0000000000..5ee5f79f70 --- /dev/null +++ b/Units/parser-julia.r/julia_test.d/args.ctags @@ -0,0 +1 @@ +--sort=no diff --git a/Units/parser-julia.r/julia_test.d/expected.tags b/Units/parser-julia.r/julia_test.d/expected.tags new file mode 100644 index 0000000000..c144b4dc47 --- /dev/null +++ b/Units/parser-julia.r/julia_test.d/expected.tags @@ -0,0 +1,22 @@ +Revise input.jl /^using Revise$/;" x +Normal input.jl /^import Distributions: Normal$/;" x +Random.randn input.jl /^using Random.randn$/;" x +Plots input.jl /^using Plots, Makie$/;" x +Makie input.jl /^using Plots, Makie$/;" x +a input.jl /^const a::Int = 'c' # struct Struct_wrong3 end$/;" c +test_macro input.jl /^macro test_macro() end$/;" m +test_fun input.jl /^function test_fun(a::Int, b::T) where #$/;" f +Base.ifelse input.jl /^function Base.ifelse(a::Int)$/;" f +lone_function input.jl /^function lone_function end$/;" f +run_test input.jl /^function run_test(a::T) where T<:Int; a::Int end$/;" f +eq input.jl /^eq(c=4; b=(1,2,3), c=:a=>5) = a == b$/;" f +eq input.jl /^eq(a::T, b::T, c=4; b=(1,2,3), c=:a=>5)::T where T<:Real = (a == b; a)$/;" f +ATest input.jl /^abstract type ATest end$/;" t +STest input.jl /^mutable struct STest <: ATest; a::Int end$/;" s +a input.jl /^mutable struct STest <: ATest; a::Int end$/;" g struct:STest +Test1 input.jl /^struct Test1 <: ATest$/;" s +a input.jl /^ a::Int$/;" g struct:Test1 +α input.jl /^ α::Real$/;" g struct:Test1 +Test1 input.jl /^ Test1() = new(0)$/;" f struct:Test1 +Test1 input.jl /^ Test1(a) = new(1)$/;" f struct:Test1 +Test1 input.jl /^ Test1(a, b) = begin new(2) end$/;" f struct:Test1 diff --git a/Units/parser-julia.r/julia_test.d/input.jl b/Units/parser-julia.r/julia_test.d/input.jl new file mode 100644 index 0000000000..9bd714caa9 --- /dev/null +++ b/Units/parser-julia.r/julia_test.d/input.jl @@ -0,0 +1,48 @@ +using Revise +import Distributions: Normal +using Random.randn + +using Plots, Makie + + +const a::Int = 'c' # struct Struct_wrong3 end + +macro test_macro() end + +""" + test_fun(a::Int) +For test only +""" +function test_fun(a::Int, b::T) where # + T <:Array{S} where S <: Number + println(a) + println("foo") +end + + +function Base.ifelse(a::Int) + println("bar") +end +function lone_function end + +function run_test(a::T) where T<:Int; a::Int end + +eq(c=4; b=(1,2,3), c=:a=>5) = a == b +eq(a::T, b::T, c=4; b=(1,2,3), c=:a=>5)::T where T<:Real = (a == b; a) + +#= + Structs +=# +abstract type ATest end + +mutable struct STest <: ATest; a::Int end + + +struct Test1 <: ATest + a::Int + α::Real + + Test1() = new(0) + Test1(a) = new(1) + Test1(a, b) = begin new(2) end +end diff --git a/docs/news.rst b/docs/news.rst index f38898ada6..c4442275d9 100644 --- a/docs/news.rst +++ b/docs/news.rst @@ -62,6 +62,7 @@ The following parsers have been added: * Inko *optlib* * JavaProperties * JSON +* Julia * Kconfig *optlib* * GNU linker script(LdScript) * Man page *optlib* diff --git a/main/parsers_p.h b/main/parsers_p.h index 7392fdbe3c..6cdcbae399 100644 --- a/main/parsers_p.h +++ b/main/parsers_p.h @@ -89,6 +89,7 @@ JavaPropertiesParser, \ JavaScriptParser, \ JsonParser, \ + JuliaParser, \ KconfigParser, \ LdScriptParser, \ LispParser, \ diff --git a/parsers/julia.c b/parsers/julia.c new file mode 100644 index 0000000000..9d7b3516d0 --- /dev/null +++ b/parsers/julia.c @@ -0,0 +1,1364 @@ +/* +* Copyright (c) 2020-2021, getzze +* +* This source code is released for free distribution under the terms of the +* GNU General Public License version 2 or (at your option) any later version. +* +* This module contains functions for generating tags for Julia files. +* +* Documented 'kinds': +* https://docs.julialang.org/en/v1/manual/documentation/#Syntax-Guide +* Language parser in Scheme: +* https://github.com/JuliaLang/julia/blob/master/src/julia-parser.scm +*/ + +/* +* INCLUDE FILES +*/ +#include "general.h" /* must always come first */ + +#include + +#include "keyword.h" +#include "parse.h" +#include "entry.h" +#include "options.h" +#include "read.h" +#include "routines.h" +#include "vstring.h" +#include "xtag.h" + +/* +* MACROS +*/ +#define MAX_STRING_LENGTH 256 + +/* +* DATA DEFINITIONS +*/ +typedef enum { + K_CONSTANT, + K_FUNCTION, + K_FIELD, + K_MACRO, + K_MODULE, + K_STRUCT, + K_TYPE, + K_IMPORT, + K_NONE +} JuliaKind; + +static kindDefinition JuliaKinds [] = { + { true, 'c', "constant", "Constants" }, + { true, 'f', "function", "Functions" }, + { true, 'g', "field", "Fields" }, + { true, 'm', "macro", "Macros" }, + { true, 'n', "module", "Modules" }, + { true, 's', "struct", "Structures" }, + { true, 't', "type", "Types" }, + { true, 'x', "unknown", "Imported name"} +}; + +typedef enum { + TOKEN_NONE=0, /* none */ + TOKEN_WHITESPACE, + TOKEN_PAREN_BLOCK, + TOKEN_BRACKET_BLOCK, + TOKEN_CURLY_BLOCK, + TOKEN_OPEN_BLOCK, + TOKEN_CLOSE_BLOCK, + TOKEN_TYPE_ANNOTATION, + TOKEN_TYPE_WHERE, + TOKEN_CONST, + TOKEN_STRING, /* = 10 */ + TOKEN_COMMAND, + TOKEN_MACROCALL, + TOKEN_IDENTIFIER, + TOKEN_MODULE, + TOKEN_MACRO, + TOKEN_FUNCTION, + TOKEN_STRUCT, + TOKEN_ENUM, + TOKEN_TYPE, + TOKEN_IMPORT, /* = 20 */ + TOKEN_EXPORT, + TOKEN_NEWLINE, + TOKEN_SEMICOLON, + TOKEN_COMPOSER_KWD, /* KEYWORD only */ + TOKEN_EOF, + TOKEN_COUNT +} tokenType; + +static const keywordTable JuliaKeywordTable [] = { + /* TODO: Sort by keys. */ + { "mutable", TOKEN_COMPOSER_KWD }, + { "primitive", TOKEN_COMPOSER_KWD }, + { "abstract", TOKEN_COMPOSER_KWD }, + + { "if", TOKEN_OPEN_BLOCK }, + { "for", TOKEN_OPEN_BLOCK }, + { "while", TOKEN_OPEN_BLOCK }, + { "try", TOKEN_OPEN_BLOCK }, + { "do", TOKEN_OPEN_BLOCK }, + { "begin", TOKEN_OPEN_BLOCK }, + { "let", TOKEN_OPEN_BLOCK }, + { "quote", TOKEN_OPEN_BLOCK }, + + { "module", TOKEN_MODULE }, + { "baremodule",TOKEN_MODULE }, + + { "using", TOKEN_IMPORT }, + { "import", TOKEN_IMPORT }, + + { "export", TOKEN_EXPORT }, + { "const", TOKEN_CONST }, + { "macro", TOKEN_MACRO }, + { "function", TOKEN_FUNCTION }, + { "struct", TOKEN_STRUCT }, + { "type", TOKEN_TYPE }, + { "where", TOKEN_TYPE_WHERE }, + { "end", TOKEN_CLOSE_BLOCK }, +}; + +typedef struct { + /* Characters */ + int prev_c; + int cur_c; + int next_c; + + /* Tokens */ + bool first_token; + int cur_token; + vString* token_str; + unsigned long line; + MIOPos pos; +} lexerState; + +/* +* FUNCTION PROTOTYPES +*/ + +static void parseExpr (lexerState *lexer, bool delim, int kind, vString *scope); + +static void scanParenBlock (lexerState *lexer); + +/* +* FUNCTION DEFINITIONS +*/ + +static int endswith(const char* what, const char* withwhat) +{ + int l1 = strlen(what); + int l2 = strlen(withwhat); + if (l2 > l1) + { + return 0; + } + + return strcmp(withwhat, what + (l1 - l2)) == 0; +} + +/* Resets the scope string to the old length */ +static void resetScope (vString *scope, size_t old_len) +{ + vStringTruncate (scope, old_len); +} + +/* Adds a name to the end of the scope string */ +static void addToScope (vString *scope, vString *name) +{ + if (vStringLength(scope) > 0) + { + vStringPut(scope, '.'); + } + vStringCat(scope, name); +} + +/* Reads a character from the file */ +static void advanceChar (lexerState *lexer) +{ + lexer->prev_c = lexer->cur_c; + lexer->cur_c = lexer->next_c; + lexer->next_c = getcFromInputFile(); +} + +/* Reads N characters from the file */ +static void advanceNChar (lexerState *lexer, int n) +{ + while (n--) + { + advanceChar(lexer); + } +} + +/* Store the current character in lexerState::token_str if there is space + * (set by MAX_STRING_LENGTH), and then read the next character from the file */ +static void advanceAndStoreChar (lexerState *lexer) +{ + if (vStringLength(lexer->token_str) < MAX_STRING_LENGTH) + { + vStringPut(lexer->token_str, (char) lexer->cur_c); + } + advanceChar(lexer); +} + +static bool isWhitespace (int c, bool newline) +{ + if (newline) + { + return c == ' ' || c == '\t' || c == '\r' || c == '\n'; + } + return c == ' ' || c == '\t'; +} + +static bool isAscii (int c) +{ + return (c >= 0) && (c < 0x80); +} + +static bool isOperator (int c) +{ + if (c == '%' || c == '^' || c == '&' || c == '|' || + c == '*' || c == '-' || c == '+' || c == '~' || + c == '<' || c == '>' || c == ',' || c == '/' || + c == '?' || c == '=' || c == ':' ) + { + return true; + } + return false; +} + +/* This does not distinguish Unicode letters from operators... */ +static bool isIdentifierFirstCharacter (int c) +{ + return (bool) ((isAscii(c) && (isalpha (c) || c == '_')) || c >= 0xC0); +} + +/* This does not distinguish Unicode letters from operators... + * The dot is considered an identifier character for fully qualified names + * */ +static bool isIdentifierCharacter (int c) +{ + return (bool) (isIdentifierFirstCharacter(c) || (isAscii(c) && (isdigit(c) || c == '!' || c == '.')) || c >= 0x80); +} + +static void skipWhitespace (lexerState *lexer, bool newline) +{ + while (isWhitespace(lexer->cur_c, newline)) + { + advanceChar(lexer); + } +} + +/* The transpose operator is only allowed after an identifier, a number, an expression inside parenthesis or an index */ +static bool isTranspose (int c) +{ + return (isIdentifierCharacter(c) || c == ')' || c == ']'); +} + + +/* + * Lexer functions + * */ + +/* Check that the current character sequence is a type declaration or inheritance */ +static bool isTypeDecl (lexerState *lexer) +{ + if ((lexer->prev_c != '.' && lexer->cur_c == '<' && lexer->next_c == ':') || + (lexer->prev_c != '.' && lexer->cur_c == '>' && lexer->next_c == ':') || + (lexer->cur_c == ':' && lexer->next_c == ':') ) + { + return true; + } + return false; +} + +/* Check if the current char is a new line */ +static bool isNewLine (lexerState *lexer) +{ + return (lexer->cur_c == '\n')? true: false; +} + +/* Check if the current char is a new line. + * If it is, skip the newline and return true */ +static bool skipNewLine (lexerState *lexer) +{ + if (isNewLine(lexer)) + { + advanceChar(lexer); + return true; + } + return false; +} + +/* Skip a single comment or multiline comment + * A single line comment starts with # + * A multi-line comment is encapsulated in #=...=# and they are nesting + * */ +static void skipComment (lexerState *lexer) +{ + /* # */ + if (lexer->next_c != '=') + { + advanceNChar(lexer, 1); + while (lexer->cur_c != EOF && lexer->cur_c != '\n') + { + advanceChar(lexer); + } + } + /* block comment */ + else /* if (lexer->next_c == '=') */ + { + int level = 1; + advanceNChar(lexer, 2); + while (lexer->cur_c != EOF && level > 0) + { + if (lexer->cur_c == '=' && lexer->next_c == '#') + { + level--; + advanceNChar(lexer, 2); + } + else if (lexer->cur_c == '#' && lexer->next_c == '=') + { + level++; + advanceNChar(lexer, 2); + } + else + { + advanceChar(lexer); + } + } + } +} + +static void scanIdentifier (lexerState *lexer, bool clear) +{ + if (clear) + { + vStringClear(lexer->token_str); + } + + do + { + advanceAndStoreChar(lexer); + } while(lexer->cur_c != EOF && isIdentifierCharacter(lexer->cur_c)); +} + +/* Scan a quote-like expression. + * Allow for triple-character variand and interpolation with `$`. + * These last past the end of the line, so be careful + * not to store too much of them (see MAX_STRING_LENGTH). */ +static void scanStringOrCommand (lexerState *lexer, int c) +{ + bool istriple = false; + + /* Pass the first "quote"-character */ + advanceAndStoreChar(lexer); + + /* Check for triple "quote"-character */ + if (lexer->cur_c == c && lexer->next_c == c) + { + istriple = true; + advanceAndStoreChar(lexer); + advanceAndStoreChar(lexer); + + /* Cancel up to 2 "quote"-characters after opening the triple */ + if (lexer->cur_c == c) + { + advanceAndStoreChar(lexer); + if (lexer->cur_c == c) + { + advanceAndStoreChar(lexer); + } + } + } + + while (lexer->cur_c != EOF && lexer->cur_c != c) + { + /* Check for interpolation before checking for end of "quote" */ + if (lexer->cur_c == '$' && lexer->next_c == '(') + { + advanceAndStoreChar(lexer); + scanParenBlock(lexer); + /* continue to avoid advance character again. Correct bug + * with "quote"-character just after closing parenthesis */ + continue; + } + + if (lexer->cur_c == '\\' && + (lexer->next_c == c || lexer->next_c == '\\')) + { + advanceAndStoreChar(lexer); + } + advanceAndStoreChar(lexer); + + /* Cancel up to 2 "quote"-characters if triple string */ + if (istriple && lexer->cur_c == c) + { + advanceAndStoreChar(lexer); + if (lexer->cur_c == c) + { + advanceAndStoreChar(lexer); + } + } + } + /* Pass the last "quote"-character */ + advanceAndStoreChar(lexer); +} + + +/* Scan commands surrounded by backticks, + * possibly triple backticks */ +static void scanCommand (lexerState *lexer) +{ + scanStringOrCommand(lexer, '`'); +} + +/* Double-quoted strings, + * possibly triple doublequotes */ +static void scanString (lexerState *lexer) +{ + scanStringOrCommand(lexer, '"'); +} + + +/* This deals with character literals: 'n', '\n', '\uFFFF'; + * and matrix transpose: A'. + * We'll use this approximate regexp for the literals: + * \' [^'] \' or \' \\ [^']+ \' or \' \\ \' \' + * Either way, we'll treat this token as a string, so it gets preserved */ +static bool scanCharacterOrTranspose (lexerState *lexer) +{ + if (isTranspose(lexer->prev_c)) + { + /* deal with untranspose/transpose sequence */ + while (lexer->cur_c != EOF && lexer->cur_c == '\'') + { + advanceAndStoreChar(lexer); + } + return false; + } + + //vStringClear(lexer->token_str); + advanceAndStoreChar(lexer); + + if (lexer->cur_c == '\\') + { + advanceAndStoreChar(lexer); + /* The \' \\ \' \' (literally '\'') case */ + if (lexer->cur_c == '\'' && lexer->next_c == '\'') + { + advanceAndStoreChar(lexer); + advanceAndStoreChar(lexer); + } + /* The \' \\ [^']+ \' case */ + else + { + while (lexer->cur_c != EOF && lexer->cur_c != '\'') + { + advanceAndStoreChar(lexer); + } + } + } + /* The \' [^'] \' and \' \' \' cases */ + else if (lexer->next_c == '\'') + { + advanceAndStoreChar(lexer); + advanceAndStoreChar(lexer); + } + /* Otherwise it is malformed */ + return true; +} + +/* Parse a block with opening and closing character */ +static void scanBlock (lexerState *lexer, int open, int close, bool convert_newline) +{ + /* Assume the current char is `open` */ + int level = 1; + + /* Pass the first opening */ + advanceAndStoreChar(lexer); + + while (lexer->cur_c != EOF && level > 0) + { + /* Parse everything */ + if (lexer->cur_c == ' ' || lexer->cur_c == '\t') + { + skipWhitespace(lexer, false); + vStringPut(lexer->token_str, ' '); + } + if (lexer->cur_c == '#') + { + skipComment(lexer); + } + else if (lexer->cur_c == '\"') + { + scanString(lexer); + } + else if (lexer->cur_c == '\'') + { + scanCharacterOrTranspose(lexer); + } + + /* Parse opening/closing */ + if (lexer->cur_c == open) + { + level++; + } + else if (lexer->cur_c == close) + { + level--; + } + + if (convert_newline && skipNewLine(lexer)) + { + vStringPut(lexer->token_str, ' '); + } + else + { + advanceAndStoreChar(lexer); + } + + } + /* Lexer position is just after `close` */ +} + + +/* Parse a block inside parenthesis, for example a function argument list */ +static void scanParenBlock (lexerState *lexer) +{ + scanBlock(lexer, '(', ')', true); +} + +/* Indexing block with bracket. + * Some keywords have a special meaning in this environment: + * end, begin, for and if */ +static void scanIndexBlock (lexerState *lexer) +{ + scanBlock(lexer, '[', ']', false); + +} + +/* Parse a block inside curly brackets, for type parametrization */ +static void scanCurlyBlock (lexerState *lexer) +{ + scanBlock(lexer, '{', '}', true); +} + +/* Scan type annotation like + * `::Type`, `::Type{T}` + */ +static void scanTypeAnnotation (lexerState *lexer) +{ + /* assume that current char is '<', '>' or ':', followed by ':' */ + advanceAndStoreChar(lexer); + advanceAndStoreChar(lexer); + + skipWhitespace(lexer, true); + scanIdentifier(lexer, false); + if (lexer->cur_c == '{') + { + scanCurlyBlock(lexer); + } +} + +/* Scan type annotation like + * `where Int<:T<:Real`, `where S<:Array{Real}` or `where {S, T}` + */ +static void scanTypeWhere (lexerState *lexer) +{ + /* assume that current token is 'where' + * allow line continuation */ + vStringPut(lexer->token_str, ' '); + skipWhitespace(lexer, true); + + while (lexer->cur_c != EOF) + { + + if (lexer->cur_c == '{') + { + scanCurlyBlock(lexer); + } + else if (isIdentifierFirstCharacter(lexer->cur_c)) + { + scanIdentifier(lexer, false); + if (endswith(vStringValue(lexer->token_str), "where")) + { + /* allow line continuation */ + vStringPut(lexer->token_str, ' '); + skipWhitespace(lexer, true); + } + } + else if (isTypeDecl(lexer)) + { + scanTypeAnnotation(lexer); + //skipWhitespace(lexer, false); + } + else if (lexer->cur_c == '#') + { + skipComment(lexer); + /* allow line continuation */ + if (endswith(vStringValue(lexer->token_str), "where ")) + { + skipWhitespace(lexer, true); + } + } + else if (isWhitespace(lexer->cur_c, false)) + { + while (isWhitespace(lexer->cur_c, false)) + { + advanceChar(lexer); + } + /* Add a space, if it is not a trailing space */ + if (!(isNewLine(lexer))) + { + vStringPut(lexer->token_str, ' '); + } + } + else + { + break; + } + } +} + + +static int parseIdentifier (lexerState *lexer) +{ + langType julia = getInputLanguage (); + scanIdentifier(lexer, true); + + int k = lookupKeyword (vStringValue(lexer->token_str), julia); + /* First part of a composed identifier */ + if (k == TOKEN_COMPOSER_KWD) + { + skipWhitespace(lexer, false); + scanIdentifier(lexer, true); + k = lookupKeyword (vStringValue(lexer->token_str), julia); + } + + if ((k == TOKEN_OPEN_BLOCK) + || (k == TOKEN_MODULE) + || (k == TOKEN_IMPORT) + || (k == TOKEN_EXPORT) + || (k == TOKEN_CONST) + || (k == TOKEN_MACRO) + || (k == TOKEN_FUNCTION) + || (k == TOKEN_STRUCT) + || (k == TOKEN_TYPE) + || (k == TOKEN_TYPE_WHERE) + || (k == TOKEN_CLOSE_BLOCK)) + { + if (k == TOKEN_TYPE_WHERE) + { + scanTypeWhere(lexer); + } + return lexer->cur_token = k; + } + return lexer->cur_token = TOKEN_IDENTIFIER; +} + + +/* Advances the parser one token, optionally skipping whitespace + * (otherwise it is concatenated and returned as a single whitespace token). + * Whitespace is needed to properly render function signatures. Unrecognized + * token starts are stored literally, e.g. token may equal to a character '#'. */ +static int advanceToken (lexerState *lexer, bool skip_whitespace) +{ + bool have_whitespace = false; + bool newline = false; + lexer->line = getInputLineNumber(); + lexer->pos = getInputFilePosition(); + + /* the next token is the first token of the line */ + if (lexer->cur_token == TOKEN_NEWLINE || + lexer->cur_token == TOKEN_SEMICOLON || + (lexer->first_token && lexer->cur_token == TOKEN_MACROCALL)) + { + lexer->first_token = true; + } + else + { + lexer->first_token = false; + } + + while (lexer->cur_c != EOF) + { + /* skip whitespaces but not newlines */ + if (isWhitespace(lexer->cur_c, newline)) + { + skipWhitespace(lexer, newline); + have_whitespace = true; + } + else if (lexer->cur_c == '#') + { + skipComment(lexer); + have_whitespace = true; + } + else + { + if (have_whitespace && !skip_whitespace) + { + return lexer->cur_token = TOKEN_WHITESPACE; + } + break; + } + } + lexer->line = getInputLineNumber(); + lexer->pos = getInputFilePosition(); + while (lexer->cur_c != EOF) + { + if (lexer->cur_c == '"') + { + vStringClear(lexer->token_str); + scanString(lexer); + return lexer->cur_token = TOKEN_STRING; + } + else if (lexer->cur_c == '\'') + { + vStringClear(lexer->token_str); + if (scanCharacterOrTranspose(lexer)) + { + return lexer->cur_token = TOKEN_STRING; + } + else + { + return lexer->cur_token = '\''; + } + } + else if (lexer->cur_c == '`') + { + vStringClear(lexer->token_str); + scanCommand(lexer); + return lexer->cur_token = TOKEN_COMMAND; + } + else if (isIdentifierFirstCharacter(lexer->cur_c)) + { + return parseIdentifier(lexer); + } + else if (lexer->cur_c == '@') + { + vStringClear(lexer->token_str); + advanceAndStoreChar(lexer); + do + { + advanceAndStoreChar(lexer); + } while(lexer->cur_c != EOF && isIdentifierCharacter(lexer->cur_c)); + return lexer->cur_token = TOKEN_MACROCALL; + } + else if (lexer->cur_c == '(') + { + vStringClear(lexer->token_str); + scanParenBlock(lexer); + return lexer->cur_token = TOKEN_PAREN_BLOCK; + } + else if (lexer->cur_c == '[') + { + vStringClear(lexer->token_str); + scanIndexBlock(lexer); + return lexer->cur_token = TOKEN_BRACKET_BLOCK; + } + else if (lexer->cur_c == '{') + { + vStringClear(lexer->token_str); + scanCurlyBlock(lexer); + return lexer->cur_token = TOKEN_CURLY_BLOCK; + } + else if (isTypeDecl(lexer)) + { + vStringClear(lexer->token_str); + scanTypeAnnotation(lexer); + return lexer->cur_token = TOKEN_TYPE_ANNOTATION; + } + else if (skipNewLine(lexer)) + { + /* allow line continuation */ + if (isOperator(lexer->cur_token)) + { + return lexer->cur_token; + } + return lexer->cur_token = TOKEN_NEWLINE; + } + else if (lexer->cur_c == ';') + { + advanceChar(lexer); + return lexer->cur_token = TOKEN_SEMICOLON; + } + else + { + int c = lexer->cur_c; + advanceChar(lexer); + return lexer->cur_token = c; + } + } + return lexer->cur_token = TOKEN_EOF; +} + +static void initLexer (lexerState *lexer) +{ + advanceNChar(lexer, 2); + lexer->token_str = vStringNew(); + lexer->first_token = true; + lexer->cur_token = TOKEN_NONE; + + if (lexer->cur_c == '#' && lexer->next_c == '!') + { + skipComment(lexer); + } + advanceToken(lexer, true); +} + +static void deInitLexer (lexerState *lexer) +{ + vStringDelete(lexer->token_str); + lexer->token_str = NULL; +} + +#if 0 +static void debugLexer (lexerState *lexer) +{ + printf("Current lexer state: line %d, token (%lu), cur char `%c`, token str:\n\t`", lexer->line, lexer->cur_token, lexer->cur_c); + printf(vStringValue(lexer->token_str)); + printf("`\n"); +} +#endif + +static void addTag (vString* ident, const char* type, const char* arg_list, int kind, unsigned long line, MIOPos pos, vString *scope, int parent_kind) +{ + if (kind == K_NONE) + { + return; + } + tagEntryInfo tag; + initTagEntry(&tag, vStringValue(ident), kind); + + tag.lineNumber = line; + tag.filePosition = pos; + tag.sourceFileName = getInputFileName(); + + tag.extensionFields.signature = arg_list; + /* tag.extensionFields.varType = type; */ /* Needs a workaround */ + if (parent_kind != K_NONE) + { + tag.extensionFields.scopeKindIndex = parent_kind; + tag.extensionFields.scopeName = vStringValue(scope); + } + makeTagEntry(&tag); +} + +/* Skip tokens until one of the goal tokens is hit. Escapes when level = 0 if there are no goal tokens. + * Keeps track of balanced ()'s, []'s, and {}'s and ignores the goal tokens within those pairings */ +static void skipUntil (lexerState *lexer, int goal_tokens[], int num_goal_tokens) +{ + int block_level = 0; + + while (lexer->cur_token != TOKEN_EOF) + { + /* check if the keyword is reached, only if outside a block */ + if (block_level == 0) + { + int ii = 0; + for(ii = 0; ii < num_goal_tokens; ii++) + { + if (lexer->cur_token == goal_tokens[ii]) + { + break; + } + } + if (ii < num_goal_tokens) + { + /* parse the next token */ + advanceToken(lexer, true); + break; + } + } + + /* take into account nested blocks */ + switch (lexer->cur_token) + { + case TOKEN_OPEN_BLOCK: + block_level++; + break; + case TOKEN_CLOSE_BLOCK: + block_level--; + break; + default: + break; + } + + /* Has to be after the token switch to catch the case when we start with the initial level token */ + if (num_goal_tokens == 0 && block_level == 0) + { + break; + } + + advanceToken(lexer, true); + } +} + +/* Skip until the end of the block */ +static void skipUntilEnd (lexerState *lexer) +{ + int goal_tokens[] = { TOKEN_CLOSE_BLOCK }; + + skipUntil(lexer, goal_tokens, 1); +} + +/* Skip a function body after assignment operator '=' + * Beware of continuation lines after operators + * */ +static void skipBody (lexerState *lexer) +{ + /* assume position just after '=' */ + while (lexer->cur_token != TOKEN_EOF && lexer->cur_token != TOKEN_NEWLINE) + { + advanceToken(lexer, true); + + if (lexer->cur_token == TOKEN_OPEN_BLOCK) + { + /* pass the keyword */ + advanceToken(lexer, true); + skipUntilEnd(lexer); + /* the next token is already selected */ + } + } +} + +/* Short function format: + * ( [] ) [::] [] = [begin] [end] + * */ +static void parseShortFunction (lexerState *lexer, vString *scope, int parent_kind) +{ + /* assume the current char is just after identifier */ + vString *name; + vString *arg_list; + unsigned long line; + MIOPos pos; + + /* should be an open parenthesis after identifier + * with potentially parametric type */ + skipWhitespace(lexer, false); + if (lexer->cur_c == '{') + { + scanCurlyBlock(lexer); + skipWhitespace(lexer, false); + } + + if (lexer->cur_c != '(') + { + advanceToken(lexer, true); + return; + } + + name = vStringNewCopy(lexer->token_str); + line = lexer->line; + pos = lexer->pos; + + /* scan argument list */ + advanceToken(lexer, true); + arg_list = vStringNewCopy(lexer->token_str); + + /* scan potential type casting */ + advanceToken(lexer, true); + if (lexer->cur_token == TOKEN_TYPE_ANNOTATION) + { + vStringCat(arg_list, lexer->token_str); + advanceToken(lexer, true); + } + /* scan potential type union with 'where' */ + if (lexer->cur_token == TOKEN_TYPE_WHERE) + { + vStringPut(arg_list, ' '); + vStringCat(arg_list, lexer->token_str); + advanceToken(lexer, true); + } + + /* scan equal sign */ + if (lexer->cur_token != '=' && + lexer->cur_c != '=' && + lexer->cur_c != '>') + { + vStringDelete(name); + vStringDelete(arg_list); + return; + } + + addTag(name, NULL, vStringValue(arg_list), K_FUNCTION, line, pos, scope, parent_kind); + + /* scan until end of function definition */ + skipBody(lexer); + + /* Should end on a new line, parse next token */ + advanceToken(lexer, true); + lexer->first_token = true; + + vStringDelete(name); + vStringDelete(arg_list); +} + +/* Function format: + * function ( [] ) [::] [] [] end + * */ +static void parseFunction (lexerState *lexer, vString *scope, int parent_kind) +{ + vString *name; + vString *arg_list; + unsigned long line; + MIOPos pos; + + advanceToken(lexer, true); + if (lexer->cur_token != TOKEN_IDENTIFIER) + { + return; + } + + name = vStringNewCopy(lexer->token_str); + arg_list = vStringNew(); + line = lexer->line; + pos = lexer->pos; + + advanceToken(lexer, true); + if (lexer->cur_token == TOKEN_PAREN_BLOCK) + { + vStringCopy(arg_list, lexer->token_str); + + /* scan potential type casting */ + advanceToken(lexer, true); + if (lexer->cur_token == TOKEN_TYPE_ANNOTATION) + { + vStringCat(arg_list, lexer->token_str); + advanceToken(lexer, true); + } + /* scan potential type union with 'where' */ + if (lexer->cur_token == TOKEN_TYPE_WHERE) + { + vStringPut(arg_list, ' '); + vStringCat(arg_list, lexer->token_str); + advanceToken(lexer, true); + } + + addTag(name, NULL, vStringValue(arg_list), K_FUNCTION, line, pos, scope, parent_kind); + //addToScope(scope, name); + //parseExpr(lexer, true, K_FUNCTION, scope); + } + else if (lexer->cur_token == TOKEN_CLOSE_BLOCK) + { + /* Function without method */ + addTag(name, NULL, NULL, K_FUNCTION, line, pos, scope, parent_kind); + } + + /* Go to the closing 'end' keyword */ + skipUntilEnd(lexer); + + vStringDelete(name); + vStringDelete(arg_list); +} + +/* Macro format: + * "macro" () + */ +static void parseMacro (lexerState *lexer, vString *scope, int parent_kind) +{ + vString *name; + unsigned long line; + MIOPos pos; + + advanceToken(lexer, true); + if (lexer->cur_token != TOKEN_IDENTIFIER) + { + return; + } + + name = vStringNewCopy(lexer->token_str); + line = lexer->line; + pos = lexer->pos; + + advanceToken(lexer, true); + if (lexer->cur_token == TOKEN_PAREN_BLOCK) + { + addTag(name, NULL, vStringValue(lexer->token_str), K_MACRO, line, pos, scope, parent_kind); + } + + skipUntilEnd(lexer); + vStringDelete(name); +} + +/* Const format: + * "const" + */ +static void parseConst (lexerState *lexer, vString *scope, int parent_kind) +{ + vString *name; + + advanceToken(lexer, true); + if (lexer->cur_token != TOKEN_IDENTIFIER) + { + return; + } + + name = vStringNewCopy(lexer->token_str); + + advanceToken(lexer, true); + if (lexer->cur_token == TOKEN_TYPE_ANNOTATION) + { + addTag(name, "const", vStringValue(lexer->token_str), K_CONSTANT, lexer->line, lexer->pos, scope, parent_kind); + advanceToken(lexer, true); + } + else + { + addTag(name, "const", NULL, K_CONSTANT, lexer->line, lexer->pos, scope, parent_kind); + } + + vStringDelete(name); +} + +/* Type format: + * [ "abstract" | "primitive" ] "type" + */ +static void parseType (lexerState *lexer, vString *scope, int parent_kind) +{ + advanceToken(lexer, true); + if (lexer->cur_token != TOKEN_IDENTIFIER) + { + return; + } + + addTag(lexer->token_str, NULL, NULL, K_TYPE, lexer->line, lexer->pos, scope, parent_kind); + + skipUntilEnd(lexer); +} + +/* Module format: + * [ "baremodule" | "module" ] + */ +static void parseModule (lexerState *lexer, vString *scope, int parent_kind) +{ + advanceToken(lexer, true); + if (lexer->cur_token != TOKEN_IDENTIFIER) + { + return; + } + + addTag(lexer->token_str, NULL, NULL, K_MODULE, lexer->line, lexer->pos, scope, parent_kind); + //addToScope(scope, lexer->token_str); + //advanceToken(lexer, true); + //parseExpr(lexer, true, K_MODULE, scope); +} + +/* Import format: + * [ "import" | "using" ] [: ] + */ +static void parseImport (lexerState *lexer, vString *scope, int parent_kind) +{ + vString *name = vStringNew(); + + /* capture the imported name */ + advanceToken(lexer, true); + if (lexer->cur_c == ':') + { + advanceAndStoreChar(lexer); + vStringCopy(name, lexer->token_str); + advanceToken(lexer, true); + } + + while (lexer->cur_token == TOKEN_IDENTIFIER || lexer->cur_token == TOKEN_MACROCALL) + { + addTag(lexer->token_str, vStringValue(name), NULL, K_IMPORT, lexer->line, lexer->pos, scope, parent_kind); + + skipWhitespace(lexer, false); + if (lexer->cur_c == ',') + { + advanceNChar(lexer, 1); + advanceToken(lexer, true); + if (lexer->cur_token == TOKEN_NEWLINE) + { + advanceToken(lexer, true); + } + } + else + { + advanceToken(lexer, true); + } + } + + vStringDelete(name); +} + +/* Structs format: + * "struct" [{}] [<:]; end + * */ +static void parseStruct (lexerState *lexer, vString *scope, int parent_kind) +{ + vString *name; + vString *field; + unsigned long line; + MIOPos pos; + + advanceToken(lexer, true); + if (lexer->cur_token != TOKEN_IDENTIFIER) + { + return; + } + + name = vStringNewCopy(lexer->token_str); + field = vStringNew(); + line = lexer->line; + pos = lexer->pos; + + /* scan parametrization */ + advanceToken(lexer, true); + if (lexer->cur_token == TOKEN_CURLY_BLOCK) + { + addTag(name, NULL, vStringValue(lexer->token_str), K_STRUCT, line, pos, scope, parent_kind); + advanceToken(lexer, true); + } + else + { + addTag(name, NULL, NULL, K_STRUCT, line, pos, scope, parent_kind); + } + addToScope(scope, name); + + /* skip inheritance */ + if (lexer->cur_token == TOKEN_TYPE_ANNOTATION) + { + advanceToken(lexer, true); + } + + /* Parse fields and inner constructors */ + while (lexer->cur_token != TOKEN_EOF && lexer->cur_token != TOKEN_CLOSE_BLOCK) + { + if (lexer->cur_token == TOKEN_IDENTIFIER) + { + if (lexer->first_token && strcmp(vStringValue(lexer->token_str), vStringValue(name)) == 0) + { + /* inner constructor */ + parseShortFunction(lexer, scope, K_STRUCT); + continue; + } + + vStringCopy(field, lexer->token_str); + + /* parse type annotation */ + advanceToken(lexer, true); + if (lexer->cur_token == TOKEN_TYPE_ANNOTATION) + { + addTag(field, NULL, vStringValue(lexer->token_str), K_FIELD, lexer->line, lexer->pos, scope, K_STRUCT); + advanceToken(lexer, true); + } + else + { + addTag(field, NULL, NULL, K_FIELD, lexer->line, lexer->pos, scope, K_STRUCT); + } + } + else if (lexer->cur_token == TOKEN_FUNCTION) + { + /* inner constructor */ + parseFunction(lexer, scope, K_STRUCT); + } + else + { + /* Get next token */ + advanceToken(lexer, true); + } + } + + vStringDelete(name); + vStringDelete(field); +} + + +static void parseExpr (lexerState *lexer, bool delim, int kind, vString *scope) +{ + int level = 1; + size_t old_scope_len; + + while (lexer->cur_token != TOKEN_EOF) + { + old_scope_len = vStringLength(scope); + /* Advance token and update if this is a new line */ + while (lexer->cur_token == TOKEN_NEWLINE || + lexer->cur_token == TOKEN_SEMICOLON ) + { + advanceToken(lexer, true); + } + + /* Make sure every case advances the token + * otherwise we can be stuck in infinite loop */ + switch (lexer->cur_token) + { + case TOKEN_CONST: + parseConst(lexer, scope, kind); + break; + case TOKEN_FUNCTION: + parseFunction(lexer, scope, kind); + break; + case TOKEN_MACRO: + parseMacro(lexer, scope, kind); + break; + case TOKEN_MODULE: + parseModule(lexer, scope, kind); + break; + case TOKEN_STRUCT: + parseStruct(lexer, scope, kind); + break; + case TOKEN_TYPE: + parseType(lexer, scope, kind); + break; + case TOKEN_IMPORT: + parseImport(lexer, scope, kind); + break; + case TOKEN_IDENTIFIER: + skipWhitespace(lexer, false); + if (lexer->first_token && lexer->cur_c == '(') + { + parseShortFunction(lexer, scope, kind); + } + else + { + advanceToken(lexer, true); + } + break; + case TOKEN_OPEN_BLOCK: + level++; + advanceToken(lexer, true); + break; + case TOKEN_CLOSE_BLOCK: + level--; + advanceToken(lexer, true); + break; + default: + advanceToken(lexer, true); + break; + } + resetScope(scope, old_scope_len); + if (delim && level <= 0) + { + break; + } + } +} + +static void findJuliaTags (void) +{ + lexerState lexer; + vString* scope = vStringNew(); + initLexer(&lexer); + + parseExpr(&lexer, false, K_NONE, scope); + vStringDelete(scope); + + deInitLexer(&lexer); +} + +extern parserDefinition* JuliaParser (void) +{ + static const char *const extensions [] = { "jl", NULL }; + parserDefinition* def = parserNew ("Julia"); + def->kindTable = JuliaKinds; + def->kindCount = ARRAY_SIZE (JuliaKinds); + def->extensions = extensions; + def->parser = findJuliaTags; + def->keywordTable = JuliaKeywordTable; + def->keywordCount = ARRAY_SIZE (JuliaKeywordTable); + return def; +} diff --git a/source.mak b/source.mak index 2374c06955..e809b3ada3 100644 --- a/source.mak +++ b/source.mak @@ -244,6 +244,7 @@ PARSER_SRCS = \ parsers/jprop.c \ parsers/jscript.c \ parsers/json.c \ + parsers/julia.c \ parsers/ldscript.c \ parsers/lisp.c \ parsers/lua.c \ diff --git a/win32/ctags_vs2013.vcxproj b/win32/ctags_vs2013.vcxproj index 465adf6599..013e8087c9 100644 --- a/win32/ctags_vs2013.vcxproj +++ b/win32/ctags_vs2013.vcxproj @@ -219,6 +219,7 @@ + diff --git a/win32/ctags_vs2013.vcxproj.filters b/win32/ctags_vs2013.vcxproj.filters index 4ce7f3ca1b..cc2b168299 100644 --- a/win32/ctags_vs2013.vcxproj.filters +++ b/win32/ctags_vs2013.vcxproj.filters @@ -393,6 +393,9 @@ Source Files\Parsers + + Source Files\Parsers + Source Files\Parsers From 09796eb3d68ea63cd4dc00671c7174a86e9c86f8 Mon Sep 17 00:00:00 2001 From: Masatake YAMATO Date: Tue, 24 Nov 2020 00:11:18 +0900 Subject: [PATCH 2/2] Julia: disable unknown kind temporarily Signed-off-by: Masatake YAMATO --- Units/parser-julia.r/julia_test.d/expected.tags | 5 ----- parsers/julia.c | 6 +++--- 2 files changed, 3 insertions(+), 8 deletions(-) diff --git a/Units/parser-julia.r/julia_test.d/expected.tags b/Units/parser-julia.r/julia_test.d/expected.tags index c144b4dc47..15b4ad21b4 100644 --- a/Units/parser-julia.r/julia_test.d/expected.tags +++ b/Units/parser-julia.r/julia_test.d/expected.tags @@ -1,8 +1,3 @@ -Revise input.jl /^using Revise$/;" x -Normal input.jl /^import Distributions: Normal$/;" x -Random.randn input.jl /^using Random.randn$/;" x -Plots input.jl /^using Plots, Makie$/;" x -Makie input.jl /^using Plots, Makie$/;" x a input.jl /^const a::Int = 'c' # struct Struct_wrong3 end$/;" c test_macro input.jl /^macro test_macro() end$/;" m test_fun input.jl /^function test_fun(a::Int, b::T) where #$/;" f diff --git a/parsers/julia.c b/parsers/julia.c index 9d7b3516d0..ac7df5e51b 100644 --- a/parsers/julia.c +++ b/parsers/julia.c @@ -44,7 +44,7 @@ typedef enum { K_MODULE, K_STRUCT, K_TYPE, - K_IMPORT, + // K_IMPORT, K_NONE } JuliaKind; @@ -56,7 +56,7 @@ static kindDefinition JuliaKinds [] = { { true, 'n', "module", "Modules" }, { true, 's', "struct", "Structures" }, { true, 't', "type", "Types" }, - { true, 'x', "unknown", "Imported name"} + // { true, 'x', "unknown", "Imported name"} }; typedef enum { @@ -1163,7 +1163,7 @@ static void parseImport (lexerState *lexer, vString *scope, int parent_kind) while (lexer->cur_token == TOKEN_IDENTIFIER || lexer->cur_token == TOKEN_MACROCALL) { - addTag(lexer->token_str, vStringValue(name), NULL, K_IMPORT, lexer->line, lexer->pos, scope, parent_kind); + // addTag(lexer->token_str, vStringValue(name), NULL, K_IMPORT, lexer->line, lexer->pos, scope, parent_kind); skipWhitespace(lexer, false); if (lexer->cur_c == ',')