aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
Diffstat (limited to 'tree-sitter-bash/src/scanner.c')
-rw-r--r--tree-sitter-bash/src/scanner.c1154
1 files changed, 0 insertions, 1154 deletions
diff --git a/tree-sitter-bash/src/scanner.c b/tree-sitter-bash/src/scanner.c
deleted file mode 100644
index b37f9126..00000000
--- a/tree-sitter-bash/src/scanner.c
+++ /dev/null
@@ -1,1154 +0,0 @@
-#include <assert.h>
-#include <ctype.h>
-#include <string.h>
-#include <wctype.h>
-
-#include "tree_sitter/parser.h"
-
-#define MAX(a, b) ((a) > (b) ? (a) : (b))
-
-#define MIN(a, b) ((a) < (b) ? (a) : (b))
-
-#define STRING_RESIZE(vec, _cap) \
- void *tmp = realloc((vec).data, ((_cap) + 1) * sizeof((vec).data[0])); \
- assert(tmp != NULL); \
- (vec).data = tmp; \
- memset((vec).data + (vec).len, 0, \
- (((_cap) + 1) - (vec).len) * sizeof((vec).data[0])); \
- (vec).cap = (_cap);
-
-#define STRING_GROW(vec, _cap) \
- if ((vec).cap < (_cap)) { \
- STRING_RESIZE((vec), (_cap)); \
- }
-
-#define STRING_PUSH(vec, el) \
- if ((vec).cap == (vec).len) { \
- STRING_RESIZE((vec), MAX(16, (vec).len * 2)); \
- } \
- (vec).data[(vec).len++] = (el);
-
-#define STRING_FREE(vec) \
- if ((vec).data != NULL) \
- free((vec).data);
-
-#define STRING_CLEAR(vec) \
- { \
- (vec).len = 0; \
- memset((vec).data, 0, (vec).cap * sizeof(char)); \
- }
-
-enum TokenType {
- HEREDOC_START,
- SIMPLE_HEREDOC_BODY,
- HEREDOC_BODY_BEGINNING,
- HEREDOC_CONTENT,
- HEREDOC_END,
- FILE_DESCRIPTOR,
- EMPTY_VALUE,
- CONCAT,
- VARIABLE_NAME,
- TEST_OPERATOR,
- REGEX,
- REGEX_NO_SLASH,
- REGEX_NO_SPACE,
- EXPANSION_WORD,
- EXTGLOB_PATTERN,
- BARE_DOLLAR,
- BRACE_START,
- IMMEDIATE_DOUBLE_HASH,
- EXTERNAL_EXPANSION_SYM_HASH,
- EXTERNAL_EXPANSION_SYM_BANG,
- EXTERNAL_EXPANSION_SYM_EQUAL,
- CLOSING_BRACE,
- CLOSING_BRACKET,
- HEREDOC_ARROW,
- HEREDOC_ARROW_DASH,
- NEWLINE,
- ERROR_RECOVERY,
-};
-
-typedef struct {
- uint32_t cap;
- uint32_t len;
- char *data;
-} String;
-
-static String string_new() {
- return (String){.cap = 16, .len = 0, .data = calloc(1, sizeof(char) * 17)};
-}
-
-typedef struct {
- bool heredoc_is_raw;
- bool started_heredoc;
- bool heredoc_allows_indent;
- uint8_t last_glob_paren_depth;
- String heredoc_delimiter;
- String current_leading_word;
-} Scanner;
-
-static inline void advance(TSLexer *lexer) { lexer->advance(lexer, false); }
-
-static inline void skip(TSLexer *lexer) { lexer->advance(lexer, true); }
-
-static inline bool in_error_recovery(const bool *valid_symbols) {
- return valid_symbols[ERROR_RECOVERY];
-}
-
-static inline void reset(Scanner *scanner) {
- scanner->heredoc_is_raw = false;
- scanner->started_heredoc = false;
- scanner->heredoc_allows_indent = false;
- STRING_CLEAR(scanner->heredoc_delimiter);
-}
-
-static unsigned serialize(Scanner *scanner, char *buffer) {
- if (scanner->heredoc_delimiter.len + 4 >=
- TREE_SITTER_SERIALIZATION_BUFFER_SIZE) {
- return 0;
- }
- buffer[0] = (char)scanner->heredoc_is_raw;
- buffer[1] = (char)scanner->started_heredoc;
- buffer[2] = (char)scanner->heredoc_allows_indent;
- buffer[3] = (char)scanner->last_glob_paren_depth;
- memcpy(&buffer[4], scanner->heredoc_delimiter.data,
- scanner->heredoc_delimiter.len);
- return scanner->heredoc_delimiter.len + 4;
-}
-
-static void deserialize(Scanner *scanner, const char *buffer, unsigned length) {
- if (length == 0) {
- reset(scanner);
- } else {
- scanner->heredoc_is_raw = buffer[0];
- scanner->started_heredoc = buffer[1];
- scanner->heredoc_allows_indent = buffer[2];
- scanner->last_glob_paren_depth = buffer[3];
- scanner->heredoc_delimiter.len = length - 4;
- STRING_GROW(scanner->heredoc_delimiter, scanner->heredoc_delimiter.len);
- memcpy(scanner->heredoc_delimiter.data, &buffer[4],
- scanner->heredoc_delimiter.len);
- }
-}
-
-/**
- * Consume a "word" in POSIX parlance, and returns it unquoted.
- *
- * This is an approximate implementation that doesn't deal with any
- * POSIX-mandated substitution, and assumes the default value for
- * IFS.
- */
-static bool advance_word(TSLexer *lexer, String *unquoted_word) {
- bool empty = true;
-
- int32_t quote = 0;
- if (lexer->lookahead == '\'' || lexer->lookahead == '"') {
- quote = lexer->lookahead;
- advance(lexer);
- }
-
- while (lexer->lookahead &&
- !(quote ? lexer->lookahead == quote : iswspace(lexer->lookahead))) {
- if (lexer->lookahead == '\\') {
- advance(lexer);
- if (!lexer->lookahead) {
- return false;
- }
- }
- empty = false;
- STRING_PUSH(*unquoted_word, lexer->lookahead);
- advance(lexer);
- }
-
- if (quote && lexer->lookahead == quote) {
- advance(lexer);
- }
-
- return !empty;
-}
-
-static inline bool scan_bare_dollar(TSLexer *lexer) {
- while (iswspace(lexer->lookahead) && lexer->lookahead != '\n' &&
- !lexer->eof(lexer)) {
- skip(lexer);
- }
-
- if (lexer->lookahead == '$') {
- advance(lexer);
- lexer->result_symbol = BARE_DOLLAR;
- lexer->mark_end(lexer);
- return iswspace(lexer->lookahead) || lexer->eof(lexer);
- lexer->lookahead == '\"';
- }
-
- return false;
-}
-
-static bool scan_heredoc_start(Scanner *scanner, TSLexer *lexer) {
- while (iswspace(lexer->lookahead)) {
- skip(lexer);
- }
-
- lexer->result_symbol = HEREDOC_START;
- scanner->heredoc_is_raw = lexer->lookahead == '\'' ||
- lexer->lookahead == '"' ||
- lexer->lookahead == '\\';
- scanner->started_heredoc = false;
- STRING_CLEAR(scanner->heredoc_delimiter);
-
- bool found_delimiter = advance_word(lexer, &scanner->heredoc_delimiter);
- if (!found_delimiter)
- STRING_CLEAR(scanner->heredoc_delimiter);
- return found_delimiter;
-}
-
-static bool scan_heredoc_end_identifier(Scanner *scanner, TSLexer *lexer) {
- STRING_CLEAR(scanner->current_leading_word);
- // Scan the first 'n' characters on this line, to see if they match the
- // heredoc delimiter
- int32_t size = 0;
- while (lexer->lookahead != '\0' && lexer->lookahead != '\n' &&
- ((int32_t)scanner->heredoc_delimiter.data[size++]) ==
- lexer->lookahead &&
- scanner->current_leading_word.len < scanner->heredoc_delimiter.len) {
- STRING_PUSH(scanner->current_leading_word, lexer->lookahead);
- advance(lexer);
- }
- return strcmp(scanner->current_leading_word.data,
- scanner->heredoc_delimiter.data) == 0;
-}
-
-static bool scan_heredoc_content(Scanner *scanner, TSLexer *lexer,
- enum TokenType middle_type,
- enum TokenType end_type) {
- bool did_advance = false;
-
- for (;;) {
- switch (lexer->lookahead) {
- case '\0': {
- if (lexer->eof(lexer) && did_advance) {
- reset(scanner);
- lexer->result_symbol = end_type;
- return true;
- }
- return false;
- }
-
- case '\\': {
- did_advance = true;
- advance(lexer);
- advance(lexer);
- break;
- }
-
- case '$': {
- if (scanner->heredoc_is_raw) {
- did_advance = true;
- advance(lexer);
- break;
- }
- if (did_advance) {
- lexer->mark_end(lexer);
- lexer->result_symbol = middle_type;
- scanner->started_heredoc = true;
- advance(lexer);
- if (isalpha(lexer->lookahead) || lexer->lookahead == '{') {
- return true;
- }
- break;
- }
- if (middle_type == HEREDOC_BODY_BEGINNING &&
- lexer->get_column(lexer) == 0) {
- lexer->result_symbol = middle_type;
- scanner->started_heredoc = true;
- return true;
- }
- return false;
- }
-
- case '\n': {
- if (!did_advance) {
- skip(lexer);
- } else {
- advance(lexer);
- }
- did_advance = true;
- if (scanner->heredoc_allows_indent) {
- while (iswspace(lexer->lookahead)) {
- advance(lexer);
- }
- }
- lexer->result_symbol =
- scanner->started_heredoc ? middle_type : end_type;
- lexer->mark_end(lexer);
- if (scan_heredoc_end_identifier(scanner, lexer)) {
- return true;
- }
- break;
- }
-
- default: {
- if (lexer->get_column(lexer) == 0) {
- // an alternative is to check the starting column of the
- // heredoc body and track that statefully
- while (iswspace(lexer->lookahead)) {
- did_advance ? advance(lexer) : skip(lexer);
- }
- if (end_type != SIMPLE_HEREDOC_BODY) {
- lexer->result_symbol = middle_type;
- if (scan_heredoc_end_identifier(scanner, lexer)) {
- return true;
- }
- }
- if (end_type == SIMPLE_HEREDOC_BODY) {
- lexer->result_symbol = end_type;
- lexer->mark_end(lexer);
- if (scan_heredoc_end_identifier(scanner, lexer)) {
- return true;
- }
- }
- }
- did_advance = true;
- advance(lexer);
- break;
- }
- }
- }
-}
-
-static bool scan(Scanner *scanner, TSLexer *lexer, const bool *valid_symbols) {
- if (valid_symbols[CONCAT] && !in_error_recovery(valid_symbols)) {
- if (!(lexer->lookahead == 0 || iswspace(lexer->lookahead) ||
- lexer->lookahead == '>' || lexer->lookahead == '<' ||
- lexer->lookahead == ')' || lexer->lookahead == '(' ||
- lexer->lookahead == ';' || lexer->lookahead == '&' ||
- lexer->lookahead == '|' ||
- (lexer->lookahead == '}' && valid_symbols[CLOSING_BRACE]) ||
- (lexer->lookahead == ']' && valid_symbols[CLOSING_BRACKET]))) {
- lexer->result_symbol = CONCAT;
- // So for a`b`, we want to return a concat. We check if the 2nd
- // backtick has whitespace after it, and if it does we return
- // concat.
- if (lexer->lookahead == '`') {
- lexer->mark_end(lexer);
- advance(lexer);
- while (lexer->lookahead != '`' && !lexer->eof(lexer)) {
- advance(lexer);
- }
- if (lexer->eof(lexer)) {
- return false;
- }
- if (lexer->lookahead == '`') {
- advance(lexer);
- }
- return iswspace(lexer->lookahead) || lexer->eof(lexer);
- }
- // strings w/ expansions that contains escaped quotes or backslashes
- // need this to return a concat
- if (lexer->lookahead == '\\') {
- lexer->mark_end(lexer);
- advance(lexer);
- if (lexer->lookahead == '"' || lexer->lookahead == '\'' ||
- lexer->lookahead == '\\') {
- return true;
- }
- if (lexer->eof(lexer)) {
- return false;
- }
- } else {
- return true;
- }
- }
- if (iswspace(lexer->lookahead) && valid_symbols[CLOSING_BRACE] &&
- !valid_symbols[EXPANSION_WORD]) {
- lexer->result_symbol = CONCAT;
- return true;
- }
- }
-
- if (valid_symbols[IMMEDIATE_DOUBLE_HASH] &&
- !in_error_recovery(valid_symbols)) {
- // advance two # and ensure not } after
- if (lexer->lookahead == '#') {
- lexer->mark_end(lexer);
- advance(lexer);
- if (lexer->lookahead == '#') {
- advance(lexer);
- if (lexer->lookahead != '}') {
- lexer->result_symbol = IMMEDIATE_DOUBLE_HASH;
- lexer->mark_end(lexer);
- return true;
- }
- }
- }
- }
-
- if (valid_symbols[EXTERNAL_EXPANSION_SYM_HASH] &&
- !in_error_recovery(valid_symbols)) {
- if (lexer->lookahead == '#' || lexer->lookahead == '=' ||
- lexer->lookahead == '!') {
- lexer->result_symbol =
- lexer->lookahead == '#' ? EXTERNAL_EXPANSION_SYM_HASH
- : lexer->lookahead == '!' ? EXTERNAL_EXPANSION_SYM_BANG
- : EXTERNAL_EXPANSION_SYM_EQUAL;
- advance(lexer);
- lexer->mark_end(lexer);
- while (lexer->lookahead == '#' || lexer->lookahead == '=' ||
- lexer->lookahead == '!') {
- advance(lexer);
- }
- while (iswspace(lexer->lookahead)) {
- skip(lexer);
- }
- if (lexer->lookahead == '}') {
- return true;
- }
- return false;
- }
- }
-
- if (valid_symbols[EMPTY_VALUE]) {
- if (iswspace(lexer->lookahead) || lexer->eof(lexer) ||
- lexer->lookahead == ';' || lexer->lookahead == '&') {
- lexer->result_symbol = EMPTY_VALUE;
- return true;
- }
- }
-
- if ((valid_symbols[HEREDOC_BODY_BEGINNING] ||
- valid_symbols[SIMPLE_HEREDOC_BODY]) &&
- scanner->heredoc_delimiter.len > 0 && !scanner->started_heredoc &&
- !in_error_recovery(valid_symbols)) {
- return scan_heredoc_content(scanner, lexer, HEREDOC_BODY_BEGINNING,
- SIMPLE_HEREDOC_BODY);
- }
-
- if (valid_symbols[HEREDOC_END]) {
- if (scan_heredoc_end_identifier(scanner, lexer)) {
- reset(scanner);
- lexer->result_symbol = HEREDOC_END;
- return true;
- }
- }
-
- if (valid_symbols[HEREDOC_CONTENT] && scanner->heredoc_delimiter.len > 0 &&
- scanner->started_heredoc && !in_error_recovery(valid_symbols)) {
- return scan_heredoc_content(scanner, lexer, HEREDOC_CONTENT,
- HEREDOC_END);
- }
-
- if (valid_symbols[HEREDOC_START] && !in_error_recovery(valid_symbols)) {
- return scan_heredoc_start(scanner, lexer);
- }
-
- if (valid_symbols[TEST_OPERATOR] && !valid_symbols[EXPANSION_WORD]) {
- while (iswspace(lexer->lookahead) && lexer->lookahead != '\n') {
- skip(lexer);
- }
-
- if (lexer->lookahead == '\\') {
- if (valid_symbols[EXTGLOB_PATTERN]) {
- goto extglob_pattern;
- }
- if (valid_symbols[REGEX_NO_SPACE]) {
- goto regex;
- }
- skip(lexer);
-
- if (lexer->eof(lexer)) {
- return false;
- }
-
- if (lexer->lookahead == '\r') {
- skip(lexer);
- if (lexer->lookahead == '\n') {
- skip(lexer);
- }
- } else if (lexer->lookahead == '\n') {
- skip(lexer);
- } else {
- return false;
- }
-
- while (iswspace(lexer->lookahead)) {
- skip(lexer);
- }
- }
-
- if (lexer->lookahead == '\n' && !valid_symbols[NEWLINE]) {
- skip(lexer);
-
- while (iswspace(lexer->lookahead)) {
- skip(lexer);
- }
- }
-
- if (lexer->lookahead == '-') {
- advance(lexer);
-
- bool advanced_once = false;
- while (isalpha(lexer->lookahead)) {
- advanced_once = true;
- advance(lexer);
- }
-
- if (iswspace(lexer->lookahead) && advanced_once) {
- lexer->mark_end(lexer);
- advance(lexer);
- if (lexer->lookahead == '}' && valid_symbols[CLOSING_BRACE]) {
- if (valid_symbols[EXPANSION_WORD]) {
- lexer->mark_end(lexer);
- lexer->result_symbol = EXPANSION_WORD;
- return true;
- }
- return false;
- }
- lexer->result_symbol = TEST_OPERATOR;
- return true;
- }
- if (iswspace(lexer->lookahead) && valid_symbols[EXTGLOB_PATTERN]) {
- lexer->result_symbol = EXTGLOB_PATTERN;
- return true;
- }
- }
-
- if (valid_symbols[BARE_DOLLAR] && !in_error_recovery(valid_symbols) &&
- scan_bare_dollar(lexer)) {
- return true;
- }
- }
-
- if ((valid_symbols[VARIABLE_NAME] || valid_symbols[FILE_DESCRIPTOR] ||
- valid_symbols[HEREDOC_ARROW]) &&
- !valid_symbols[REGEX_NO_SLASH] && !in_error_recovery(valid_symbols)) {
- for (;;) {
- if ((lexer->lookahead == ' ' || lexer->lookahead == '\t' ||
- lexer->lookahead == '\r' ||
- (lexer->lookahead == '\n' && !valid_symbols[NEWLINE])) &&
- !valid_symbols[EXPANSION_WORD]) {
- skip(lexer);
- } else if (lexer->lookahead == '\\') {
- skip(lexer);
-
- if (lexer->eof(lexer)) {
- lexer->mark_end(lexer);
- lexer->result_symbol = VARIABLE_NAME;
- return true;
- }
-
- if (lexer->lookahead == '\r') {
- skip(lexer);
- }
- if (lexer->lookahead == '\n') {
- skip(lexer);
- } else {
- if (lexer->lookahead == '\\' &&
- valid_symbols[EXPANSION_WORD]) {
- goto expansion_word;
- }
- return false;
- }
- } else {
- break;
- }
- }
-
- // no '*', '@', '?', '-', '$', '0', '_'
- if (!valid_symbols[EXPANSION_WORD] &&
- (lexer->lookahead == '*' || lexer->lookahead == '@' ||
- lexer->lookahead == '?' || lexer->lookahead == '-' ||
- lexer->lookahead == '0' || lexer->lookahead == '_')) {
- lexer->mark_end(lexer);
- advance(lexer);
- if (lexer->lookahead == '=' || lexer->lookahead == '[' ||
- lexer->lookahead == ':' || lexer->lookahead == '-' ||
- lexer->lookahead == '%' || lexer->lookahead == '#' ||
- lexer->lookahead == '/') {
- return false;
- }
- if (valid_symbols[EXTGLOB_PATTERN] && iswspace(lexer->lookahead)) {
- lexer->mark_end(lexer);
- lexer->result_symbol = EXTGLOB_PATTERN;
- return true;
- }
- }
-
- if (valid_symbols[HEREDOC_ARROW] && lexer->lookahead == '<') {
- advance(lexer);
- if (lexer->lookahead == '<') {
- advance(lexer);
- if (lexer->lookahead == '-') {
- advance(lexer);
- scanner->heredoc_allows_indent = true;
- lexer->result_symbol = HEREDOC_ARROW_DASH;
- } else if (lexer->lookahead == '<' || lexer->lookahead == '=') {
- return false;
- } else {
- scanner->heredoc_allows_indent = false;
- lexer->result_symbol = HEREDOC_ARROW;
- }
- return true;
- }
- return false;
- }
-
- bool is_number = true;
- if (iswdigit(lexer->lookahead)) {
- advance(lexer);
- } else if (iswalpha(lexer->lookahead) || lexer->lookahead == '_') {
- is_number = false;
- advance(lexer);
- } else {
- if (lexer->lookahead == '{') {
- goto brace_start;
- }
- if (valid_symbols[EXPANSION_WORD]) {
- goto expansion_word;
- }
- if (valid_symbols[EXTGLOB_PATTERN]) {
- goto extglob_pattern;
- }
- return false;
- }
-
- for (;;) {
- if (iswdigit(lexer->lookahead)) {
- advance(lexer);
- } else if (iswalpha(lexer->lookahead) || lexer->lookahead == '_') {
- is_number = false;
- advance(lexer);
- } else {
- break;
- }
- }
-
- if (is_number && valid_symbols[FILE_DESCRIPTOR] &&
- (lexer->lookahead == '>' || lexer->lookahead == '<')) {
- lexer->result_symbol = FILE_DESCRIPTOR;
- return true;
- }
-
- if (valid_symbols[VARIABLE_NAME]) {
- if (lexer->lookahead == '+') {
- lexer->mark_end(lexer);
- advance(lexer);
- if (lexer->lookahead == '=' || lexer->lookahead == ':' ||
- valid_symbols[CLOSING_BRACE]) {
- lexer->result_symbol = VARIABLE_NAME;
- return true;
- }
- return false;
- }
- if (lexer->lookahead == '/') {
- return false;
- }
- if (lexer->lookahead == '=' || lexer->lookahead == '[' ||
- (lexer->lookahead == ':' && !valid_symbols[CLOSING_BRACE]) ||
- lexer->lookahead == '%' ||
- (lexer->lookahead == '#' && !is_number) ||
- lexer->lookahead == '@' ||
- (lexer->lookahead == '-' && valid_symbols[CLOSING_BRACE])) {
- lexer->mark_end(lexer);
- lexer->result_symbol = VARIABLE_NAME;
- return true;
- }
-
- if (lexer->lookahead == '?') {
- lexer->mark_end(lexer);
- advance(lexer);
- lexer->result_symbol = VARIABLE_NAME;
- return isalpha(lexer->lookahead);
- }
- }
-
- return false;
- }
-
- if (valid_symbols[BARE_DOLLAR] && !in_error_recovery(valid_symbols) &&
- scan_bare_dollar(lexer)) {
- return true;
- }
-
-regex:
- if ((valid_symbols[REGEX] || valid_symbols[REGEX_NO_SLASH] ||
- valid_symbols[REGEX_NO_SPACE]) &&
- !in_error_recovery(valid_symbols)) {
- if (valid_symbols[REGEX] || valid_symbols[REGEX_NO_SPACE]) {
- while (iswspace(lexer->lookahead)) {
- skip(lexer);
- }
- }
-
- if ((lexer->lookahead != '"' && lexer->lookahead != '\'') ||
- (lexer->lookahead == '$' && valid_symbols[REGEX_NO_SLASH])) {
- typedef struct {
- bool done;
- bool advanced_once;
- bool found_non_alnumdollarunderdash;
- uint32_t paren_depth;
- uint32_t bracket_depth;
- uint32_t brace_depth;
- } State;
-
- if (lexer->lookahead == '$' && valid_symbols[REGEX_NO_SLASH]) {
- lexer->mark_end(lexer);
- advance(lexer);
- if (lexer->lookahead == '(') {
- return false;
- }
- }
-
- lexer->mark_end(lexer);
-
- State state = {false, false, false, 0, 0, 0};
- while (!state.done) {
- switch (lexer->lookahead) {
- case '\0':
- return false;
- case '(':
- state.paren_depth++;
- break;
- case '[':
- state.bracket_depth++;
- break;
- case '{':
- state.brace_depth++;
- break;
- case ')':
- if (state.paren_depth == 0) {
- state.done = true;
- }
- state.paren_depth--;
- break;
- case ']':
- if (state.bracket_depth == 0) {
- state.done = true;
- }
- state.bracket_depth--;
- break;
- case '}':
- if (state.brace_depth == 0) {
- state.done = true;
- }
- state.brace_depth--;
- break;
- }
-
- if (!state.done) {
- if (valid_symbols[REGEX]) {
- bool was_space = iswspace(lexer->lookahead);
- advance(lexer);
- state.advanced_once = true;
- if (!was_space || state.paren_depth > 0) {
- lexer->mark_end(lexer);
- }
- } else if (valid_symbols[REGEX_NO_SLASH]) {
- if (lexer->lookahead == '/') {
- lexer->mark_end(lexer);
- lexer->result_symbol = REGEX_NO_SLASH;
- return state.advanced_once;
- }
- if (lexer->lookahead == '\\') {
- advance(lexer);
- state.advanced_once = true;
- if (!lexer->eof(lexer) && lexer->lookahead != '[' &&
- lexer->lookahead != '/') {
- advance(lexer);
- lexer->mark_end(lexer);
- }
- } else {
- bool was_space = iswspace(lexer->lookahead);
- advance(lexer);
- state.advanced_once = true;
- if (!was_space) {
- lexer->mark_end(lexer);
- }
- }
- } else if (valid_symbols[REGEX_NO_SPACE]) {
- if (lexer->lookahead == '\\') {
- state.found_non_alnumdollarunderdash = true;
- advance(lexer);
- if (!lexer->eof(lexer)) {
- advance(lexer);
- }
- } else if (lexer->lookahead == '$') {
- lexer->mark_end(lexer);
- advance(lexer);
- // do not parse a command
- // substitution
- if (lexer->lookahead == '(') {
- return false;
- }
- // end $ always means regex, e.g.
- // 99999999$
- if (iswspace(lexer->lookahead)) {
- lexer->result_symbol = REGEX_NO_SPACE;
- lexer->mark_end(lexer);
- return true;
- }
- } else {
- if (iswspace(lexer->lookahead) &&
- state.paren_depth == 0) {
- lexer->mark_end(lexer);
- lexer->result_symbol = REGEX_NO_SPACE;
- return state.found_non_alnumdollarunderdash;
- }
- if (!iswalnum(lexer->lookahead) &&
- lexer->lookahead != '$' &&
- lexer->lookahead != '-' &&
- lexer->lookahead != '_') {
- state.found_non_alnumdollarunderdash = true;
- }
- advance(lexer);
- }
- }
- }
- }
-
- lexer->result_symbol =
- valid_symbols[REGEX_NO_SLASH] ? REGEX_NO_SLASH
- : valid_symbols[REGEX_NO_SPACE] ? REGEX_NO_SPACE
- : REGEX;
- if (valid_symbols[REGEX] && !state.advanced_once) {
- return false;
- }
- return true;
- }
- }
-
-extglob_pattern:
- if (valid_symbols[EXTGLOB_PATTERN]) {
- // first skip ws, then check for ? * + @ !
- while (iswspace(lexer->lookahead)) {
- skip(lexer);
- }
-
- if (lexer->lookahead == '?' || lexer->lookahead == '*' ||
- lexer->lookahead == '+' || lexer->lookahead == '@' ||
- lexer->lookahead == '!' || lexer->lookahead == '-' ||
- lexer->lookahead == ')' || lexer->lookahead == '\\' ||
- lexer->lookahead == '.') {
- if (lexer->lookahead == '\\') {
- advance(lexer);
- if ((iswspace(lexer->lookahead) || lexer->lookahead == '"') &&
- lexer->lookahead != '\r' && lexer->lookahead != '\n') {
- advance(lexer);
- } else {
- return false;
- }
- }
-
- if (lexer->lookahead == ')' &&
- scanner->last_glob_paren_depth == 0) {
- lexer->mark_end(lexer);
- advance(lexer);
-
- if (iswspace(lexer->lookahead)) {
- return false;
- }
- }
-
- lexer->mark_end(lexer);
- advance(lexer);
-
- // -\w is just a word, find something else special
- if (lexer->lookahead == '-') {
- lexer->mark_end(lexer);
- advance(lexer);
- while (isalnum(lexer->lookahead)) {
- advance(lexer);
- }
-
- if (lexer->lookahead == ')' || lexer->lookahead == '\\' ||
- lexer->lookahead == '.') {
- return false;
- }
- lexer->mark_end(lexer);
- }
-
- // case item -) or *)
- if (lexer->lookahead == ')' &&
- scanner->last_glob_paren_depth == 0) {
- lexer->mark_end(lexer);
- advance(lexer);
- if (iswspace(lexer->lookahead)) {
- lexer->result_symbol = EXTGLOB_PATTERN;
- return true;
- }
- }
-
- if (iswspace(lexer->lookahead)) {
- lexer->mark_end(lexer);
- lexer->result_symbol = EXTGLOB_PATTERN;
- scanner->last_glob_paren_depth = 0;
- return true;
- }
-
- if (lexer->lookahead == '$') {
- lexer->mark_end(lexer);
- advance(lexer);
- if (lexer->lookahead == '{' || lexer->lookahead == '(') {
- lexer->result_symbol = EXTGLOB_PATTERN;
- return true;
- }
- }
-
- if (lexer->lookahead == '|') {
- lexer->mark_end(lexer);
- advance(lexer);
- if (lexer->lookahead == '\\' || lexer->lookahead == '\r' ||
- lexer->lookahead == '\n') {
- lexer->result_symbol = EXTGLOB_PATTERN;
- return true;
- }
- }
-
- if (!isalnum(lexer->lookahead) && lexer->lookahead != '(' &&
- lexer->lookahead != '"' && lexer->lookahead != '[' &&
- lexer->lookahead != '?' && lexer->lookahead != '/' &&
- lexer->lookahead != '\\' && lexer->lookahead != '_') {
- return false;
- }
-
- typedef struct {
- bool done;
- uint32_t paren_depth;
- uint32_t bracket_depth;
- uint32_t brace_depth;
- } State;
-
- State state = {false, scanner->last_glob_paren_depth, 0, 0};
- while (!state.done) {
- switch (lexer->lookahead) {
- case '\0':
- return false;
- case '(':
- state.paren_depth++;
- break;
- case '[':
- state.bracket_depth++;
- break;
- case '{':
- state.brace_depth++;
- break;
- case ')':
- if (state.paren_depth == 0) {
- state.done = true;
- }
- state.paren_depth--;
- break;
- case ']':
- if (state.bracket_depth == 0) {
- state.done = true;
- }
- state.bracket_depth--;
- break;
- case '}':
- if (state.brace_depth == 0) {
- state.done = true;
- }
- state.brace_depth--;
- break;
- }
-
- if (!state.done) {
- bool was_space = iswspace(lexer->lookahead);
- if (lexer->lookahead == '$') {
- lexer->mark_end(lexer);
- advance(lexer);
- if (lexer->lookahead == '(' ||
- lexer->lookahead == '{') {
- lexer->result_symbol = EXTGLOB_PATTERN;
- scanner->last_glob_paren_depth = state.paren_depth;
- return true;
- }
- }
- if (was_space) {
- lexer->mark_end(lexer);
- lexer->result_symbol = EXTGLOB_PATTERN;
- scanner->last_glob_paren_depth = 0;
- return true;
- }
- if (lexer->lookahead == '"') {
- lexer->mark_end(lexer);
- lexer->result_symbol = EXTGLOB_PATTERN;
- scanner->last_glob_paren_depth = 0;
- return true;
- }
- if (lexer->lookahead == '\\') {
- advance(lexer);
- if (iswspace(lexer->lookahead) ||
- lexer->lookahead == '"') {
- advance(lexer);
- }
- } else {
- advance(lexer);
- }
- if (!was_space) {
- lexer->mark_end(lexer);
- }
- }
- }
-
- lexer->result_symbol = EXTGLOB_PATTERN;
- scanner->last_glob_paren_depth = 0;
- return true;
- }
- scanner->last_glob_paren_depth = 0;
-
- return false;
- }
-
-expansion_word:
- if (valid_symbols[EXPANSION_WORD]) {
- bool advanced_once = false;
- bool advance_once_space = false;
- for (;;) {
- if (lexer->lookahead == '\"') {
- return false;
- }
- if (lexer->lookahead == '$') {
- lexer->mark_end(lexer);
- advance(lexer);
- if (lexer->lookahead == '{' || lexer->lookahead == '(' ||
- lexer->lookahead == '\'' || iswalnum(lexer->lookahead)) {
- lexer->result_symbol = EXPANSION_WORD;
- return advanced_once;
- }
- advanced_once = true;
- }
-
- if (lexer->lookahead == '}') {
- lexer->mark_end(lexer);
- lexer->result_symbol = EXPANSION_WORD;
- return advanced_once || advance_once_space;
- }
-
- if (lexer->lookahead == '(' &&
- !(advanced_once || advance_once_space)) {
- lexer->mark_end(lexer);
- advance(lexer);
- while (lexer->lookahead != ')' && !lexer->eof(lexer)) {
- // if we find a $( or ${ assume this is valid and is a
- // garbage concatenation of some weird word + an expansion
- // I wonder where this can fail
- if (lexer->lookahead == '$') {
- lexer->mark_end(lexer);
- advance(lexer);
- if (lexer->lookahead == '{' ||
- lexer->lookahead == '(' ||
- lexer->lookahead == '\'' ||
- iswalnum(lexer->lookahead)) {
- lexer->result_symbol = EXPANSION_WORD;
- return advanced_once;
- }
- advanced_once = true;
- } else {
- advanced_once =
- advanced_once || !iswspace(lexer->lookahead);
- advance_once_space =
- advance_once_space || iswspace(lexer->lookahead);
- advance(lexer);
- }
- }
- lexer->mark_end(lexer);
- if (lexer->lookahead == ')') {
- advanced_once = true;
- advance(lexer);
- lexer->mark_end(lexer);
- if (lexer->lookahead == '}') {
- return false;
- }
- } else {
- return false;
- }
- }
-
- if (lexer->lookahead == '\'') {
- return false;
- }
-
- if (lexer->eof(lexer)) {
- return false;
- }
- advanced_once = advanced_once || !iswspace(lexer->lookahead);
- advance_once_space =
- advance_once_space || iswspace(lexer->lookahead);
- advance(lexer);
- }
- }
-
-brace_start:
- if (valid_symbols[BRACE_START] && !in_error_recovery(valid_symbols)) {
- while (iswspace(lexer->lookahead)) {
- skip(lexer);
- }
-
- if (lexer->lookahead != '{') {
- return false;
- }
-
- advance(lexer);
- lexer->mark_end(lexer);
-
- while (isdigit(lexer->lookahead)) {
- advance(lexer);
- }
-
- if (lexer->lookahead != '.') {
- return false;
- }
- advance(lexer);
-
- if (lexer->lookahead != '.') {
- return false;
- }
- advance(lexer);
-
- while (isdigit(lexer->lookahead)) {
- advance(lexer);
- }
-
- if (lexer->lookahead != '}') {
- return false;
- }
-
- lexer->result_symbol = BRACE_START;
- return true;
- }
-
- return false;
-}
-
-void *tree_sitter_bash_external_scanner_create() {
- Scanner *scanner = calloc(1, sizeof(Scanner));
- scanner->heredoc_delimiter = string_new();
- scanner->current_leading_word = string_new();
- return scanner;
-}
-
-bool tree_sitter_bash_external_scanner_scan(void *payload, TSLexer *lexer,
- const bool *valid_symbols) {
- Scanner *scanner = (Scanner *)payload;
- return scan(scanner, lexer, valid_symbols);
-}
-
-unsigned tree_sitter_bash_external_scanner_serialize(void *payload,
- char *state) {
- Scanner *scanner = (Scanner *)payload;
- return serialize(scanner, state);
-}
-
-void tree_sitter_bash_external_scanner_deserialize(void *payload,
- const char *state,
- unsigned length) {
- Scanner *scanner = (Scanner *)payload;
- deserialize(scanner, state, length);
-}
-
-void tree_sitter_bash_external_scanner_destroy(void *payload) {
- Scanner *scanner = (Scanner *)payload;
- STRING_FREE(scanner->heredoc_delimiter);
- STRING_FREE(scanner->current_leading_word);
- free(scanner);
-}