lexer.h 6.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203
/*
 * This file is part of the MicroPython project, http://micropython.org/
 *
 * The MIT License (MIT)
 *
 * Copyright (c) 2013, 2014 Damien P. George
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
  26. #ifndef MICROPY_INCLUDED_PY_LEXER_H
  27. #define MICROPY_INCLUDED_PY_LEXER_H
  28. #include <stdint.h>
  29. #include "py/mpconfig.h"
  30. #include "py/qstr.h"
  31. #include "py/reader.h"
/* lexer.h -- simple tokeniser for MicroPython
 *
 * Uses (byte) length instead of null termination.
 * Tokens are the same - UTF-8 with (byte) length.
 */
  37. typedef enum _mp_token_kind_t {
  38. MP_TOKEN_END,
  39. MP_TOKEN_INVALID,
  40. MP_TOKEN_DEDENT_MISMATCH,
  41. MP_TOKEN_LONELY_STRING_OPEN,
  42. #if MICROPY_PY_FSTRINGS
  43. MP_TOKEN_MALFORMED_FSTRING,
  44. MP_TOKEN_FSTRING_RAW,
  45. #endif
  46. MP_TOKEN_NEWLINE,
  47. MP_TOKEN_INDENT,
  48. MP_TOKEN_DEDENT,
  49. MP_TOKEN_NAME,
  50. MP_TOKEN_INTEGER,
  51. MP_TOKEN_FLOAT_OR_IMAG,
  52. MP_TOKEN_STRING,
  53. MP_TOKEN_BYTES,
  54. MP_TOKEN_ELLIPSIS,
  55. MP_TOKEN_KW_FALSE,
  56. MP_TOKEN_KW_NONE,
  57. MP_TOKEN_KW_TRUE,
  58. MP_TOKEN_KW___DEBUG__,
  59. MP_TOKEN_KW_AND,
  60. MP_TOKEN_KW_AS,
  61. MP_TOKEN_KW_ASSERT,
  62. #if MICROPY_PY_ASYNC_AWAIT
  63. MP_TOKEN_KW_ASYNC,
  64. MP_TOKEN_KW_AWAIT,
  65. #endif
  66. MP_TOKEN_KW_BREAK,
  67. MP_TOKEN_KW_CLASS,
  68. MP_TOKEN_KW_CONTINUE,
  69. MP_TOKEN_KW_DEF,
  70. MP_TOKEN_KW_DEL,
  71. MP_TOKEN_KW_ELIF,
  72. MP_TOKEN_KW_ELSE,
  73. MP_TOKEN_KW_EXCEPT,
  74. MP_TOKEN_KW_FINALLY,
  75. MP_TOKEN_KW_FOR,
  76. MP_TOKEN_KW_FROM,
  77. MP_TOKEN_KW_GLOBAL,
  78. MP_TOKEN_KW_IF,
  79. MP_TOKEN_KW_IMPORT,
  80. MP_TOKEN_KW_IN,
  81. MP_TOKEN_KW_IS,
  82. MP_TOKEN_KW_LAMBDA,
  83. MP_TOKEN_KW_NONLOCAL,
  84. MP_TOKEN_KW_NOT,
  85. MP_TOKEN_KW_OR,
  86. MP_TOKEN_KW_PASS,
  87. MP_TOKEN_KW_RAISE,
  88. MP_TOKEN_KW_RETURN,
  89. MP_TOKEN_KW_TRY,
  90. MP_TOKEN_KW_WHILE,
  91. MP_TOKEN_KW_WITH,
  92. MP_TOKEN_KW_YIELD,
  93. MP_TOKEN_OP_ASSIGN,
  94. MP_TOKEN_OP_TILDE,
  95. // Order of these 6 matches corresponding mp_binary_op_t operator
  96. MP_TOKEN_OP_LESS,
  97. MP_TOKEN_OP_MORE,
  98. MP_TOKEN_OP_DBL_EQUAL,
  99. MP_TOKEN_OP_LESS_EQUAL,
  100. MP_TOKEN_OP_MORE_EQUAL,
  101. MP_TOKEN_OP_NOT_EQUAL,
  102. // Order of these 13 matches corresponding mp_binary_op_t operator
  103. MP_TOKEN_OP_PIPE,
  104. MP_TOKEN_OP_CARET,
  105. MP_TOKEN_OP_AMPERSAND,
  106. MP_TOKEN_OP_DBL_LESS,
  107. MP_TOKEN_OP_DBL_MORE,
  108. MP_TOKEN_OP_PLUS,
  109. MP_TOKEN_OP_MINUS,
  110. MP_TOKEN_OP_STAR,
  111. MP_TOKEN_OP_AT,
  112. MP_TOKEN_OP_DBL_SLASH,
  113. MP_TOKEN_OP_SLASH,
  114. MP_TOKEN_OP_PERCENT,
  115. MP_TOKEN_OP_DBL_STAR,
  116. // Order of these 13 matches corresponding mp_binary_op_t operator
  117. MP_TOKEN_DEL_PIPE_EQUAL,
  118. MP_TOKEN_DEL_CARET_EQUAL,
  119. MP_TOKEN_DEL_AMPERSAND_EQUAL,
  120. MP_TOKEN_DEL_DBL_LESS_EQUAL,
  121. MP_TOKEN_DEL_DBL_MORE_EQUAL,
  122. MP_TOKEN_DEL_PLUS_EQUAL,
  123. MP_TOKEN_DEL_MINUS_EQUAL,
  124. MP_TOKEN_DEL_STAR_EQUAL,
  125. MP_TOKEN_DEL_AT_EQUAL,
  126. MP_TOKEN_DEL_DBL_SLASH_EQUAL,
  127. MP_TOKEN_DEL_SLASH_EQUAL,
  128. MP_TOKEN_DEL_PERCENT_EQUAL,
  129. MP_TOKEN_DEL_DBL_STAR_EQUAL,
  130. MP_TOKEN_DEL_PAREN_OPEN,
  131. MP_TOKEN_DEL_PAREN_CLOSE,
  132. MP_TOKEN_DEL_BRACKET_OPEN,
  133. MP_TOKEN_DEL_BRACKET_CLOSE,
  134. MP_TOKEN_DEL_BRACE_OPEN,
  135. MP_TOKEN_DEL_BRACE_CLOSE,
  136. MP_TOKEN_DEL_COMMA,
  137. MP_TOKEN_DEL_COLON,
  138. MP_TOKEN_DEL_PERIOD,
  139. MP_TOKEN_DEL_SEMICOLON,
  140. MP_TOKEN_DEL_EQUAL,
  141. MP_TOKEN_DEL_MINUS_MORE,
  142. } mp_token_kind_t;
  143. // this data structure is exposed for efficiency
  144. // public members are: source_name, tok_line, tok_column, tok_kind, vstr
  145. typedef struct _mp_lexer_t {
  146. qstr source_name; // name of source
  147. mp_reader_t reader; // stream source
  148. unichar chr0, chr1, chr2; // current cached characters from source
  149. #if MICROPY_PY_FSTRINGS
  150. unichar chr0_saved, chr1_saved, chr2_saved; // current cached characters from alt source
  151. #endif
  152. size_t line; // current source line
  153. size_t column; // current source column
  154. mp_int_t emit_dent; // non-zero when there are INDENT/DEDENT tokens to emit
  155. mp_int_t nested_bracket_level; // >0 when there are nested brackets over multiple lines
  156. size_t alloc_indent_level;
  157. size_t num_indent_level;
  158. uint16_t *indent_level;
  159. size_t tok_line; // token source line
  160. size_t tok_column; // token source column
  161. mp_token_kind_t tok_kind; // token kind
  162. vstr_t vstr; // token data
  163. #if MICROPY_PY_FSTRINGS
  164. vstr_t fstring_args; // extracted arguments to pass to .format()
  165. size_t fstring_args_idx; // how many bytes of fstring_args have been read
  166. #endif
  167. } mp_lexer_t;
  168. mp_lexer_t *mp_lexer_new(qstr src_name, mp_reader_t reader);
  169. mp_lexer_t *mp_lexer_new_from_str_len(qstr src_name, const char *str, size_t len, size_t free_len);
  170. // If MICROPY_READER_POSIX or MICROPY_READER_VFS aren't enabled then
  171. // this function must be implemented by the port.
  172. mp_lexer_t *mp_lexer_new_from_file(qstr filename);
  173. #if MICROPY_HELPER_LEXER_UNIX
  174. mp_lexer_t *mp_lexer_new_from_fd(qstr filename, int fd, bool close_fd);
  175. #endif
  176. void mp_lexer_free(mp_lexer_t *lex);
  177. void mp_lexer_to_next(mp_lexer_t *lex);
  178. #endif // MICROPY_INCLUDED_PY_LEXER_H