fennec
Loading...
Searching...
No Matches
tokenizer.h
Go to the documentation of this file.
1// =====================================================================================================================
2// fennec, a free and open source game engine
3// Copyright © 2025 Medusa Slockbower
4//
5// This program is free software: you can redistribute it and/or modify
6// it under the terms of the GNU General Public License as published by
7// the Free Software Foundation, either version 3 of the License, or
8// (at your option) any later version.
9//
10// This program is distributed in the hope that it will be useful,
11// but WITHOUT ANY WARRANTY; without even the implied warranty of
12// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13// GNU General Public License for more details.
14//
15// You should have received a copy of the GNU General Public License
16// along with this program. If not, see <https://www.gnu.org/licenses/>.
17// =====================================================================================================================
18
30
31#ifndef FENNEC_INTERPRET_TOKENIZER_H
32#define FENNEC_INTERPRET_TOKENIZER_H
33
37#include <fennec/string/string.h>
38
//
// Escape sequences are tricky: sometimes they must be separated by whitespace,
// other times they need not be. Requiring a list of all possible escape sequences is unrealistic.
// We need to allow the user of this struct to specify rules for escape sequences. Here are some basic rules:
//
// An escape sequence is marked by an escape character, e.g. %, \, {{
// Multiple escape characters may be used in a single tokenizer, and each may have different rules.
// Escape characters may also be operators, brackets, or quotes.
// Escape sequences may contain operators, brackets, or quotes.
//
// Here are a few examples of escape sequences from various formats and languages:
// C: \\, \n, \0, \u200b
// PrintF: %s, %2.2f
// Python FMT: {{, }}
// SPSS: ''
//
55
56namespace fennec
57{
58
59struct escape_sequence {
60 virtual size_t operator[](const string& str, size_t i) = 0;
61};
62
63struct tokenizer {
64 using escseq = escape_sequence*;
65 using escmap = map<char, escape_sequence*>;
66
67 string delimiter; // markers that separate tokens
68 string operators; // operators are treated as individual tokens
69 string brackets; // characters that mark brackets
70 string quotes; // characters that mark a string sequence, entire string sequence is treated as one token
71 escmap escapes; // characters that mark the start of an escape sequence and validate them
72 bool numbers; // Anything that resembles a number
73
74 enum token_ : uint8_t {
75 token_text = 0,
76 token_integer,
77 token_string,
78 token_newline,
79 token_escaped,
80 token_operator,
81 token_bracket,
82 token_quoted,
83
84 num_token_types
85 };
86
87 using token = pair<string, uint8_t>;
88
89private:
90 static constexpr uint8_t token_delimiter = num_token_types;
91
92 constexpr list<token> operator()(const string& line) {
93 list<token> res;
94 priority_queue<pair<size_t, uint8_t>> idx;
95
96 for (char c : delimiter) {
97 size_t i = 0;
98 while (i != line.size()) {
99 size_t n = line.find(c, i);
100 // TODO
101 }
102 }
103
104 return res;
105 }
106
107private:
108};
109
110}
111
112#endif // FENNEC_INTERPRET_TOKENIZER_H
A header containing the definition for a linked list of values.
A header containing the definition for a mapping of keys to values.
::uint8_t uint8_t
Unsigned 8-bit integer.
Definition types.h:272