9#ifndef FORTRAN_PARSER_CHARACTERS_H_
10#define FORTRAN_PARSER_CHARACTERS_H_
// Global switch declared here and defined elsewhere.  EmitQuotedChar in this
// header reads it to choose hexadecimal digits when writing an escape for a
// byte or code point.
extern bool useHexadecimalEscapeSequences;

// Selects the character encoding used by the encode/decode/quote routines
// declared in this header.
enum class Encoding { LATIN_1, UTF_8 };
// True when ch lies in the range 'A' through 'Z' (inclusive).
inline constexpr bool IsUpperCaseLetter(char ch) {
  return ch >= 'A' && ch <= 'Z';
}
// True when ch lies in the range 'a' through 'z' (inclusive).
inline constexpr bool IsLowerCaseLetter(char ch) {
  return ch >= 'a' && ch <= 'z';
}
40inline constexpr bool IsLetter(
char ch) {
41 return IsUpperCaseLetter(ch) || IsLowerCaseLetter(ch);
// True when ch lies in the range '0' through '9' (inclusive).
inline constexpr bool IsDecimalDigit(char ch) { return ch >= '0' && ch <= '9'; }
// True when ch is '0'..'9', 'A'..'F', or 'a'..'f'; both letter cases are
// accepted.
inline constexpr bool IsHexadecimalDigit(char ch) {
  return (ch >= '0' && ch <= '9') || (ch >= 'A' && ch <= 'F') ||
      (ch >= 'a' && ch <= 'f');
}
// True when ch lies in the range '0' through '7' (inclusive).
inline constexpr bool IsOctalDigit(char ch) { return ch >= '0' && ch <= '7'; }
53inline constexpr bool IsLegalIdentifierStart(
char ch) {
54 return IsLetter(ch) || ch ==
'_' || ch ==
'@' || ch ==
'$';
57inline constexpr bool IsLegalInIdentifier(
char ch) {
58 return IsLegalIdentifierStart(ch) || IsDecimalDigit(ch);
// True when ch lies in the range ' ' (space) through '~' (inclusive).
inline constexpr bool IsPrintable(char ch) { return ch >= ' ' && ch <= '~'; }
// Whitespace test: the visible operands accept space, horizontal tab,
// newline, vertical tab and form feed.
// NOTE(review): the expression stops at a dangling `||`; the final operand
// and the closing of the function are missing from this extract. Restore them
// from the original header rather than guessing.
63inline constexpr bool IsWhiteSpace(
char ch) {
64 return ch ==
' ' || ch ==
'\t' || ch ==
'\n' || ch ==
'\v' || ch ==
'\f' ||
68inline constexpr char ToLowerCaseLetter(
char ch) {
69 return IsUpperCaseLetter(ch) ? ch -
'A' +
'a' : ch;
72inline constexpr char ToLowerCaseLetter(
char &&ch) {
73 return IsUpperCaseLetter(ch) ? ch -
'A' +
'a' : ch;
76inline std::string ToLowerCaseLetters(std::string_view str) {
77 std::string lowered{str};
78 for (
char &ch : lowered) {
79 ch = ToLowerCaseLetter(ch);
84inline constexpr char ToUpperCaseLetter(
char ch) {
85 return IsLowerCaseLetter(ch) ? ch -
'a' +
'A' : ch;
88inline constexpr char ToUpperCaseLetter(
char &&ch) {
89 return IsLowerCaseLetter(ch) ? ch -
'a' +
'A' : ch;
92inline std::string ToUpperCaseLetters(std::string_view str) {
93 std::string raised{str};
94 for (
char &ch : raised) {
95 ch = ToUpperCaseLetter(ch);
100inline constexpr bool IsSameApartFromCase(
char x,
char y) {
101 return ToLowerCaseLetter(x) == ToLowerCaseLetter(y);
// Returns ch - '0'.  No validation is performed: the result is only a digit
// value (0..9) when ch is a decimal digit.
inline constexpr char DecimalDigitValue(char ch) { return ch - '0'; }
106inline constexpr char HexadecimalDigitValue(
char ch) {
107 return IsUpperCaseLetter(ch) ? ch -
'A' + 10
108 : IsLowerCaseLetter(ch) ? ch -
'a' + 10
109 : DecimalDigitValue(ch);
// Takes a character and yields an optional character.
// NOTE(review): only the signature survives in this extract; the body is
// missing. Presumably maps the character following a backslash to the
// character it denotes -- confirm against the original header.
112inline constexpr std::optional<char> BackslashEscapeValue(
char ch) {
// Takes a character and yields an optional character.  EmitQuotedChar in this
// header tests the result for presence before falling back to numeric
// escapes.
// NOTE(review): only the signature survives in this extract; the body is
// missing. Presumably the inverse of BackslashEscapeValue -- confirm against
// the original header.
137inline constexpr std::optional<char> BackslashEscapeChar(
char ch) {
// Character-level validity test for Fortran tokens.
// NOTE(review): only the signature and the final return are visible in this
// extract; the lines in between are missing. The visible return accepts
// identifier-start characters and decimal digits; any cases handled before it
// must be recovered from the original header.
163inline constexpr bool IsValidFortranTokenCharacter(
char ch) {
187 return IsLegalIdentifierStart(ch) || IsDecimalDigit(ch);
// NOTE(review): these two member declarations have lost their enclosing
// struct header in this extract. They presumably belong to EncodedCharacter,
// whose `buffer` is read by EmitQuotedChar -- confirm against the original.
// Capacity of the byte buffer below.
192 static constexpr int maxEncodingBytes{6};
// Storage for the bytes of one encoded character.
193 char buffer[maxEncodingBytes];
203template <Encoding ENCODING,
typename STRING>
204std::string EncodeString(
const STRING &);
205extern template std::string EncodeString<Encoding::LATIN_1, std::string>(
206 const std::string &);
207extern template std::string EncodeString<Encoding::UTF_8, std::u32string>(
208 const std::u32string &);
// Writes one character of a quoted literal through caller-supplied callbacks,
// escaping it when backslashEscapes is set.
// NOTE(review): this listing is an incomplete extract. Several original lines
// are missing (including every call that uses `emit`, some else-branches and
// most closing braces), so the comments below describe only what is visible.
212template <
typename NORMAL,
typename INSERTED>
213void EmitQuotedChar(
char32_t ch,
const NORMAL &emit,
const INSERTED &insert,
214 bool backslashEscapes =
true, Encoding encoding = Encoding::UTF_8) {
// Per-byte helper.  With backslashEscapes on, a byte below ' ', at or above
// 0x7f, or equal to a backslash is a candidate for an escape sequence.
215 auto emitOneByte{[&](std::uint8_t ch) {
216 if (backslashEscapes && (ch <
' ' || ch >= 0x7f || ch ==
'\\')) {
// First choice: a dedicated escape character, when BackslashEscapeChar
// supplies one.
217 if (std::optional<char> escape{BackslashEscapeChar(ch)}) {
220 }
else if (useHexadecimalEscapeSequences) {
// Hexadecimal form: two lower-case hex digits, high nibble first.
223 int top{ch >> 4}, bottom{ch & 0xf};
224 insert(top > 9 ?
'a' + top - 10 :
'0' + top);
225 insert(bottom > 9 ?
'a' + bottom - 10 :
'0' + bottom);
// Three octal digits, most significant first.  (The branch header that
// selects this form is not visible in this extract.)
229 insert(
'0' + (ch >> 6));
230 insert(
'0' + ((ch >> 3) & 7));
231 insert(
'0' + (ch & 7));
233 }
else if (ch ==
'\n') {
242 }
else if (backslashEscapes && useHexadecimalEscapeSequences) {
// Hex digits for bits 31..16 of the code point, most significant first.
246 unsigned c1{(ch >> 28) & 0xf}, c2{(ch >> 24) & 0xf}, c3{(ch >> 20) & 0xf},
247 c4{(ch >> 16) & 0xf};
248 insert(c1 > 9 ?
'a' + c1 - 10 :
'0' + c1);
249 insert(c2 > 9 ?
'a' + c2 - 10 :
'0' + c2);
250 insert(c3 > 9 ?
'a' + c3 - 10 :
'0' + c3);
251 insert(c4 > 9 ?
'a' + c4 - 10 :
'0' + c4);
// Hex digits for the low 16 bits.  The continuation line that declares c4
// for this group is missing from the extract.
253 unsigned c1{(ch >> 12) & 0xf}, c2{(ch >> 8) & 0xf}, c3{(ch >> 4) & 0xf},
255 insert(c1 > 9 ?
'a' + c1 - 10 :
'0' + c1);
256 insert(c2 > 9 ?
'a' + c2 - 10 :
'0' + c2);
257 insert(c3 > 9 ?
'a' + c3 - 10 :
'0' + c3);
258 insert(c4 > 9 ?
'a' + c4 - 10 :
'0' + c4);
// Encode the character in the requested encoding and hand each resulting
// byte to the per-byte helper above.
260 EncodedCharacter encoded{EncodeCharacter(encoding, ch)};
261 for (
int j{0}; j < encoded.bytes; ++j) {
262 emitOneByte(encoded.buffer[j]);
267std::string QuoteCharacterLiteral(
const std::string &,
268 bool backslashEscapes =
true, Encoding = Encoding::LATIN_1);
269std::string QuoteCharacterLiteral(
const std::u16string &,
270 bool backslashEscapes =
true, Encoding = Encoding::UTF_8);
271std::string QuoteCharacterLiteral(
const std::u32string &,
272 bool backslashEscapes =
true, Encoding = Encoding::UTF_8);
// Declared here, defined elsewhere.
// NOTE(review): presumably reports how many bytes the UTF-8 sequence at the
// given pointer occupies -- confirm against the definition.
int UTF_8CharacterBytes(const char *);
// NOTE(review): this member has lost its enclosing struct in this extract;
// presumably it belongs to DecodedCharacter -- confirm against the original.
// Zero-initialized by default.
277 char32_t codepoint{0};
// Declarations of the character-decoding entry points.
// NOTE(review): this extract has dropped several lines, including most of the
// declarator names and the `template <>` headers, so only parameter lists
// remain for some declarations. Recover the missing lines from the original
// header before relying on this section.
// Raw decoding: takes a byte pointer and a byte count.
281template <Encoding ENCODING>
285 const char *, std::size_t);
288DecodedCharacter DecodeRawCharacter<Encoding::UTF_8>(
const char *, std::size_t);
// Variants that additionally take a flag controlling backslash escapes.
291template <Encoding ENCODING>
293 const char *, std::size_t,
bool backslashEscapes);
295 const char *, std::size_t,
bool);
297 const char *, std::size_t,
bool);
// Variant that takes the encoding as a run-time argument.
300 Encoding,
const char *, std::size_t,
bool backslashEscapes);
302template <
typename RESULT, Encoding ENCODING>
303RESULT DecodeString(
const std::string &,
bool backslashEscapes);
304extern template std::string DecodeString<std::string, Encoding::LATIN_1>(
305 const std::string &,
bool);
306extern template std::u16string DecodeString<std::u16string, Encoding::UTF_8>(
307 const std::string &,
bool);
308extern template std::u32string DecodeString<std::u32string, Encoding::UTF_8>(
309 const std::string &,
bool);
// Definition: check-expression.h:19
// Definition: characters.h:276
// Definition: characters.h:191