FLANG
format.h
1//===-- include/flang/Common/format.h ---------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#ifndef FORTRAN_COMMON_FORMAT_H_
10#define FORTRAN_COMMON_FORMAT_H_
11
12#include "Fortran-consts.h"
13#include "enum-set.h"
14#include <cstring>
15#include <limits>
16
17// Define a FormatValidator class template to validate a format expression
18// of a given CHAR type. To enable use in runtime library code as well as
19// compiler code, the implementation does its own parsing without recourse
20// to compiler parser machinery, and avoids features that require C++ runtime
21// library support. A format expression is a pointer to a fixed size
22// character string, with an explicit length. Class function Check analyzes
23// the expression for syntax and semantic errors and warnings. When an error
24// or warning is found, a caller-supplied reporter function is called, which
25// may request early termination of validation analysis when some threshold
26// number of errors have been reported. If the context is a READ, WRITE,
27// or PRINT statement, rather than a FORMAT statement, statement-specific
28// checks are also done.
29
30namespace Fortran::common {
31
32// AddOverflow and MulOverflow are copied from
33// llvm/include/llvm/Support/MathExtras.h and specialised to int64_t.
34
35// __has_builtin is not defined in some compilers. Make sure it is defined.
36#ifndef __has_builtin
37#define __has_builtin(x) 0
38#endif
39
42static inline bool AddOverflow(
43 std::int64_t x, std::int64_t y, std::int64_t &result) {
44#if __has_builtin(__builtin_add_overflow)
45 return __builtin_add_overflow(x, y, &result);
46#else
47 // Perform the unsigned addition.
48 const std::uint64_t ux{static_cast<std::uint64_t>(x)};
49 const std::uint64_t uy{static_cast<std::uint64_t>(y)};
50 const std::uint64_t uresult{ux + uy};
51
52 // Convert to signed.
53 result = static_cast<std::int64_t>(uresult);
54
55 // Adding two positive numbers should result in a positive number.
56 if (x > 0 && y > 0) {
57 return result <= 0;
58 }
59 // Adding two negatives should result in a negative number.
60 if (x < 0 && y < 0) {
61 return result >= 0;
62 }
63 return false;
64#endif
65}
66
69static inline bool MulOverflow(
70 std::int64_t x, std::int64_t y, std::int64_t &result) {
71#if __has_builtin(__builtin_mul_overflow)
72 return __builtin_mul_overflow(x, y, &result);
73#else
74 // Perform the unsigned multiplication on absolute values.
75 const std::uint64_t ux{x < 0 ? (0 - static_cast<std::uint64_t>(x))
76 : static_cast<std::uint64_t>(x)};
77 const std::uint64_t uy{y < 0 ? (0 - static_cast<std::uint64_t>(y))
78 : static_cast<std::uint64_t>(y)};
79 const std::uint64_t uresult{ux * uy};
80
81 // Convert to signed.
82 const bool isNegative = (x < 0) ^ (y < 0);
83 result = isNegative ? (0 - uresult) : uresult;
84
85 // If any of the args was 0, result is 0 and no overflow occurs.
86 if (ux == 0 || uy == 0) {
87 return false;
88 }
89
90 // ux and uy are in [1, 2^n], where n is the number of digits.
91 // Check how the max allowed absolute value (2^n for negative, 2^(n-1) for
92 // positive) divided by an argument compares to the other.
93 if (isNegative) {
94 return ux >
95 (static_cast<std::uint64_t>(std::numeric_limits<std::int64_t>::max()) +
96 std::uint64_t{1}) /
97 uy;
98 } else {
99 return ux >
100 (static_cast<std::uint64_t>(std::numeric_limits<std::int64_t>::max())) /
101 uy;
102 }
103#endif
104}
105
107 const char *text; // message text; may have one %s argument
108 const char *arg; // optional %s argument value
109 int offset; // offset to message marker
110 int length; // length of message marker
111 bool isError; // vs. warning
112};
113
114// This declaration is logically private to class FormatValidator.
115// It is placed here to work around a clang compilation problem.
116ENUM_CLASS(TokenKind, None, A, B, BN, BZ, D, DC, DP, DT, E, EN, ES, EX, F, G, I,
117 L, LZ, LZP, LZS, O, P, RC, RD, RN, RP, RU, RZ, S, SP, SS, T, TL, TR, X, Z,
118 Colon, Slash,
119 Backslash, // nonstandard: inhibit newline on output
120 Dollar, // nonstandard: inhibit newline on output on terminals
121 Star, LParen, RParen, Comma, Point, Sign,
122 UnsignedInteger, // value in integerValue_
123 String) // char-literal-constant or Hollerith constant
124
125template <typename CHAR = char> class FormatValidator {
126public:
127 using Reporter = std::function<bool(const FormatMessage &)>;
128 FormatValidator(const CHAR *format, size_t length, Reporter reporter,
129 IoStmtKind stmt = IoStmtKind::None)
130 : format_{format}, end_{format + length}, reporter_{reporter},
131 stmt_{stmt}, cursor_{format - 1} {
132 CHECK(format);
133 }
134
135 bool Check();
136 int maxNesting() const { return maxNesting_; }
137
138private:
139 common::EnumSet<TokenKind, TokenKind_enumSize> itemsWithLeadingInts_{
140 TokenKind::A, TokenKind::B, TokenKind::D, TokenKind::DT, TokenKind::E,
141 TokenKind::EN, TokenKind::ES, TokenKind::EX, TokenKind::F, TokenKind::G,
142 TokenKind::I, TokenKind::L, TokenKind::O, TokenKind::P, TokenKind::X,
143 TokenKind::Z, TokenKind::Slash, TokenKind::LParen};
144
145 struct Token {
146 Token &set_kind(TokenKind kind) {
147 kind_ = kind;
148 return *this;
149 }
150 Token &set_offset(int offset) {
151 offset_ = offset;
152 return *this;
153 }
154 Token &set_length(int length) {
155 length_ = length;
156 return *this;
157 }
158
159 TokenKind kind() const { return kind_; }
160 int offset() const { return offset_; }
161 int length() const { return length_; }
162
163 bool IsSet() { return kind_ != TokenKind::None; }
164
165 private:
166 TokenKind kind_{TokenKind::None};
167 int offset_{0};
168 int length_{1};
169 };
170
171 void ReportWarning(const char *text) { ReportWarning(text, token_); }
172 void ReportWarning(
173 const char *text, Token &token, const char *arg = nullptr) {
174 FormatMessage msg{
175 text, arg ? arg : argString_, token.offset(), token.length(), false};
176 reporterExit_ |= reporter_(msg);
177 }
178
179 void ReportError(const char *text) { ReportError(text, token_); }
180 void ReportError(const char *text, Token &token, const char *arg = nullptr) {
181 if (suppressMessageCascade_) {
182 return;
183 }
184 formatHasErrors_ = true;
185 suppressMessageCascade_ = true;
186 FormatMessage msg{
187 text, arg ? arg : argString_, token.offset(), token.length(), true};
188 reporterExit_ |= reporter_(msg);
189 }
190
191 void SetLength() { SetLength(token_); }
192 void SetLength(Token &token) {
193 token.set_length(cursor_ - format_ - token.offset() + (cursor_ < end_));
194 }
195
196 CHAR NextChar();
197 CHAR LookAheadChar();
198 void Advance(TokenKind);
199 void NextToken();
200
201 void check_r(bool allowed = true);
202 bool check_w();
203 void check_m();
204 bool check_d(bool checkScaleFactor = false);
205 void check_k();
206 void check_e();
207
208 const CHAR *const format_; // format text
209 const CHAR *const end_; // one-past-last of format_ text
210 Reporter reporter_;
211 IoStmtKind stmt_;
212
213 const CHAR *cursor_{}; // current location in format_
214 const CHAR *laCursor_{}; // lookahead cursor
215 Token previousToken_{};
216 Token token_{}; // current token
217 Token knrToken_{}; // k, n, or r UnsignedInteger token
218 Token scaleFactorToken_{}; // most recent scale factor token P
219 std::int64_t integerValue_{-1}; // value of UnsignedInteger token
220 std::int64_t knrValue_{-1}; // -1 ==> not present
221 std::int64_t scaleFactorValue_{}; // signed k in kP
222 std::int64_t wValue_{-1};
223 char argString_[4]{}; // 1-3 character msg arg; usually edit descriptor name
224 bool formatHasErrors_{false};
225 bool unterminatedFormatError_{false};
226 bool suppressMessageCascade_{false};
227 bool reporterExit_{false};
228 int maxNesting_{0}; // max level of nested parentheses
229};
230
231template <typename CHAR> static inline bool IsWhite(CHAR c) {
232 // White space. ' ' is standard. Other characters are extensions.
233 // Extension candidates:
234 // '\t' (horizontal tab)
235 // '\n' (new line)
236 // '\v' (vertical tab)
237 // '\f' (form feed)
238 // '\r' (carriage ret)
239 return c == ' ' || c == '\t' || c == '\v';
240}
241
242template <typename CHAR> CHAR FormatValidator<CHAR>::NextChar() {
243 for (++cursor_; cursor_ < end_; ++cursor_) {
244 if (!IsWhite(*cursor_)) {
245 return toupper(*cursor_);
246 }
247 }
248 cursor_ = end_; // don't allow cursor_ > end_
249 return ' ';
250}
251
252template <typename CHAR> CHAR FormatValidator<CHAR>::LookAheadChar() {
253 for (laCursor_ = cursor_ + 1; laCursor_ < end_; ++laCursor_) {
254 if (!IsWhite(*laCursor_)) {
255 return toupper(*laCursor_);
256 }
257 }
258 laCursor_ = end_; // don't allow laCursor_ > end_
259 return ' ';
260}
261
262// After a call to LookAheadChar, set token kind and advance cursor to laCursor.
263template <typename CHAR> void FormatValidator<CHAR>::Advance(TokenKind tk) {
264 cursor_ = laCursor_;
265 token_.set_kind(tk);
266}
267
268template <typename CHAR> void FormatValidator<CHAR>::NextToken() {
269 // At entry, cursor_ points before the start of the next token.
270 // At exit, cursor_ points to last CHAR of token_.
271
272 previousToken_ = token_;
273 CHAR c{NextChar()};
274 token_.set_kind(TokenKind::None);
275 token_.set_offset(cursor_ - format_);
276 token_.set_length(1);
277 if (c == '_' && integerValue_ >= 0) { // C1305, C1309, C1310, C1312, C1313
278 ReportError("Kind parameter '_' character in format expression");
279 }
280 integerValue_ = -1;
281
282 switch (c) {
283 case '0':
284 case '1':
285 case '2':
286 case '3':
287 case '4':
288 case '5':
289 case '6':
290 case '7':
291 case '8':
292 case '9': {
293 const CHAR *lastCursor{};
294 integerValue_ = 0;
295 bool overflow{false};
296 do {
297 lastCursor = cursor_;
298 if (!overflow) {
299 overflow = MulOverflow(
300 static_cast<std::int64_t>(10), integerValue_, integerValue_);
301 }
302 if (!overflow) {
303 overflow = AddOverflow(
304 integerValue_, static_cast<std::int64_t>(c - '0'), integerValue_);
305 }
306 c = NextChar();
307 } while (c >= '0' && c <= '9');
308 cursor_ = lastCursor;
309 token_.set_kind(TokenKind::UnsignedInteger);
310 if (overflow) {
311 SetLength();
312 ReportError("Integer overflow in format expression");
313 break;
314 }
315 if (LookAheadChar() != 'H') {
316 break;
317 }
318 // Hollerith constant
319 if (laCursor_ + integerValue_ < end_) {
320 token_.set_kind(TokenKind::String);
321 cursor_ = laCursor_ + integerValue_;
322 } else {
323 token_.set_kind(TokenKind::None);
324 cursor_ = end_;
325 }
326 SetLength();
327 if (stmt_ == IoStmtKind::Read) { // 13.3.2p6
328 ReportError("'H' edit descriptor in READ format expression");
329 } else if (token_.kind() == TokenKind::None) {
330 ReportError("Unterminated 'H' edit descriptor");
331 } else {
332 ReportWarning("Legacy 'H' edit descriptor");
333 }
334 break;
335 }
336 case 'A':
337 token_.set_kind(TokenKind::A);
338 break;
339 case 'B':
340 switch (LookAheadChar()) {
341 case 'N':
342 Advance(TokenKind::BN);
343 break;
344 case 'Z':
345 Advance(TokenKind::BZ);
346 break;
347 default:
348 token_.set_kind(TokenKind::B);
349 break;
350 }
351 break;
352 case 'D':
353 switch (LookAheadChar()) {
354 case 'C':
355 Advance(TokenKind::DC);
356 break;
357 case 'P':
358 Advance(TokenKind::DP);
359 break;
360 case 'T':
361 Advance(TokenKind::DT);
362 break;
363 default:
364 token_.set_kind(TokenKind::D);
365 break;
366 }
367 break;
368 case 'E':
369 switch (LookAheadChar()) {
370 case 'N':
371 Advance(TokenKind::EN);
372 break;
373 case 'S':
374 Advance(TokenKind::ES);
375 break;
376 case 'X':
377 Advance(TokenKind::EX);
378 break;
379 default:
380 token_.set_kind(TokenKind::E);
381 break;
382 }
383 break;
384 case 'F':
385 token_.set_kind(TokenKind::F);
386 break;
387 case 'G':
388 token_.set_kind(TokenKind::G);
389 break;
390 case 'I':
391 token_.set_kind(TokenKind::I);
392 break;
393 case 'L':
394 switch (LookAheadChar()) {
395 case 'Z':
396 // Advance past 'Z', then look ahead for 'S' or 'P'
397 Advance(TokenKind::LZ);
398 switch (LookAheadChar()) {
399 case 'S':
400 Advance(TokenKind::LZS);
401 break;
402 case 'P':
403 Advance(TokenKind::LZP);
404 break;
405 default:
406 break;
407 }
408 break;
409 default:
410 token_.set_kind(TokenKind::L);
411 break;
412 }
413 break;
414 case 'O':
415 token_.set_kind(TokenKind::O);
416 break;
417 case 'P':
418 token_.set_kind(TokenKind::P);
419 break;
420 case 'R':
421 switch (LookAheadChar()) {
422 case 'C':
423 Advance(TokenKind::RC);
424 break;
425 case 'D':
426 Advance(TokenKind::RD);
427 break;
428 case 'N':
429 Advance(TokenKind::RN);
430 break;
431 case 'P':
432 Advance(TokenKind::RP);
433 break;
434 case 'U':
435 Advance(TokenKind::RU);
436 break;
437 case 'Z':
438 Advance(TokenKind::RZ);
439 break;
440 default:
441 token_.set_kind(TokenKind::None);
442 break;
443 }
444 break;
445 case 'S':
446 switch (LookAheadChar()) {
447 case 'P':
448 Advance(TokenKind::SP);
449 break;
450 case 'S':
451 Advance(TokenKind::SS);
452 break;
453 default:
454 token_.set_kind(TokenKind::S);
455 break;
456 }
457 break;
458 case 'T':
459 switch (LookAheadChar()) {
460 case 'L':
461 Advance(TokenKind::TL);
462 break;
463 case 'R':
464 Advance(TokenKind::TR);
465 break;
466 default:
467 token_.set_kind(TokenKind::T);
468 break;
469 }
470 break;
471 case 'X':
472 token_.set_kind(TokenKind::X);
473 break;
474 case 'Z':
475 token_.set_kind(TokenKind::Z);
476 break;
477 case '-':
478 case '+':
479 token_.set_kind(TokenKind::Sign);
480 break;
481 case '/':
482 token_.set_kind(TokenKind::Slash);
483 break;
484 case '(':
485 token_.set_kind(TokenKind::LParen);
486 break;
487 case ')':
488 token_.set_kind(TokenKind::RParen);
489 break;
490 case '.':
491 token_.set_kind(TokenKind::Point);
492 break;
493 case ':':
494 token_.set_kind(TokenKind::Colon);
495 break;
496 case '\\':
497 token_.set_kind(TokenKind::Backslash);
498 break;
499 case '$':
500 token_.set_kind(TokenKind::Dollar);
501 break;
502 case '*':
503 token_.set_kind(LookAheadChar() == '(' ? TokenKind::Star : TokenKind::None);
504 break;
505 case ',': {
506 token_.set_kind(TokenKind::Comma);
507 CHAR laChar = LookAheadChar();
508 if (laChar == ',') {
509 Advance(TokenKind::Comma);
510 token_.set_offset(cursor_ - format_);
511 ReportError("Unexpected ',' in format expression");
512 } else if (laChar == ')') {
513 ReportError("Unexpected ',' before ')' in format expression");
514 }
515 break;
516 }
517 case '\'':
518 case '"':
519 for (++cursor_; cursor_ < end_; ++cursor_) {
520 if (*cursor_ == c) {
521 if (auto nc{cursor_ + 1}; nc < end_ && *nc != c) {
522 token_.set_kind(TokenKind::String);
523 break;
524 }
525 ++cursor_;
526 }
527 }
528 SetLength();
529 if (token_.kind() != TokenKind::String) {
530 ReportError("Unterminated string");
531 } else if (stmt_ == IoStmtKind::Read &&
532 previousToken_.kind() != TokenKind::DT) { // 13.3.2p6
533 ReportWarning("String edit descriptor in READ format expression");
534 }
535 break;
536 default:
537 if (cursor_ >= end_ && !unterminatedFormatError_) {
538 suppressMessageCascade_ = false;
539 ReportError("Unterminated format expression");
540 unterminatedFormatError_ = true;
541 }
542 token_.set_kind(TokenKind::None);
543 break;
544 }
545
546 SetLength();
547}
548
549template <typename CHAR> void FormatValidator<CHAR>::check_r(bool allowed) {
550 if (!allowed && knrValue_ >= 0) {
551 ReportError("Repeat specifier before '%s' edit descriptor", knrToken_);
552 } else if (knrValue_ == 0) {
553 ReportError("'%s' edit descriptor repeat specifier must be positive",
554 knrToken_); // C1304
555 }
556}
557
558// Return the predicate "w value is present" to control further processing.
559template <typename CHAR> bool FormatValidator<CHAR>::check_w() {
560 if (token_.kind() == TokenKind::UnsignedInteger) {
561 wValue_ = integerValue_;
562 if (wValue_ == 0) {
563 if (*argString_ == 'A' || stmt_ == IoStmtKind::Read) {
564 // C1306, 13.7.2.1p6
565 ReportError("'%s' edit descriptor 'w' value must be positive");
566 } else if (*argString_ == 'L') {
567 ReportWarning("'%s' edit descriptor 'w' value should be positive");
568 }
569 }
570 NextToken();
571 return true;
572 }
573 if (*argString_ != 'A' && *argString_ != 'L') {
574 ReportWarning("Expected '%s' edit descriptor 'w' value"); // C1306
575 }
576 return false;
577}
578
579template <typename CHAR> void FormatValidator<CHAR>::check_m() {
580 if (token_.kind() != TokenKind::Point) {
581 return;
582 }
583 NextToken();
584 if (token_.kind() != TokenKind::UnsignedInteger) {
585 ReportError("Expected '%s' edit descriptor 'm' value after '.'");
586 return;
587 }
588 if ((stmt_ == IoStmtKind::Print || stmt_ == IoStmtKind::Write) &&
589 wValue_ > 0 && integerValue_ > wValue_) { // 13.7.2.2p5, 13.7.2.4p6
590 ReportError("'%s' edit descriptor 'm' value is greater than 'w' value");
591 }
592 NextToken();
593}
594
595// Return the predicate "d value is present" to control further processing.
596template <typename CHAR>
597bool FormatValidator<CHAR>::check_d(bool checkScaleFactor) {
598 if (token_.kind() != TokenKind::Point) {
599 ReportError("Expected '%s' edit descriptor '.d' value");
600 return false;
601 }
602 NextToken();
603 if (token_.kind() != TokenKind::UnsignedInteger) {
604 ReportError("Expected '%s' edit descriptor 'd' value after '.'");
605 return false;
606 }
607 if (checkScaleFactor) {
608 check_k();
609 }
610 NextToken();
611 return true;
612}
613
614// Check the value of scale factor k against a field width d.
615template <typename CHAR> void FormatValidator<CHAR>::check_k() {
616 // Limit the check to D and E edit descriptors in output statements that
617 // explicitly set the scale factor.
618 if (stmt_ != IoStmtKind::Print && stmt_ != IoStmtKind::Write) {
619 return;
620 }
621 if (!scaleFactorToken_.IsSet()) {
622 return;
623 }
624 // 13.7.2.3.3p5 - The values of d and k must satisfy:
625 // −d < k <= 0; or
626 // 0 < k < d+2
627 const int64_t d{integerValue_};
628 const int64_t k{scaleFactorValue_};
629 // Exception: d = k = 0 is nonstandard, but has a reasonable interpretation.
630 if (d == 0 && k == 0) {
631 return;
632 }
633 if (k <= 0 && !(-d < k)) {
634 ReportError("Negative scale factor k (from kP) and width d in a '%s' "
635 "edit descriptor must satisfy '-d < k'");
636 } else if (k > 0 && !(k < d + 2)) {
637 ReportError("Positive scale factor k (from kP) and width d in a '%s' "
638 "edit descriptor must satisfy 'k < d+2'");
639 }
640}
641
642template <typename CHAR> void FormatValidator<CHAR>::check_e() {
643 if (token_.kind() != TokenKind::E) {
644 return;
645 }
646 NextToken();
647 if (token_.kind() != TokenKind::UnsignedInteger) {
648 ReportError("Expected '%s' edit descriptor 'e' value after 'E'");
649 return;
650 }
651 NextToken();
652}
653
654template <typename CHAR> bool FormatValidator<CHAR>::Check() {
655 if (!*format_) {
656 ReportError("Empty format expression");
657 return formatHasErrors_;
658 }
659 NextToken();
660 if (token_.kind() != TokenKind::LParen) {
661 ReportError("Format expression must have an initial '('");
662 return formatHasErrors_;
663 }
664 NextToken();
665
666 int nestLevel{0}; // Outer level ()s are at level 0.
667 Token starToken{}; // unlimited format token
668 bool hasDataEditDesc{false};
669
670 // Subject to error recovery exceptions, a loop iteration processes one
671 // edit descriptor or does list management. The loop terminates when
672 // - a level-0 right paren is processed (format may be valid)
673 // - the end of an incomplete format is reached
674 // - the error reporter requests termination (error threshold reached)
675 while (!reporterExit_) {
676 Token signToken{};
677 knrValue_ = -1; // -1 ==> not present
678 wValue_ = -1;
679 bool commaRequired{true};
680
681 if (token_.kind() == TokenKind::Sign) {
682 signToken = token_;
683 NextToken();
684 }
685 if (token_.kind() == TokenKind::UnsignedInteger) {
686 knrToken_ = token_;
687 knrValue_ = integerValue_;
688 NextToken();
689 }
690 if (signToken.IsSet() && (knrValue_ < 0 || token_.kind() != TokenKind::P)) {
691 argString_[0] = format_[signToken.offset()];
692 argString_[1] = 0;
693 ReportError("Unexpected '%s' in format expression", signToken);
694 }
695 // Default message argument.
696 // Alphabetic edit descriptor names are one to three characters in length.
697 argString_[0] = toupper(format_[token_.offset()]);
698 if (token_.length() > 2) {
699 // Three-character descriptor names (e.g., LZP, LZS).
700 // token_.offset() has the first character and *cursor_ has the last;
701 // find the middle character by scanning past any blanks.
702 const CHAR *mid{format_ + token_.offset() + 1};
703 while (mid < cursor_ && IsWhite(*mid)) {
704 ++mid;
705 }
706 argString_[1] = toupper(*mid);
707 argString_[2] = toupper(*cursor_);
708 } else {
709 argString_[1] = token_.length() > 1 ? toupper(*cursor_) : 0;
710 argString_[2] = 0;
711 }
712 // Process one format edit descriptor or do format list management.
713 switch (token_.kind()) {
714 case TokenKind::A:
715 // R1307 data-edit-desc -> A [w]
716 hasDataEditDesc = true;
717 check_r();
718 NextToken();
719 check_w();
720 break;
721 case TokenKind::B:
722 case TokenKind::I:
723 case TokenKind::O:
724 case TokenKind::Z:
725 // R1307 data-edit-desc -> B w [. m] | I w [. m] | O w [. m] | Z w [. m]
726 hasDataEditDesc = true;
727 check_r();
728 NextToken();
729 if (check_w()) {
730 check_m();
731 }
732 break;
733 case TokenKind::D:
734 case TokenKind::F: {
735 // R1307 data-edit-desc -> D w . d | F w . d
736 bool isD{token_.kind() == TokenKind::D};
737 hasDataEditDesc = true;
738 check_r();
739 NextToken();
740 if (check_w()) {
741 check_d(/*checkScaleFactor=*/isD);
742 }
743 break;
744 }
745 case TokenKind::E:
746 case TokenKind::EN:
747 case TokenKind::ES:
748 case TokenKind::EX: {
749 // R1307 data-edit-desc ->
750 // E w . d [E e] | EN w . d [E e] | ES w . d [E e] | EX w . d [E e]
751 bool isE{token_.kind() == TokenKind::E};
752 hasDataEditDesc = true;
753 check_r();
754 NextToken();
755 if (check_w() && check_d(/*checkScaleFactor=*/isE)) {
756 check_e();
757 }
758 break;
759 }
760 case TokenKind::G:
761 // R1307 data-edit-desc -> G w [. d [E e]]
762 hasDataEditDesc = true;
763 check_r();
764 NextToken();
765 if (check_w()) {
766 if (wValue_ > 0) {
767 if (check_d()) { // C1307
768 check_e();
769 }
770 } else if (token_.kind() == TokenKind::Point && check_d() &&
771 token_.kind() == TokenKind::E) { // C1308
772 ReportError("A 'G0' edit descriptor must not have an 'e' value");
773 NextToken();
774 if (token_.kind() == TokenKind::UnsignedInteger) {
775 NextToken();
776 }
777 }
778 }
779 break;
780 case TokenKind::L:
781 // R1307 data-edit-desc -> L w
782 hasDataEditDesc = true;
783 check_r();
784 NextToken();
785 check_w();
786 break;
787 case TokenKind::DT:
788 // R1307 data-edit-desc -> DT [char-literal-constant] [( v-list )]
789 hasDataEditDesc = true;
790 check_r();
791 NextToken();
792 if (token_.kind() == TokenKind::String) {
793 NextToken();
794 }
795 if (token_.kind() == TokenKind::LParen) {
796 do {
797 NextToken();
798 if (token_.kind() == TokenKind::Sign) {
799 NextToken();
800 }
801 if (token_.kind() != TokenKind::UnsignedInteger) {
802 ReportError(
803 "Expected integer constant in 'DT' edit descriptor v-list");
804 break;
805 }
806 NextToken();
807 } while (token_.kind() == TokenKind::Comma);
808 if (token_.kind() != TokenKind::RParen) {
809 ReportError("Expected ',' or ')' in 'DT' edit descriptor v-list");
810 while (cursor_ < end_ && token_.kind() != TokenKind::RParen) {
811 NextToken();
812 }
813 }
814 NextToken();
815 }
816 break;
817 case TokenKind::String:
818 // R1304 data-edit-desc -> char-string-edit-desc
819 if (knrValue_ >= 0) {
820 ReportError("Repeat specifier before character string edit descriptor",
821 knrToken_);
822 }
823 NextToken();
824 break;
825 case TokenKind::BN:
826 case TokenKind::BZ:
827 case TokenKind::DC:
828 case TokenKind::DP:
829 case TokenKind::LZ:
830 case TokenKind::LZS:
831 case TokenKind::LZP:
832 case TokenKind::RC:
833 case TokenKind::RD:
834 case TokenKind::RN:
835 case TokenKind::RP:
836 case TokenKind::RU:
837 case TokenKind::RZ:
838 case TokenKind::S:
839 case TokenKind::SP:
840 case TokenKind::SS:
841 // R1317 sign-edit-desc -> SS | SP | S
842 // R1318 blank-interp-edit-desc -> BN | BZ
843 // R1319 round-edit-desc -> RU | RD | RZ | RN | RC | RP
844 // R1320 decimal-edit-desc -> DC | DP
845 // F202X leading-zero-edit-desc -> LZ | LZS | LZP
846 check_r(false);
847 NextToken();
848 break;
849 case TokenKind::P: {
850 // R1313 control-edit-desc -> k P
851 if (knrValue_ < 0) {
852 ReportError("'P' edit descriptor must have a scale factor");
853 } else {
854 scaleFactorToken_ = knrToken_;
855 if (signToken.IsSet() && format_[signToken.offset()] == '-') {
856 scaleFactorValue_ = -knrValue_;
857 } else {
858 scaleFactorValue_ = knrValue_;
859 }
860 }
861 // Diagnosing C1302 may require multiple token lookahead.
862 // Save current cursor position to enable backup.
863 const CHAR *saveCursor{cursor_};
864 NextToken();
865 if (token_.kind() == TokenKind::UnsignedInteger) {
866 NextToken();
867 }
868 switch (token_.kind()) {
869 case TokenKind::D:
870 case TokenKind::E:
871 case TokenKind::EN:
872 case TokenKind::ES:
873 case TokenKind::EX:
874 case TokenKind::F:
875 case TokenKind::G:
876 commaRequired = false;
877 break;
878 default:;
879 }
880 cursor_ = saveCursor;
881 NextToken();
882 break;
883 }
884 case TokenKind::T:
885 case TokenKind::TL:
886 case TokenKind::TR:
887 // R1315 position-edit-desc -> T n | TL n | TR n
888 check_r(false);
889 NextToken();
890 if (integerValue_ <= 0) { // C1311
891 ReportError("'%s' edit descriptor must have a positive position value");
892 }
893 NextToken();
894 break;
895 case TokenKind::X:
896 // R1315 position-edit-desc -> n X
897 if (knrValue_ == 0) { // C1311
898 ReportError("'X' edit descriptor must have a positive position value",
899 knrToken_);
900 } else if (knrValue_ < 0) {
901 ReportWarning(
902 "'X' edit descriptor must have a positive position value");
903 }
904 NextToken();
905 break;
906 case TokenKind::Colon:
907 // R1313 control-edit-desc -> :
908 check_r(false);
909 commaRequired = false;
910 NextToken();
911 break;
912 case TokenKind::Slash:
913 // R1313 control-edit-desc -> [r] /
914 commaRequired = false;
915 NextToken();
916 break;
917 case TokenKind::Backslash:
918 check_r(false);
919 ReportWarning("Non-standard '\\' edit descriptor");
920 NextToken();
921 break;
922 case TokenKind::Dollar:
923 check_r(false);
924 ReportWarning("Non-standard '$' edit descriptor");
925 NextToken();
926 break;
927 case TokenKind::Star:
928 // NextToken assigns a token kind of Star only if * is followed by (.
929 // So the next token is guaranteed to be LParen.
930 if (nestLevel > 0) {
931 ReportError("Nested unlimited format item list");
932 }
933 starToken = token_;
934 if (knrValue_ >= 0) {
935 ReportError(
936 "Repeat specifier before unlimited format item list", knrToken_);
937 }
938 hasDataEditDesc = false;
939 NextToken();
940 [[fallthrough]];
941 case TokenKind::LParen:
942 if (knrValue_ == 0) {
943 ReportError("List repeat specifier must be positive", knrToken_);
944 }
945 if (++nestLevel > maxNesting_) {
946 maxNesting_ = nestLevel;
947 }
948 if (LookAheadChar() == ')') {
949 ReportError("Nested parenthesized format item list is empty");
950 }
951 break;
952 case TokenKind::RParen:
953 if (knrValue_ >= 0) {
954 ReportError("Unexpected integer constant", knrToken_);
955 }
956 do {
957 if (nestLevel == 0) {
958 // Any characters after level-0 ) are ignored.
959 return formatHasErrors_; // normal exit (may have messages)
960 }
961 if (nestLevel == 1 && starToken.IsSet() && !hasDataEditDesc) {
962 SetLength(starToken);
963 ReportError( // C1303
964 "Unlimited format item list must contain a data edit descriptor",
965 starToken);
966 }
967 --nestLevel;
968 NextToken();
969 } while (token_.kind() == TokenKind::RParen);
970 if (nestLevel == 0 && starToken.IsSet()) {
971 ReportError("Character in format after unlimited format item list");
972 }
973 break;
974 case TokenKind::Comma:
975 if (knrValue_ >= 0) {
976 ReportError("Unexpected integer constant", knrToken_);
977 }
978 if (suppressMessageCascade_ || reporterExit_) {
979 break;
980 }
981 [[fallthrough]];
982 default:
983 ReportError("Unexpected '%s' in format expression");
984 NextToken();
985 }
986
987 // Process comma separator and exit an incomplete format.
988 switch (token_.kind()) {
989 case TokenKind::Colon: // Comma not required; token not yet processed.
990 case TokenKind::Slash: // Comma not required; token not yet processed.
991 case TokenKind::RParen: // Comma not allowed; token not yet processed.
992 suppressMessageCascade_ = false;
993 break;
994 case TokenKind::LParen: // Comma not allowed; token already processed.
995 case TokenKind::Comma: // Normal comma case; move past token.
996 suppressMessageCascade_ = false;
997 NextToken();
998 break;
999 case TokenKind::Sign: // Error; main switch has a better message.
1000 case TokenKind::None: // Error; token not yet processed.
1001 if (cursor_ >= end_) {
1002 return formatHasErrors_; // incomplete format error exit
1003 }
1004 break;
1005 default:
1006 // Possible first token of the next format item; token not yet processed.
1007 if (commaRequired) {
1008 const char *s{"Expected ',' or ')' in format expression"}; // C1302
1009 if (previousToken_.kind() == TokenKind::UnsignedInteger &&
1010 previousToken_.length() > 1 &&
1011 itemsWithLeadingInts_.test(token_.kind())) {
1012 // F10.32F10.3 is ambiguous, F10.3F10.3 is not
1013 ReportError(s);
1014 } else {
1015 ReportWarning(s);
1016 }
1017 }
1018 }
1019 }
1020
1021 return formatHasErrors_; // error reporter (message threshold) exit
1022}
1023
1024} // namespace Fortran::common
1025#endif // FORTRAN_COMMON_FORMAT_H_
Definition bit-population-count.h:20
Definition format.h:106