FLANG
format.h
1//===-- include/flang/Common/format.h ---------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#ifndef FORTRAN_COMMON_FORMAT_H_
10#define FORTRAN_COMMON_FORMAT_H_
11
12#include "Fortran-consts.h"
13#include "enum-set.h"
14#include <cstring>
15#include <limits>
16
17// Define a FormatValidator class template to validate a format expression
18// of a given CHAR type. To enable use in runtime library code as well as
19// compiler code, the implementation does its own parsing without recourse
20// to compiler parser machinery, and avoids features that require C++ runtime
21// library support. A format expression is a pointer to a fixed size
22// character string, with an explicit length. Class function Check analyzes
23// the expression for syntax and semantic errors and warnings. When an error
24// or warning is found, a caller-supplied reporter function is called, which
25// may request early termination of validation analysis when some threshold
26// number of errors have been reported. If the context is a READ, WRITE,
27// or PRINT statement, rather than a FORMAT statement, statement-specific
28// checks are also done.
29
30namespace Fortran::common {
31
32// AddOverflow and MulOverflow are copied from
33// llvm/include/llvm/Support/MathExtras.h and specialised to int64_t.
34
35// __has_builtin is not defined in some compilers. Make sure it is defined.
36#ifndef __has_builtin
37#define __has_builtin(x) 0
38#endif
39
42static inline bool AddOverflow(
43 std::int64_t x, std::int64_t y, std::int64_t &result) {
44#if __has_builtin(__builtin_add_overflow)
45 return __builtin_add_overflow(x, y, &result);
46#else
47 // Perform the unsigned addition.
48 const std::uint64_t ux{static_cast<std::uint64_t>(x)};
49 const std::uint64_t uy{static_cast<std::uint64_t>(y)};
50 const std::uint64_t uresult{ux + uy};
51
52 // Convert to signed.
53 result = static_cast<std::int64_t>(uresult);
54
55 // Adding two positive numbers should result in a positive number.
56 if (x > 0 && y > 0) {
57 return result <= 0;
58 }
59 // Adding two negatives should result in a negative number.
60 if (x < 0 && y < 0) {
61 return result >= 0;
62 }
63 return false;
64#endif
65}
66
69static inline bool MulOverflow(
70 std::int64_t x, std::int64_t y, std::int64_t &result) {
71#if __has_builtin(__builtin_mul_overflow)
72 return __builtin_mul_overflow(x, y, &result);
73#else
74 // Perform the unsigned multiplication on absolute values.
75 const std::uint64_t ux{x < 0 ? (0 - static_cast<std::uint64_t>(x))
76 : static_cast<std::uint64_t>(x)};
77 const std::uint64_t uy{y < 0 ? (0 - static_cast<std::uint64_t>(y))
78 : static_cast<std::uint64_t>(y)};
79 const std::uint64_t uresult{ux * uy};
80
81 // Convert to signed.
82 const bool isNegative = (x < 0) ^ (y < 0);
83 result = isNegative ? (0 - uresult) : uresult;
84
85 // If any of the args was 0, result is 0 and no overflow occurs.
86 if (ux == 0 || uy == 0) {
87 return false;
88 }
89
90 // ux and uy are in [1, 2^n], where n is the number of digits.
91 // Check how the max allowed absolute value (2^n for negative, 2^(n-1) for
92 // positive) divided by an argument compares to the other.
93 if (isNegative) {
94 return ux >
95 (static_cast<std::uint64_t>(std::numeric_limits<std::int64_t>::max()) +
96 std::uint64_t{1}) /
97 uy;
98 } else {
99 return ux >
100 (static_cast<std::uint64_t>(std::numeric_limits<std::int64_t>::max())) /
101 uy;
102 }
103#endif
104}
105
107 const char *text; // message text; may have one %s argument
108 const char *arg; // optional %s argument value
109 int offset; // offset to message marker
110 int length; // length of message marker
111 bool isError; // vs. warning
112};
113
114// This declaration is logically private to class FormatValidator.
115// It is placed here to work around a clang compilation problem.
116ENUM_CLASS(TokenKind, None, A, AT, B, BN, BZ, D, DC, DP, DT, E, EN, ES, EX, F,
117 G, I, L, LZ, LZP, LZS, O, P, RC, RD, RN, RP, RU, RZ, S, SP, SS, T, TL, TR,
118 X, Z, Colon, Slash,
119 Backslash, // nonstandard: inhibit newline on output
120 Dollar, // nonstandard: inhibit newline on output on terminals
121 Star, LParen, RParen, Comma, Point, Sign,
122 UnsignedInteger, // value in integerValue_
123 String) // char-literal-constant or Hollerith constant
124
125template <typename CHAR = char> class FormatValidator {
126public:
127 using Reporter = std::function<bool(const FormatMessage &)>;
128 FormatValidator(const CHAR *format, size_t length, Reporter reporter,
129 IoStmtKind stmt = IoStmtKind::None)
130 : format_{format}, end_{format + length}, reporter_{reporter},
131 stmt_{stmt}, cursor_{format - 1} {
132 CHECK(format);
133 }
134
135 bool Check();
136 int maxNesting() const { return maxNesting_; }
137
138private:
139 common::EnumSet<TokenKind, TokenKind_enumSize> itemsWithLeadingInts_{
140 TokenKind::A, TokenKind::AT, TokenKind::B, TokenKind::D, TokenKind::DT,
141 TokenKind::E, TokenKind::EN, TokenKind::ES, TokenKind::EX, TokenKind::F,
142 TokenKind::G, TokenKind::I, TokenKind::L, TokenKind::O, TokenKind::P,
143 TokenKind::X, TokenKind::Z, TokenKind::Slash, TokenKind::LParen};
144
145 struct Token {
146 Token &set_kind(TokenKind kind) {
147 kind_ = kind;
148 return *this;
149 }
150 Token &set_offset(int offset) {
151 offset_ = offset;
152 return *this;
153 }
154 Token &set_length(int length) {
155 length_ = length;
156 return *this;
157 }
158
159 TokenKind kind() const { return kind_; }
160 int offset() const { return offset_; }
161 int length() const { return length_; }
162
163 bool IsSet() { return kind_ != TokenKind::None; }
164
165 private:
166 TokenKind kind_{TokenKind::None};
167 int offset_{0};
168 int length_{1};
169 };
170
171 void ReportWarning(const char *text) { ReportWarning(text, token_); }
172 void ReportWarning(
173 const char *text, Token &token, const char *arg = nullptr) {
174 FormatMessage msg{
175 text, arg ? arg : argString_, token.offset(), token.length(), false};
176 reporterExit_ |= reporter_(msg);
177 }
178
179 void ReportError(const char *text) { ReportError(text, token_); }
180 void ReportError(const char *text, Token &token, const char *arg = nullptr) {
181 if (suppressMessageCascade_) {
182 return;
183 }
184 formatHasErrors_ = true;
185 suppressMessageCascade_ = true;
186 FormatMessage msg{
187 text, arg ? arg : argString_, token.offset(), token.length(), true};
188 reporterExit_ |= reporter_(msg);
189 }
190
191 void SetLength() { SetLength(token_); }
192 void SetLength(Token &token) {
193 token.set_length(cursor_ - format_ - token.offset() + (cursor_ < end_));
194 }
195
196 CHAR NextChar();
197 CHAR LookAheadChar();
198 void Advance(TokenKind);
199 void NextToken();
200
201 void check_r(bool allowed = true);
202 bool check_w();
203 void check_m();
204 bool check_d(bool checkScaleFactor = false);
205 void check_k();
206 void check_e();
207
208 const CHAR *const format_; // format text
209 const CHAR *const end_; // one-past-last of format_ text
210 Reporter reporter_;
211 IoStmtKind stmt_;
212
213 const CHAR *cursor_{}; // current location in format_
214 const CHAR *laCursor_{}; // lookahead cursor
215 Token previousToken_{};
216 Token token_{}; // current token
217 Token knrToken_{}; // k, n, or r UnsignedInteger token
218 Token scaleFactorToken_{}; // most recent scale factor token P
219 std::int64_t integerValue_{-1}; // value of UnsignedInteger token
220 std::int64_t knrValue_{-1}; // -1 ==> not present
221 std::int64_t scaleFactorValue_{}; // signed k in kP
222 std::int64_t wValue_{-1};
223 char argString_[4]{}; // 1-3 character msg arg; usually edit descriptor name
224 bool formatHasErrors_{false};
225 bool unterminatedFormatError_{false};
226 bool suppressMessageCascade_{false};
227 bool reporterExit_{false};
228 int maxNesting_{0}; // max level of nested parentheses
229};
230
231template <typename CHAR> static inline bool IsWhite(CHAR c) {
232 // White space. ' ' is standard. Other characters are extensions.
233 // Extension candidates:
234 // '\t' (horizontal tab)
235 // '\n' (new line)
236 // '\v' (vertical tab)
237 // '\f' (form feed)
238 // '\r' (carriage ret)
239 return c == ' ' || c == '\t' || c == '\v';
240}
241
242template <typename CHAR> CHAR FormatValidator<CHAR>::NextChar() {
243 for (++cursor_; cursor_ < end_; ++cursor_) {
244 if (!IsWhite(*cursor_)) {
245 return toupper(*cursor_);
246 }
247 }
248 cursor_ = end_; // don't allow cursor_ > end_
249 return ' ';
250}
251
252template <typename CHAR> CHAR FormatValidator<CHAR>::LookAheadChar() {
253 for (laCursor_ = cursor_ + 1; laCursor_ < end_; ++laCursor_) {
254 if (!IsWhite(*laCursor_)) {
255 return toupper(*laCursor_);
256 }
257 }
258 laCursor_ = end_; // don't allow laCursor_ > end_
259 return ' ';
260}
261
262// After a call to LookAheadChar, set token kind and advance cursor to laCursor.
263template <typename CHAR> void FormatValidator<CHAR>::Advance(TokenKind tk) {
264 cursor_ = laCursor_;
265 token_.set_kind(tk);
266}
267
268template <typename CHAR> void FormatValidator<CHAR>::NextToken() {
269 // At entry, cursor_ points before the start of the next token.
270 // At exit, cursor_ points to last CHAR of token_.
271
272 previousToken_ = token_;
273 CHAR c{NextChar()};
274 token_.set_kind(TokenKind::None);
275 token_.set_offset(cursor_ - format_);
276 token_.set_length(1);
277 if (c == '_' && integerValue_ >= 0) { // C1305, C1309, C1310, C1312, C1313
278 ReportError("Kind parameter '_' character in format expression");
279 }
280 integerValue_ = -1;
281
282 switch (c) {
283 case '0':
284 case '1':
285 case '2':
286 case '3':
287 case '4':
288 case '5':
289 case '6':
290 case '7':
291 case '8':
292 case '9': {
293 const CHAR *lastCursor{};
294 integerValue_ = 0;
295 bool overflow{false};
296 do {
297 lastCursor = cursor_;
298 if (!overflow) {
299 overflow = MulOverflow(
300 static_cast<std::int64_t>(10), integerValue_, integerValue_);
301 }
302 if (!overflow) {
303 overflow = AddOverflow(
304 integerValue_, static_cast<std::int64_t>(c - '0'), integerValue_);
305 }
306 c = NextChar();
307 } while (c >= '0' && c <= '9');
308 cursor_ = lastCursor;
309 token_.set_kind(TokenKind::UnsignedInteger);
310 if (overflow) {
311 SetLength();
312 ReportError("Integer overflow in format expression");
313 break;
314 }
315 if (LookAheadChar() != 'H') {
316 break;
317 }
318 // Hollerith constant
319 if (laCursor_ + integerValue_ < end_) {
320 token_.set_kind(TokenKind::String);
321 cursor_ = laCursor_ + integerValue_;
322 } else {
323 token_.set_kind(TokenKind::None);
324 cursor_ = end_;
325 }
326 SetLength();
327 if (stmt_ == IoStmtKind::Read) { // 13.3.2p6
328 ReportError("'H' edit descriptor in READ format expression");
329 } else if (token_.kind() == TokenKind::None) {
330 ReportError("Unterminated 'H' edit descriptor");
331 } else {
332 ReportWarning("Legacy 'H' edit descriptor");
333 }
334 break;
335 }
336 case 'A':
337 if (LookAheadChar() == 'T') {
338 Advance(TokenKind::AT);
339 } else {
340 token_.set_kind(TokenKind::A);
341 }
342 break;
343 case 'B':
344 switch (LookAheadChar()) {
345 case 'N':
346 Advance(TokenKind::BN);
347 break;
348 case 'Z':
349 Advance(TokenKind::BZ);
350 break;
351 default:
352 token_.set_kind(TokenKind::B);
353 break;
354 }
355 break;
356 case 'D':
357 switch (LookAheadChar()) {
358 case 'C':
359 Advance(TokenKind::DC);
360 break;
361 case 'P':
362 Advance(TokenKind::DP);
363 break;
364 case 'T':
365 Advance(TokenKind::DT);
366 break;
367 default:
368 token_.set_kind(TokenKind::D);
369 break;
370 }
371 break;
372 case 'E':
373 switch (LookAheadChar()) {
374 case 'N':
375 Advance(TokenKind::EN);
376 break;
377 case 'S':
378 Advance(TokenKind::ES);
379 break;
380 case 'X':
381 Advance(TokenKind::EX);
382 break;
383 default:
384 token_.set_kind(TokenKind::E);
385 break;
386 }
387 break;
388 case 'F':
389 token_.set_kind(TokenKind::F);
390 break;
391 case 'G':
392 token_.set_kind(TokenKind::G);
393 break;
394 case 'I':
395 token_.set_kind(TokenKind::I);
396 break;
397 case 'L':
398 switch (LookAheadChar()) {
399 case 'Z':
400 // Advance past 'Z', then look ahead for 'S' or 'P'
401 Advance(TokenKind::LZ);
402 switch (LookAheadChar()) {
403 case 'S':
404 Advance(TokenKind::LZS);
405 break;
406 case 'P':
407 Advance(TokenKind::LZP);
408 break;
409 default:
410 break;
411 }
412 break;
413 default:
414 token_.set_kind(TokenKind::L);
415 break;
416 }
417 break;
418 case 'O':
419 token_.set_kind(TokenKind::O);
420 break;
421 case 'P':
422 token_.set_kind(TokenKind::P);
423 break;
424 case 'R':
425 switch (LookAheadChar()) {
426 case 'C':
427 Advance(TokenKind::RC);
428 break;
429 case 'D':
430 Advance(TokenKind::RD);
431 break;
432 case 'N':
433 Advance(TokenKind::RN);
434 break;
435 case 'P':
436 Advance(TokenKind::RP);
437 break;
438 case 'U':
439 Advance(TokenKind::RU);
440 break;
441 case 'Z':
442 Advance(TokenKind::RZ);
443 break;
444 default:
445 token_.set_kind(TokenKind::None);
446 break;
447 }
448 break;
449 case 'S':
450 switch (LookAheadChar()) {
451 case 'P':
452 Advance(TokenKind::SP);
453 break;
454 case 'S':
455 Advance(TokenKind::SS);
456 break;
457 default:
458 token_.set_kind(TokenKind::S);
459 break;
460 }
461 break;
462 case 'T':
463 switch (LookAheadChar()) {
464 case 'L':
465 Advance(TokenKind::TL);
466 break;
467 case 'R':
468 Advance(TokenKind::TR);
469 break;
470 default:
471 token_.set_kind(TokenKind::T);
472 break;
473 }
474 break;
475 case 'X':
476 token_.set_kind(TokenKind::X);
477 break;
478 case 'Z':
479 token_.set_kind(TokenKind::Z);
480 break;
481 case '-':
482 case '+':
483 token_.set_kind(TokenKind::Sign);
484 break;
485 case '/':
486 token_.set_kind(TokenKind::Slash);
487 break;
488 case '(':
489 token_.set_kind(TokenKind::LParen);
490 break;
491 case ')':
492 token_.set_kind(TokenKind::RParen);
493 break;
494 case '.':
495 token_.set_kind(TokenKind::Point);
496 break;
497 case ':':
498 token_.set_kind(TokenKind::Colon);
499 break;
500 case '\\':
501 token_.set_kind(TokenKind::Backslash);
502 break;
503 case '$':
504 token_.set_kind(TokenKind::Dollar);
505 break;
506 case '*':
507 token_.set_kind(LookAheadChar() == '(' ? TokenKind::Star : TokenKind::None);
508 break;
509 case ',': {
510 token_.set_kind(TokenKind::Comma);
511 CHAR laChar = LookAheadChar();
512 if (laChar == ',') {
513 Advance(TokenKind::Comma);
514 token_.set_offset(cursor_ - format_);
515 ReportError("Unexpected ',' in format expression");
516 } else if (laChar == ')') {
517 ReportError("Unexpected ',' before ')' in format expression");
518 }
519 break;
520 }
521 case '\'':
522 case '"':
523 for (++cursor_; cursor_ < end_; ++cursor_) {
524 if (*cursor_ == c) {
525 if (auto nc{cursor_ + 1}; nc < end_ && *nc != c) {
526 token_.set_kind(TokenKind::String);
527 break;
528 }
529 ++cursor_;
530 }
531 }
532 SetLength();
533 if (token_.kind() != TokenKind::String) {
534 ReportError("Unterminated string");
535 } else if (stmt_ == IoStmtKind::Read &&
536 previousToken_.kind() != TokenKind::DT) { // 13.3.2p6
537 ReportWarning("String edit descriptor in READ format expression");
538 }
539 break;
540 default:
541 if (cursor_ >= end_ && !unterminatedFormatError_) {
542 suppressMessageCascade_ = false;
543 ReportError("Unterminated format expression");
544 unterminatedFormatError_ = true;
545 }
546 token_.set_kind(TokenKind::None);
547 break;
548 }
549
550 SetLength();
551}
552
553template <typename CHAR> void FormatValidator<CHAR>::check_r(bool allowed) {
554 if (!allowed && knrValue_ >= 0) {
555 ReportError("Repeat specifier before '%s' edit descriptor", knrToken_);
556 } else if (knrValue_ == 0) {
557 ReportError("'%s' edit descriptor repeat specifier must be positive",
558 knrToken_); // C1304
559 }
560}
561
562// Return the predicate "w value is present" to control further processing.
563template <typename CHAR> bool FormatValidator<CHAR>::check_w() {
564 if (token_.kind() == TokenKind::UnsignedInteger) {
565 wValue_ = integerValue_;
566 if (wValue_ == 0) {
567 if (*argString_ == 'A' || stmt_ == IoStmtKind::Read) {
568 // C1306, 13.7.2.1p6
569 ReportError("'%s' edit descriptor 'w' value must be positive");
570 } else if (*argString_ == 'L') {
571 ReportWarning("'%s' edit descriptor 'w' value should be positive");
572 }
573 }
574 NextToken();
575 return true;
576 }
577 if (*argString_ != 'A' && *argString_ != 'L') {
578 ReportWarning("Expected '%s' edit descriptor 'w' value"); // C1306
579 }
580 return false;
581}
582
583template <typename CHAR> void FormatValidator<CHAR>::check_m() {
584 if (token_.kind() != TokenKind::Point) {
585 return;
586 }
587 NextToken();
588 if (token_.kind() != TokenKind::UnsignedInteger) {
589 ReportError("Expected '%s' edit descriptor 'm' value after '.'");
590 return;
591 }
592 if ((stmt_ == IoStmtKind::Print || stmt_ == IoStmtKind::Write) &&
593 wValue_ > 0 && integerValue_ > wValue_) { // 13.7.2.2p5, 13.7.2.4p6
594 ReportError("'%s' edit descriptor 'm' value is greater than 'w' value");
595 }
596 NextToken();
597}
598
599// Return the predicate "d value is present" to control further processing.
600template <typename CHAR>
601bool FormatValidator<CHAR>::check_d(bool checkScaleFactor) {
602 if (token_.kind() != TokenKind::Point) {
603 ReportError("Expected '%s' edit descriptor '.d' value");
604 return false;
605 }
606 NextToken();
607 if (token_.kind() != TokenKind::UnsignedInteger) {
608 ReportError("Expected '%s' edit descriptor 'd' value after '.'");
609 return false;
610 }
611 if (checkScaleFactor) {
612 check_k();
613 }
614 NextToken();
615 return true;
616}
617
618// Check the value of scale factor k against a field width d.
619template <typename CHAR> void FormatValidator<CHAR>::check_k() {
620 // Limit the check to D and E edit descriptors in output statements that
621 // explicitly set the scale factor.
622 if (stmt_ != IoStmtKind::Print && stmt_ != IoStmtKind::Write) {
623 return;
624 }
625 if (!scaleFactorToken_.IsSet()) {
626 return;
627 }
628 // 13.7.2.3.3p5 - The values of d and k must satisfy:
629 // −d < k <= 0; or
630 // 0 < k < d+2
631 const int64_t d{integerValue_};
632 const int64_t k{scaleFactorValue_};
633 // Exception: d = k = 0 is nonstandard, but has a reasonable interpretation.
634 if (d == 0 && k == 0) {
635 return;
636 }
637 if (k <= 0 && !(-d < k)) {
638 ReportError("Negative scale factor k (from kP) and width d in a '%s' "
639 "edit descriptor must satisfy '-d < k'");
640 } else if (k > 0 && !(k < d + 2)) {
641 ReportError("Positive scale factor k (from kP) and width d in a '%s' "
642 "edit descriptor must satisfy 'k < d+2'");
643 }
644}
645
646template <typename CHAR> void FormatValidator<CHAR>::check_e() {
647 if (token_.kind() != TokenKind::E) {
648 return;
649 }
650 NextToken();
651 if (token_.kind() != TokenKind::UnsignedInteger) {
652 ReportError("Expected '%s' edit descriptor 'e' value after 'E'");
653 return;
654 }
655 NextToken();
656}
657
658template <typename CHAR> bool FormatValidator<CHAR>::Check() {
659 if (!*format_) {
660 ReportError("Empty format expression");
661 return formatHasErrors_;
662 }
663 NextToken();
664 if (token_.kind() != TokenKind::LParen) {
665 ReportError("Format expression must have an initial '('");
666 return formatHasErrors_;
667 }
668 NextToken();
669
670 int nestLevel{0}; // Outer level ()s are at level 0.
671 Token starToken{}; // unlimited format token
672 bool hasDataEditDesc{false};
673
674 // Subject to error recovery exceptions, a loop iteration processes one
675 // edit descriptor or does list management. The loop terminates when
676 // - a level-0 right paren is processed (format may be valid)
677 // - the end of an incomplete format is reached
678 // - the error reporter requests termination (error threshold reached)
679 while (!reporterExit_) {
680 Token signToken{};
681 knrValue_ = -1; // -1 ==> not present
682 wValue_ = -1;
683 bool commaRequired{true};
684
685 if (token_.kind() == TokenKind::Sign) {
686 signToken = token_;
687 NextToken();
688 }
689 if (token_.kind() == TokenKind::UnsignedInteger) {
690 knrToken_ = token_;
691 knrValue_ = integerValue_;
692 NextToken();
693 }
694 if (signToken.IsSet() && (knrValue_ < 0 || token_.kind() != TokenKind::P)) {
695 argString_[0] = format_[signToken.offset()];
696 argString_[1] = 0;
697 ReportError("Unexpected '%s' in format expression", signToken);
698 }
699 // Default message argument.
700 // Alphabetic edit descriptor names are one to three characters in length.
701 argString_[0] = toupper(format_[token_.offset()]);
702 if (token_.length() > 2) {
703 // Three-character descriptor names (e.g., LZP, LZS).
704 // token_.offset() has the first character and *cursor_ has the last;
705 // find the middle character by scanning past any blanks.
706 const CHAR *mid{format_ + token_.offset() + 1};
707 while (mid < cursor_ && IsWhite(*mid)) {
708 ++mid;
709 }
710 argString_[1] = toupper(*mid);
711 argString_[2] = toupper(*cursor_);
712 } else {
713 argString_[1] = token_.length() > 1 ? toupper(*cursor_) : 0;
714 argString_[2] = 0;
715 }
716 // Process one format edit descriptor or do format list management.
717 switch (token_.kind()) {
718 case TokenKind::A:
719 // R1307 data-edit-desc -> A [w]
720 hasDataEditDesc = true;
721 check_r();
722 NextToken();
723 check_w();
724 break;
725 case TokenKind::AT:
726 // F2023 data-edit-desc -> AT (no w allowed)
727 hasDataEditDesc = true;
728 check_r();
729 NextToken();
730 if (token_.kind() == TokenKind::UnsignedInteger) {
731 ReportError("'AT' edit descriptor does not accept a width value");
732 NextToken();
733 // reset to allow the Read check below to also report
734 suppressMessageCascade_ = false;
735 }
736 if (stmt_ == IoStmtKind::Read) {
737 ReportError("'AT' edit descriptor must not be used for input");
738 // reset to allow subsequent '.' check to also report
739 suppressMessageCascade_ = false;
740 }
741 break;
742 case TokenKind::B:
743 case TokenKind::I:
744 case TokenKind::O:
745 case TokenKind::Z:
746 // R1307 data-edit-desc -> B w [. m] | I w [. m] | O w [. m] | Z w [. m]
747 hasDataEditDesc = true;
748 check_r();
749 NextToken();
750 if (check_w()) {
751 check_m();
752 }
753 break;
754 case TokenKind::D:
755 case TokenKind::F: {
756 // R1307 data-edit-desc -> D w . d | F w . d
757 bool isD{token_.kind() == TokenKind::D};
758 hasDataEditDesc = true;
759 check_r();
760 NextToken();
761 if (check_w()) {
762 check_d(/*checkScaleFactor=*/isD);
763 }
764 break;
765 }
766 case TokenKind::E:
767 case TokenKind::EN:
768 case TokenKind::ES:
769 case TokenKind::EX: {
770 // R1307 data-edit-desc ->
771 // E w . d [E e] | EN w . d [E e] | ES w . d [E e] | EX w . d [E e]
772 bool isE{token_.kind() == TokenKind::E};
773 hasDataEditDesc = true;
774 check_r();
775 NextToken();
776 if (check_w() && check_d(/*checkScaleFactor=*/isE)) {
777 check_e();
778 }
779 break;
780 }
781 case TokenKind::G:
782 // R1307 data-edit-desc -> G w [. d [E e]]
783 hasDataEditDesc = true;
784 check_r();
785 NextToken();
786 if (check_w()) {
787 if (wValue_ > 0) {
788 if (check_d()) { // C1307
789 check_e();
790 }
791 } else if (token_.kind() == TokenKind::Point && check_d() &&
792 token_.kind() == TokenKind::E) { // C1308
793 ReportError("A 'G0' edit descriptor must not have an 'e' value");
794 NextToken();
795 if (token_.kind() == TokenKind::UnsignedInteger) {
796 NextToken();
797 }
798 }
799 }
800 break;
801 case TokenKind::L:
802 // R1307 data-edit-desc -> L w
803 hasDataEditDesc = true;
804 check_r();
805 NextToken();
806 check_w();
807 break;
808 case TokenKind::DT:
809 // R1307 data-edit-desc -> DT [char-literal-constant] [( v-list )]
810 hasDataEditDesc = true;
811 check_r();
812 NextToken();
813 if (token_.kind() == TokenKind::String) {
814 NextToken();
815 }
816 if (token_.kind() == TokenKind::LParen) {
817 do {
818 NextToken();
819 if (token_.kind() == TokenKind::Sign) {
820 NextToken();
821 }
822 if (token_.kind() != TokenKind::UnsignedInteger) {
823 ReportError(
824 "Expected integer constant in 'DT' edit descriptor v-list");
825 break;
826 }
827 NextToken();
828 } while (token_.kind() == TokenKind::Comma);
829 if (token_.kind() != TokenKind::RParen) {
830 ReportError("Expected ',' or ')' in 'DT' edit descriptor v-list");
831 while (cursor_ < end_ && token_.kind() != TokenKind::RParen) {
832 NextToken();
833 }
834 }
835 NextToken();
836 }
837 break;
838 case TokenKind::String:
839 // R1304 data-edit-desc -> char-string-edit-desc
840 if (knrValue_ >= 0) {
841 ReportError("Repeat specifier before character string edit descriptor",
842 knrToken_);
843 }
844 NextToken();
845 break;
846 case TokenKind::BN:
847 case TokenKind::BZ:
848 case TokenKind::DC:
849 case TokenKind::DP:
850 case TokenKind::LZ:
851 case TokenKind::LZS:
852 case TokenKind::LZP:
853 case TokenKind::RC:
854 case TokenKind::RD:
855 case TokenKind::RN:
856 case TokenKind::RP:
857 case TokenKind::RU:
858 case TokenKind::RZ:
859 case TokenKind::S:
860 case TokenKind::SP:
861 case TokenKind::SS:
862 // R1317 sign-edit-desc -> SS | SP | S
863 // R1318 blank-interp-edit-desc -> BN | BZ
864 // R1319 round-edit-desc -> RU | RD | RZ | RN | RC | RP
865 // R1320 decimal-edit-desc -> DC | DP
866 // F202X leading-zero-edit-desc -> LZ | LZS | LZP
867 check_r(false);
868 NextToken();
869 break;
870 case TokenKind::P: {
871 // R1313 control-edit-desc -> k P
872 if (knrValue_ < 0) {
873 ReportError("'P' edit descriptor must have a scale factor");
874 } else {
875 scaleFactorToken_ = knrToken_;
876 if (signToken.IsSet() && format_[signToken.offset()] == '-') {
877 scaleFactorValue_ = -knrValue_;
878 } else {
879 scaleFactorValue_ = knrValue_;
880 }
881 }
882 // Diagnosing C1302 may require multiple token lookahead.
883 // Save current cursor position to enable backup.
884 const CHAR *saveCursor{cursor_};
885 NextToken();
886 if (token_.kind() == TokenKind::UnsignedInteger) {
887 NextToken();
888 }
889 switch (token_.kind()) {
890 case TokenKind::D:
891 case TokenKind::E:
892 case TokenKind::EN:
893 case TokenKind::ES:
894 case TokenKind::EX:
895 case TokenKind::F:
896 case TokenKind::G:
897 commaRequired = false;
898 break;
899 default:;
900 }
901 cursor_ = saveCursor;
902 NextToken();
903 break;
904 }
905 case TokenKind::T:
906 case TokenKind::TL:
907 case TokenKind::TR:
908 // R1315 position-edit-desc -> T n | TL n | TR n
909 check_r(false);
910 NextToken();
911 if (integerValue_ <= 0) { // C1311
912 ReportError("'%s' edit descriptor must have a positive position value");
913 }
914 NextToken();
915 break;
916 case TokenKind::X:
917 // R1315 position-edit-desc -> n X
918 if (knrValue_ == 0) { // C1311
919 ReportError("'X' edit descriptor must have a positive position value",
920 knrToken_);
921 } else if (knrValue_ < 0) {
922 ReportWarning(
923 "'X' edit descriptor must have a positive position value");
924 }
925 NextToken();
926 break;
927 case TokenKind::Colon:
928 // R1313 control-edit-desc -> :
929 check_r(false);
930 commaRequired = false;
931 NextToken();
932 break;
933 case TokenKind::Slash:
934 // R1313 control-edit-desc -> [r] /
935 commaRequired = false;
936 NextToken();
937 break;
938 case TokenKind::Backslash:
939 check_r(false);
940 ReportWarning("Non-standard '\\' edit descriptor");
941 NextToken();
942 break;
943 case TokenKind::Dollar:
944 check_r(false);
945 ReportWarning("Non-standard '$' edit descriptor");
946 NextToken();
947 break;
948 case TokenKind::Star:
949 // NextToken assigns a token kind of Star only if * is followed by (.
950 // So the next token is guaranteed to be LParen.
951 if (nestLevel > 0) {
952 ReportError("Nested unlimited format item list");
953 }
954 starToken = token_;
955 if (knrValue_ >= 0) {
956 ReportError(
957 "Repeat specifier before unlimited format item list", knrToken_);
958 }
959 hasDataEditDesc = false;
960 NextToken();
961 [[fallthrough]];
962 case TokenKind::LParen:
963 if (knrValue_ == 0) {
964 ReportError("List repeat specifier must be positive", knrToken_);
965 }
966 if (++nestLevel > maxNesting_) {
967 maxNesting_ = nestLevel;
968 }
969 if (LookAheadChar() == ')') {
970 ReportError("Nested parenthesized format item list is empty");
971 }
972 break;
973 case TokenKind::RParen:
974 if (knrValue_ >= 0) {
975 ReportError("Unexpected integer constant", knrToken_);
976 }
977 do {
978 if (nestLevel == 0) {
979 // Any characters after level-0 ) are ignored.
980 return formatHasErrors_; // normal exit (may have messages)
981 }
982 if (nestLevel == 1 && starToken.IsSet() && !hasDataEditDesc) {
983 SetLength(starToken);
984 ReportError( // C1303
985 "Unlimited format item list must contain a data edit descriptor",
986 starToken);
987 }
988 --nestLevel;
989 NextToken();
990 } while (token_.kind() == TokenKind::RParen);
991 if (nestLevel == 0 && starToken.IsSet()) {
992 ReportError("Character in format after unlimited format item list");
993 }
994 break;
995 case TokenKind::Comma:
996 if (knrValue_ >= 0) {
997 ReportError("Unexpected integer constant", knrToken_);
998 }
999 if (suppressMessageCascade_ || reporterExit_) {
1000 break;
1001 }
1002 [[fallthrough]];
1003 default:
1004 ReportError("Unexpected '%s' in format expression");
1005 NextToken();
1006 }
1007
1008 // Process comma separator and exit an incomplete format.
1009 switch (token_.kind()) {
1010 case TokenKind::Colon: // Comma not required; token not yet processed.
1011 case TokenKind::Slash: // Comma not required; token not yet processed.
1012 case TokenKind::RParen: // Comma not allowed; token not yet processed.
1013 suppressMessageCascade_ = false;
1014 break;
1015 case TokenKind::LParen: // Comma not allowed; token already processed.
1016 case TokenKind::Comma: // Normal comma case; move past token.
1017 suppressMessageCascade_ = false;
1018 NextToken();
1019 break;
1020 case TokenKind::Sign: // Error; main switch has a better message.
1021 case TokenKind::None: // Error; token not yet processed.
1022 if (cursor_ >= end_) {
1023 return formatHasErrors_; // incomplete format error exit
1024 }
1025 break;
1026 default:
1027 // Possible first token of the next format item; token not yet processed.
1028 if (commaRequired) {
1029 const char *s{"Expected ',' or ')' in format expression"}; // C1302
1030 if (previousToken_.kind() == TokenKind::UnsignedInteger &&
1031 previousToken_.length() > 1 &&
1032 itemsWithLeadingInts_.test(token_.kind())) {
1033 // F10.32F10.3 is ambiguous, F10.3F10.3 is not
1034 ReportError(s);
1035 } else {
1036 ReportWarning(s);
1037 }
1038 }
1039 }
1040 }
1041
1042 return formatHasErrors_; // error reporter (message threshold) exit
1043}
1044
1045} // namespace Fortran::common
1046#endif // FORTRAN_COMMON_FORMAT_H_
Definition bit-population-count.h:20
Definition format.h:106