FLANG
format-implementation.h
1//===-- runtime/format-implementation.h -------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9// Implements out-of-line member functions of template class FormatControl
10
11#ifndef FORTRAN_RUNTIME_FORMAT_IMPLEMENTATION_H_
12#define FORTRAN_RUNTIME_FORMAT_IMPLEMENTATION_H_
13
14#include "emit-encoded.h"
15#include "format.h"
16#include "io-stmt.h"
17#include "memory.h"
18#include "flang/Common/format.h"
19#include "flang/Decimal/decimal.h"
20#include "flang/Runtime/main.h"
21#include <algorithm>
22#include <cstring>
23#include <limits>
24
25namespace Fortran::runtime::io {
26
27template <typename CONTEXT>
28RT_API_ATTRS FormatControl<CONTEXT>::FormatControl(const Terminator &terminator,
29 const CharType *format, std::size_t formatLength,
30 const Descriptor *formatDescriptor, int maxHeight)
31 : maxHeight_{static_cast<std::uint8_t>(maxHeight)}, format_{format},
32 formatLength_{static_cast<int>(formatLength)} {
33 RUNTIME_CHECK(terminator, maxHeight == maxHeight_);
34 if (!format && formatDescriptor) {
35 // The format is a character array passed via a descriptor.
36 std::size_t elements{formatDescriptor->Elements()};
37 std::size_t elementBytes{formatDescriptor->ElementBytes()};
38 formatLength = elements * elementBytes / sizeof(CharType);
39 formatLength_ = static_cast<int>(formatLength);
40 if (formatDescriptor->IsContiguous()) {
41 // Treat the contiguous array as a single character value.
42 format_ = const_cast<const CharType *>(
43 reinterpret_cast<CharType *>(formatDescriptor->raw().base_addr));
44 } else {
45 // Concatenate its elements into a temporary array.
46 char *p{reinterpret_cast<char *>(
47 AllocateMemoryOrCrash(terminator, formatLength * sizeof(CharType)))};
48 format_ = p;
49 SubscriptValue at[maxRank];
50 formatDescriptor->GetLowerBounds(at);
51 for (std::size_t j{0}; j < elements; ++j) {
52 std::memcpy(p, formatDescriptor->Element<char>(at), elementBytes);
53 p += elementBytes;
54 formatDescriptor->IncrementSubscripts(at);
55 }
56 freeFormat_ = true;
57 }
58 }
59 RUNTIME_CHECK(
60 terminator, formatLength == static_cast<std::size_t>(formatLength_));
61 stack_[0].start = offset_;
62 stack_[0].remaining = Iteration::unlimited; // 13.4(8)
63}
64
65template <typename CONTEXT>
66RT_API_ATTRS int FormatControl<CONTEXT>::GetIntField(
67 IoErrorHandler &handler, CharType firstCh, bool *hadError) {
68 CharType ch{firstCh ? firstCh : PeekNext()};
69 bool negate{ch == '-'};
70 if (negate || ch == '+') {
71 if (firstCh) {
72 firstCh = '\0';
73 } else {
74 ++offset_;
75 }
76 ch = PeekNext();
77 }
78 if (ch < '0' || ch > '9') {
79 handler.SignalError(IostatErrorInFormat,
80 "Invalid FORMAT: integer expected at '%c'", static_cast<char>(ch));
81 if (hadError) {
82 *hadError = true;
83 }
84 return 0;
85 }
86 int result{0};
87 while (ch >= '0' && ch <= '9') {
88 constexpr int tenth{std::numeric_limits<int>::max() / 10};
89 if (result > tenth ||
90 ch - '0' > std::numeric_limits<int>::max() - 10 * result) {
91 handler.SignalError(
92 IostatErrorInFormat, "FORMAT integer field out of range");
93 if (hadError) {
94 *hadError = true;
95 }
96 return result;
97 }
98 result = 10 * result + ch - '0';
99 if (firstCh) {
100 firstCh = '\0';
101 } else {
102 ++offset_;
103 }
104 ch = PeekNext();
105 }
106 if (negate && (result *= -1) > 0) {
107 handler.SignalError(
108 IostatErrorInFormat, "FORMAT integer field out of range");
109 if (hadError) {
110 *hadError = true;
111 }
112 }
113 return result;
114}
115
116// Xn, TRn, TLn
117template <typename CONTEXT>
118static RT_API_ATTRS bool RelativeTabbing(CONTEXT &context, int n) {
119 ConnectionState &connection{context.GetConnectionState()};
120 if constexpr (std::is_same_v<CONTEXT,
121 ExternalFormattedIoStatementState<Direction::Input>> ||
122 std::is_same_v<CONTEXT,
123 ExternalFormattedIoStatementState<Direction::Output>>) {
124 if (n != 0 && connection.isUTF8) {
125 const char *p{};
126 if (n > 0) { // Xn or TRn
127 // Skip 'n' multi-byte characters. If that's more than are in the
128 // current record, that's valid -- the program can position past the
129 // end and then reposition back with Tn or TLn.
130 std::size_t bytesLeft{context.ViewBytesInRecord(p, true)};
131 for (; n > 0 && bytesLeft && p; --n) {
132 std::size_t byteCount{MeasureUTF8Bytes(*p)};
133 if (byteCount > bytesLeft) {
134 break;
135 }
136 context.HandleRelativePosition(byteCount);
137 bytesLeft -= byteCount;
138 // Don't call GotChar(byteCount), these don't count towards SIZE=
139 p += byteCount;
140 }
141 } else { // n < 0: TLn
142 n = -n;
143 if (std::int64_t excess{connection.positionInRecord -
144 connection.recordLength.value_or(connection.positionInRecord)};
145 excess > 0) {
146 // Have tabbed past the end of the record
147 if (excess >= n) {
148 context.HandleRelativePosition(-n);
149 return true;
150 }
151 context.HandleRelativePosition(-excess);
152 n -= excess;
153 }
154 std::size_t bytesLeft{context.ViewBytesInRecord(p, false)};
155 // Go back 'n' multi-byte characters.
156 for (; n > 0 && bytesLeft && p; --n) {
157 std::size_t byteCount{MeasurePreviousUTF8Bytes(p, bytesLeft)};
158 context.HandleRelativePosition(-byteCount);
159 bytesLeft -= byteCount;
160 p -= byteCount;
161 }
162 }
163 }
164 }
165 if (connection.internalIoCharKind > 1) {
166 n *= connection.internalIoCharKind;
167 }
168 context.HandleRelativePosition(n);
169 return true;
170}
171
172// Tn
173template <typename CONTEXT>
174static RT_API_ATTRS bool AbsoluteTabbing(CONTEXT &context, int n) {
175 ConnectionState &connection{context.GetConnectionState()};
176 n = n > 0 ? n - 1 : 0; // convert 1-based position to 0-based offset
177 if constexpr (std::is_same_v<CONTEXT,
178 ExternalFormattedIoStatementState<Direction::Input>> ||
179 std::is_same_v<CONTEXT,
180 ExternalFormattedIoStatementState<Direction::Output>>) {
181 if (connection.isUTF8) {
182 // Reset to the beginning of the record, then TR(n-1)
183 connection.HandleAbsolutePosition(0);
184 return RelativeTabbing(context, n);
185 }
186 }
187 if (connection.internalIoCharKind > 1) {
188 n *= connection.internalIoCharKind;
189 }
190 context.HandleAbsolutePosition(n);
191 return true;
192}
193
194template <typename CONTEXT>
195static RT_API_ATTRS void HandleControl(
196 CONTEXT &context, char ch, char next, int n) {
197 MutableModes &modes{context.mutableModes()};
198 switch (ch) {
199 case 'B':
200 if (next == 'Z') {
201 modes.editingFlags |= blankZero;
202 return;
203 }
204 if (next == 'N') {
205 modes.editingFlags &= ~blankZero;
206 return;
207 }
208 break;
209 case 'D':
210 if (next == 'C') {
211 modes.editingFlags |= decimalComma;
212 return;
213 }
214 if (next == 'P') {
215 modes.editingFlags &= ~decimalComma;
216 return;
217 }
218 break;
219 case 'P':
220 if (!next) {
221 modes.scale = n; // kP - decimal scaling by 10**k
222 return;
223 }
224 break;
225 case 'R':
226 switch (next) {
227 case 'N':
228 modes.round = decimal::RoundNearest;
229 return;
230 case 'Z':
231 modes.round = decimal::RoundToZero;
232 return;
233 case 'U':
234 modes.round = decimal::RoundUp;
235 return;
236 case 'D':
237 modes.round = decimal::RoundDown;
238 return;
239 case 'C':
240 modes.round = decimal::RoundCompatible;
241 return;
242 case 'P':
243 modes.round = executionEnvironment.defaultOutputRoundingMode;
244 return;
245 default:
246 break;
247 }
248 break;
249 case 'X':
250 if (!next && RelativeTabbing(context, n)) {
251 return;
252 }
253 break;
254 case 'S':
255 if (next == 'P') {
256 modes.editingFlags |= signPlus;
257 return;
258 }
259 if (!next || next == 'S') {
260 modes.editingFlags &= ~signPlus;
261 return;
262 }
263 break;
264 case 'T': {
265 if (!next) { // Tn
266 if (AbsoluteTabbing(context, n)) {
267 return;
268 }
269 } else if (next == 'R' || next == 'L') { // TRn / TLn
270 if (RelativeTabbing(context, next == 'L' ? -n : n)) {
271 return;
272 }
273 }
274 } break;
275 default:
276 break;
277 }
278 if (next) {
279 context.SignalError(IostatErrorInFormat,
280 "Unknown '%c%c' edit descriptor in FORMAT", ch, next);
281 } else {
282 context.SignalError(
283 IostatErrorInFormat, "Unknown '%c' edit descriptor in FORMAT", ch);
284 }
285}
286
287// Locates the next data edit descriptor in the format.
288// Handles all repetition counts and control edit descriptors.
289// Generally assumes that the format string has survived the common
290// format validator gauntlet.
291template <typename CONTEXT>
292RT_API_ATTRS int FormatControl<CONTEXT>::CueUpNextDataEdit(
293 Context &context, bool stop) {
294 bool hitUnlimitedLoopEnd{false};
295 // Do repetitions remain on an unparenthesized data edit?
296 while (height_ > 1 && format_[stack_[height_ - 1].start] != '(') {
297 offset_ = stack_[height_ - 1].start;
298 int repeat{stack_[height_ - 1].remaining};
299 --height_;
300 if (repeat > 0) {
301 return repeat;
302 }
303 }
304 while (true) {
305 Fortran::common::optional<int> repeat;
306 bool unlimited{false};
307 auto maybeReversionPoint{offset_};
308 CharType ch{GetNextChar(context)};
309 while (ch == ',' || ch == ':') {
310 // Skip commas, and don't complain if they're missing; the format
311 // validator does that.
312 if (stop && ch == ':') {
313 return 0;
314 }
315 ch = GetNextChar(context);
316 }
317 if (ch == '-' || ch == '+' || (ch >= '0' && ch <= '9')) {
318 bool hadSign{ch == '-' || ch == '+'};
319 repeat = GetIntField(context, ch);
320 ch = GetNextChar(context);
321 if (hadSign && ch != 'p' && ch != 'P') {
322 ReportBadFormat(context,
323 "Invalid FORMAT: signed integer may appear only before 'P",
324 maybeReversionPoint);
325 return 0;
326 }
327 } else if (ch == '*') {
328 unlimited = true;
329 ch = GetNextChar(context);
330 if (ch != '(') {
331 ReportBadFormat(context,
332 "Invalid FORMAT: '*' may appear only before '('",
333 maybeReversionPoint);
334 return 0;
335 }
336 if (height_ != 1) {
337 ReportBadFormat(context,
338 "Invalid FORMAT: '*' must be nested in exactly one set of "
339 "parentheses",
340 maybeReversionPoint);
341 return 0;
342 }
343 }
344 ch = Capitalize(ch);
345 if (ch == '(') {
346 if (height_ >= maxHeight_) {
347 ReportBadFormat(context,
348 "FORMAT stack overflow: too many nested parentheses",
349 maybeReversionPoint);
350 return 0;
351 }
352 stack_[height_].start = offset_ - 1; // the '('
353 RUNTIME_CHECK(context, format_[stack_[height_].start] == '(');
354 if (unlimited || height_ == 0) {
355 stack_[height_].remaining = Iteration::unlimited;
356 } else if (repeat) {
357 if (*repeat <= 0) {
358 *repeat = 1; // error recovery
359 }
360 stack_[height_].remaining = *repeat - 1;
361 } else {
362 stack_[height_].remaining = 0;
363 }
364 if (height_ == 1 && !hitEnd_) {
365 // Subtle point (F'2018 13.4 para 9): the last parenthesized group
366 // at height 1 becomes the restart point after control reaches the
367 // end of the format, including its repeat count.
368 stack_[0].start = maybeReversionPoint;
369 }
370 ++height_;
371 } else if (height_ == 0) {
372 ReportBadFormat(context, "FORMAT lacks initial '('", maybeReversionPoint);
373 return 0;
374 } else if (ch == ')') {
375 if (height_ == 1) {
376 hitEnd_ = true;
377 if (stop) {
378 return 0; // end of FORMAT and no data items remain
379 }
380 context.AdvanceRecord(); // implied / before rightmost )
381 }
382 auto restart{stack_[height_ - 1].start};
383 if (format_[restart] == '(') {
384 ++restart;
385 }
386 if (stack_[height_ - 1].remaining == Iteration::unlimited) {
387 if (height_ > 1 && GetNextChar(context) != ')') {
388 ReportBadFormat(context,
389 "Unlimited repetition in FORMAT may not be followed by more "
390 "items",
391 restart);
392 return 0;
393 }
394 if (hitUnlimitedLoopEnd) {
395 ReportBadFormat(context,
396 "Unlimited repetition in FORMAT lacks data edit descriptors",
397 restart);
398 return 0;
399 }
400 hitUnlimitedLoopEnd = true;
401 offset_ = restart;
402 } else if (stack_[height_ - 1].remaining-- > 0) {
403 offset_ = restart;
404 } else {
405 --height_;
406 }
407 } else if (ch == '\'' || ch == '"') {
408 // Quoted 'character literal'
409 CharType quote{ch};
410 auto start{offset_};
411 while (offset_ < formatLength_ && format_[offset_] != quote) {
412 ++offset_;
413 }
414 if (offset_ >= formatLength_) {
415 ReportBadFormat(context,
416 "FORMAT missing closing quote on character literal",
417 maybeReversionPoint);
418 return 0;
419 }
420 ++offset_;
421 std::size_t chars{
422 static_cast<std::size_t>(&format_[offset_] - &format_[start])};
423 if (offset_ < formatLength_ && format_[offset_] == quote) {
424 // subtle: handle doubled quote character in a literal by including
425 // the first in the output, then treating the second as the start
426 // of another character literal.
427 } else {
428 --chars;
429 }
430 EmitAscii(context, format_ + start, chars);
431 } else if (ch == 'H') {
432 // 9HHOLLERITH
433 if (!repeat || *repeat < 1 || offset_ + *repeat > formatLength_) {
434 ReportBadFormat(context, "Invalid width on Hollerith in FORMAT",
435 maybeReversionPoint);
436 return 0;
437 }
438 EmitAscii(context, format_ + offset_, static_cast<std::size_t>(*repeat));
439 offset_ += *repeat;
440 } else if (ch >= 'A' && ch <= 'Z') {
441 int start{offset_ - 1};
442 CharType next{'\0'};
443 if (ch != 'P') { // 1PE5.2 - comma not required (C1302)
444 CharType peek{Capitalize(PeekNext())};
445 if (peek >= 'A' && peek <= 'Z') {
446 if ((ch == 'A' && peek == 'T' /* anticipate F'202X AT editing */) ||
447 ch == 'B' || ch == 'D' || ch == 'E' || ch == 'R' || ch == 'S' ||
448 ch == 'T') {
449 // Assume a two-letter edit descriptor
450 next = peek;
451 ++offset_;
452 } else {
453 // extension: assume a comma between 'ch' and 'peek'
454 }
455 }
456 }
457 if ((!next &&
458 (ch == 'A' || ch == 'I' || ch == 'B' || ch == 'E' || ch == 'D' ||
459 ch == 'O' || ch == 'Z' || ch == 'F' || ch == 'G' ||
460 ch == 'L')) ||
461 (ch == 'E' && (next == 'N' || next == 'S' || next == 'X')) ||
462 (ch == 'D' && next == 'T')) {
463 // Data edit descriptor found
464 offset_ = start;
465 return repeat && *repeat > 0 ? *repeat : 1;
466 } else {
467 // Control edit descriptor
468 if (ch == 'T') { // Tn, TLn, TRn
469 repeat = GetIntField(context);
470 }
471 HandleControl(context, static_cast<char>(ch), static_cast<char>(next),
472 repeat ? *repeat : 1);
473 }
474 } else if (ch == '/') {
475 context.AdvanceRecord(repeat && *repeat > 0 ? *repeat : 1);
476 } else if (ch == '$' || ch == '\\') {
477 context.mutableModes().nonAdvancing = true;
478 } else if (ch == '\t' || ch == '\v') {
479 // Tabs (extension)
480 // TODO: any other raw characters?
481 EmitAscii(context, format_ + offset_ - 1, 1);
482 } else {
483 ReportBadFormat(
484 context, "Invalid character in FORMAT", maybeReversionPoint);
485 return 0;
486 }
487 }
488}
489
490// Returns the next data edit descriptor
491template <typename CONTEXT>
492RT_API_ATTRS Fortran::common::optional<DataEdit>
493FormatControl<CONTEXT>::GetNextDataEdit(Context &context, int maxRepeat) {
494 int repeat{CueUpNextDataEdit(context)};
495 auto start{offset_};
496 DataEdit edit;
497 edit.modes = context.mutableModes();
498 // Handle repeated nonparenthesized edit descriptors
499 edit.repeat = std::min(repeat, maxRepeat); // 0 if maxRepeat==0
500 if (repeat > maxRepeat) {
501 stack_[height_].start = start; // after repeat count
502 stack_[height_].remaining = repeat - edit.repeat;
503 ++height_;
504 }
505 edit.descriptor = static_cast<char>(Capitalize(GetNextChar(context)));
506 if (edit.descriptor == 'D' && Capitalize(PeekNext()) == 'T') {
507 // DT['iotype'][(v_list)] defined I/O
508 edit.descriptor = DataEdit::DefinedDerivedType;
509 ++offset_;
510 if (auto quote{static_cast<char>(PeekNext())};
511 quote == '\'' || quote == '"') {
512 // Capture the quoted 'iotype'
513 bool ok{false};
514 for (++offset_; offset_ < formatLength_;) {
515 auto ch{static_cast<char>(format_[offset_++])};
516 if (ch == quote &&
517 (offset_ == formatLength_ ||
518 static_cast<char>(format_[offset_]) != quote)) {
519 ok = true;
520 break; // that was terminating quote
521 }
522 if (edit.ioTypeChars >= edit.maxIoTypeChars) {
523 ReportBadFormat(context, "Excessive DT'iotype' in FORMAT", start);
524 return Fortran::common::nullopt;
525 }
526 edit.ioType[edit.ioTypeChars++] = ch;
527 if (ch == quote) {
528 ++offset_;
529 }
530 }
531 if (!ok) {
532 ReportBadFormat(context, "Unclosed DT'iotype' in FORMAT", start);
533 return Fortran::common::nullopt;
534 }
535 }
536 if (PeekNext() == '(') {
537 // Capture the v_list arguments
538 bool ok{false};
539 for (++offset_; offset_ < formatLength_;) {
540 bool hadError{false};
541 int n{GetIntField(context, '\0', &hadError)};
542 if (hadError) {
543 ok = false;
544 break;
545 }
546 if (edit.vListEntries >= edit.maxVListEntries) {
547 ReportBadFormat(context, "Excessive DT(v_list) in FORMAT", start);
548 return Fortran::common::nullopt;
549 }
550 edit.vList[edit.vListEntries++] = n;
551 auto ch{static_cast<char>(GetNextChar(context))};
552 if (ch != ',') {
553 ok = ch == ')';
554 break;
555 }
556 }
557 if (!ok) {
558 ReportBadFormat(context, "Unclosed DT(v_list) in FORMAT", start);
559 return Fortran::common::nullopt;
560 }
561 }
562 } else { // not DT'iotype'
563 if (edit.descriptor == 'E') {
564 if (auto next{static_cast<char>(Capitalize(PeekNext()))};
565 next == 'N' || next == 'S' || next == 'X') {
566 edit.variation = next;
567 ++offset_;
568 }
569 }
570 // Width is optional for A[w] in the standard and optional
571 // for Lw in most compilers.
572 // Intel & (presumably, from bug report) Fujitsu allow
573 // a missing 'w' & 'd'/'m' for other edit descriptors -- but not
574 // 'd'/'m' with a missing 'w' -- and so interpret "(E)" as "(E0)".
575 if (CharType ch{PeekNext()}; (ch >= '0' && ch <= '9') || ch == '.') {
576 edit.width = GetIntField(context);
577 if constexpr (std::is_base_of_v<InputStatementState, CONTEXT>) {
578 if (edit.width.value_or(-1) == 0) {
579 ReportBadFormat(context, "Input field width is zero", start);
580 }
581 }
582 if (PeekNext() == '.') {
583 ++offset_;
584 edit.digits = GetIntField(context);
585 if (CharType ch{PeekNext()};
586 ch == 'e' || ch == 'E' || ch == 'd' || ch == 'D') {
587 ++offset_;
588 edit.expoDigits = GetIntField(context);
589 }
590 }
591 }
592 }
593 return edit;
594}
595
596template <typename CONTEXT>
597RT_API_ATTRS void FormatControl<CONTEXT>::Finish(Context &context) {
598 CueUpNextDataEdit(context, true /* stop at colon or end of FORMAT */);
599 if (freeFormat_) {
600 FreeMemory(const_cast<CharType *>(format_));
601 }
602}
603} // namespace Fortran::runtime::io
604#endif // FORTRAN_RUNTIME_FORMAT_IMPLEMENTATION_H_