FLANG
prescan.h
//===-- lib/Parser/prescan.h ------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
8
9#ifndef FORTRAN_PARSER_PRESCAN_H_
10#define FORTRAN_PARSER_PRESCAN_H_
11
// Defines a fast Fortran source prescanning phase that implements some
// character-level features of the language that can be inefficient to
// support directly in a backtracking parser.  This phase handles Fortran
// line continuation, comment removal, card image margins, padding out
// fixed form character literals on truncated card images, file
// inclusion, and driving the Fortran source preprocessor.
18
#include "flang/Parser/characters.h"
#include "flang/Parser/message.h"
#include "flang/Parser/provenance.h"
#include "flang/Parser/token-sequence.h"
#include "flang/Support/Fortran-features.h"
#include <bitset>
#include <cstddef>
#include <cstring>
#include <optional>
#include <string>
#include <unordered_set>
#include <utility>
28
29namespace Fortran::parser {
30
31class Messages;
32class Preprocessor;
33
34class Prescanner {
35public:
36 Prescanner(Messages &, CookedSource &, Preprocessor &,
38 Prescanner(
39 const Prescanner &, Preprocessor &, bool isNestedInIncludeDirective);
40 Prescanner(const Prescanner &) = delete;
41 Prescanner(Prescanner &&) = delete;
42
43 const AllSources &allSources() const { return allSources_; }
44 AllSources &allSources() { return allSources_; }
45 const Messages &messages() const { return messages_; }
46 Messages &messages() { return messages_; }
47 const Preprocessor &preprocessor() const { return preprocessor_; }
48 Preprocessor &preprocessor() { return preprocessor_; }
49 common::LanguageFeatureControl &features() { return features_; }
50
51 Prescanner &set_preprocessingOnly(bool yes) {
52 preprocessingOnly_ = yes;
53 return *this;
54 }
55 Prescanner &set_expandIncludeLines(bool yes) {
56 expandIncludeLines_ = yes;
57 return *this;
58 }
59 Prescanner &set_fixedForm(bool yes) {
60 inFixedForm_ = yes;
61 return *this;
62 }
63 Prescanner &set_encoding(Encoding code) {
64 encoding_ = code;
65 return *this;
66 }
67 Prescanner &set_fixedFormColumnLimit(int limit) {
68 fixedFormColumnLimit_ = limit;
69 return *this;
70 }
71
72 Prescanner &AddCompilerDirectiveSentinel(const std::string &);
73
74 void Prescan(ProvenanceRange);
75 void Statement();
76 void NextLine();
77
78 // Callbacks for use by Preprocessor.
79 bool IsAtEnd() const { return nextLine_ >= limit_; }
80 bool IsNextLinePreprocessorDirective() const;
81 TokenSequence TokenizePreprocessorDirective();
82 Provenance GetCurrentProvenance() const { return GetProvenance(at_); }
83
84 std::optional<CharBlock> GetKeywordMacroName(const char *) const;
85 TokenSequence ExpandKeywordMacro(CharBlock, Provenance) const;
86
87 const char *IsCompilerDirectiveSentinel(const char *, std::size_t) const;
88 const char *IsCompilerDirectiveSentinel(CharBlock) const;
89 // 'first' is the sentinel, 'second' is beginning of payload
90 std::optional<std::pair<const char *, const char *>>
91 IsCompilerDirectiveSentinel(const char *p) const;
92
93 template <typename... A> Message &Say(A &&...a) {
94 return messages_.Say(std::forward<A>(a)...);
95 }
96 template <typename... A>
97 Message *Warn(common::UsageWarning warning, A &&...a) {
98 return messages_.Warn(false, features_, warning, std::forward<A>(a)...);
99 }
100 template <typename... A>
101 Message *Warn(common::LanguageFeature feature, A &&...a) {
102 return messages_.Warn(false, features_, feature, std::forward<A>(a)...);
103 }
104
105private:
106 struct LineClassification {
107 enum class Kind {
108 Comment,
109 ConditionalCompilationDirective,
110 IncludeDirective, // #include
111 DefinitionDirective, // #define & #undef
112 PreprocessorDirective,
113 IncludeLine, // Fortran INCLUDE
115 CompilerDirectiveAfterMacroExpansion, // !MACRO -> !$OMP ...
116 Source
117 };
118 LineClassification(Kind k, std::size_t po = 0, const char *s = nullptr)
119 : kind{k}, payloadOffset{po}, sentinel{s} {}
120 LineClassification(LineClassification &&) = default;
121 LineClassification &operator=(LineClassification &&) = default;
122 Kind kind;
123 std::size_t payloadOffset; // byte offset of content
124 const char *sentinel; // if it's a compiler directive
125 };
126
127 void BeginSourceLine(const char *at) {
128 at_ = at;
129 column_ = 1;
130 tabInCurrentLine_ = false;
131 }
132
133 void BeginSourceLineAndAdvance() {
134 BeginSourceLine(nextLine_);
135 NextLine();
136 }
137
138 void BeginStatementAndAdvance() {
139 BeginSourceLineAndAdvance();
140 slashInCurrentStatement_ = false;
141 preventHollerith_ = false;
142 parenthesisNesting_ = 0;
143 continuationLines_ = 0;
144 isPossibleMacroCall_ = false;
145 disableSourceContinuation_ = false;
146 }
147
148 Provenance GetProvenance(const char *sourceChar) const {
149 return startProvenance_ + (sourceChar - start_);
150 }
151
152 ProvenanceRange GetProvenanceRange(
153 const char *first, const char *afterLast) const {
154 std::size_t bytes = afterLast - first;
155 return {startProvenance_ + (first - start_), bytes};
156 }
157
158 void EmitChar(TokenSequence &tokens, char ch) {
159 tokens.PutNextTokenChar(ch, GetCurrentProvenance());
160 }
161
162 void EmitInsertedChar(TokenSequence &tokens, char ch) {
163 Provenance provenance{allSources().CompilerInsertionProvenance(ch)};
164 tokens.PutNextTokenChar(ch, provenance);
165 }
166
167 char EmitCharAndAdvance(TokenSequence &tokens, char ch) {
168 EmitChar(tokens, ch);
169 NextChar();
170 return *at_;
171 }
172
173 bool IsOpenMPConditionalLine(const char *sentinel) const {
174 return sentinel && sentinel[0] == '$' && !sentinel[1];
175 }
176 bool IsOpenACCConditionalLine(const char *sentinel) const {
177 return sentinel && sentinel[0] == '@' && sentinel[1] == 'a' &&
178 sentinel[2] == 'c' && sentinel[3] == 'c' && sentinel[4] == '\0';
179 }
180 bool IsCUDAConditionalLine(const char *sentinel) const {
181 return sentinel && sentinel[0] == '@' && sentinel[1] == 'c' &&
182 sentinel[2] == 'u' && sentinel[3] == 'f' && sentinel[4] == '\0';
183 }
184 bool InCompilerDirective() const { return directiveSentinel_ != nullptr; }
185 bool InOpenMPConditionalLine() const {
186 return IsOpenMPConditionalLine(directiveSentinel_);
187 }
188 bool InOpenACCConditionalLine() const {
189 return IsOpenACCConditionalLine(directiveSentinel_);
190 }
191 bool InCUDAConditionalLine() const {
192 return IsCUDAConditionalLine(directiveSentinel_);
193 }
194 bool InOpenACCOrCUDAConditionalLine() const {
195 return InOpenACCConditionalLine() || InCUDAConditionalLine();
196 }
197 bool InConditionalLine() const {
198 return InOpenMPConditionalLine() || InOpenACCOrCUDAConditionalLine();
199 }
200 bool IsOpenMPDirective() const {
201 return directiveSentinel_ && std::strcmp(directiveSentinel_, "$omp") == 0;
202 }
203 bool InFixedFormSource() const {
204 return inFixedForm_ && !inPreprocessorDirective_ && !InCompilerDirective();
205 }
206
207 bool IsCComment(const char *p) const {
208 return p[0] == '/' && p[1] == '*' &&
209 (inPreprocessorDirective_ ||
210 (!inCharLiteral_ &&
211 features_.IsEnabled(
212 common::LanguageFeature::ClassicCComments)));
213 }
214
215 void CheckAndEmitLine(TokenSequence &, Provenance newlineProvenance);
216 void LabelField(TokenSequence &);
217 void EnforceStupidEndStatementRules(const TokenSequence &);
218 void SkipToEndOfLine();
219 bool MustSkipToEndOfLine() const;
220 void NextChar();
221 // True when input flowed to a continuation line
222 bool SkipToNextSignificantCharacter();
223 void SkipCComments();
224 void SkipSpaces();
225 static const char *SkipWhiteSpace(const char *);
226 const char *SkipWhiteSpaceIncludingEmptyMacros(const char *) const;
227 const char *SkipWhiteSpaceAndCComments(const char *) const;
228 const char *SkipCComment(const char *) const;
229 bool NextToken(TokenSequence &);
230 bool HandleExponent(TokenSequence &);
231 bool HandleKindSuffix(TokenSequence &);
232 bool HandleExponentAndOrKindSuffix(TokenSequence &);
233 void QuotedCharacterLiteral(TokenSequence &, const char *start);
234 void Hollerith(TokenSequence &, int count, const char *start);
235 bool PadOutCharacterLiteral(TokenSequence &);
236 bool SkipCommentLine(bool afterAmpersand);
237 bool IsFixedFormCommentLine(const char *) const;
238 const char *IsFreeFormComment(const char *) const;
239 std::optional<std::size_t> IsIncludeLine(const char *) const;
240 void FortranInclude(const char *quote);
241 const char *IsPreprocessorDirectiveLine(const char *) const;
242 const char *FixedFormContinuationLine(bool atNewline);
243 const char *FreeFormContinuationLine(bool ampersand);
244 bool IsImplicitContinuation() const;
245 bool FixedFormContinuation(bool atNewline);
246 bool FreeFormContinuation();
247 bool Continuation(bool mightNeedFixedFormSpace);
248 std::optional<LineClassification> IsFixedFormCompilerDirectiveLine(
249 const char *) const;
250 std::optional<LineClassification> IsFreeFormCompilerDirectiveLine(
251 const char *) const;
252 LineClassification ClassifyLine(const char *) const;
253 LineClassification ClassifyLine(
254 TokenSequence &, Provenance newlineProvenance) const;
255 bool SourceFormChange(std::string &&);
256 bool CompilerDirectiveContinuation(TokenSequence &, const char *sentinel);
257 bool SourceLineContinuation(TokenSequence &);
258 std::optional<LineClassification>
259 IsCompilerDirectiveSentinelAfterKeywordMacro(const char *p) const;
260
261 Messages &messages_;
262 CookedSource &cooked_;
263 Preprocessor &preprocessor_;
264 AllSources &allSources_;
266 bool preprocessingOnly_{false};
267 bool expandIncludeLines_{true};
268 bool isNestedInIncludeDirective_{false};
269 bool backslashFreeFormContinuation_{false};
270 bool inFixedForm_{false};
271 int fixedFormColumnLimit_{72};
272 Encoding encoding_{Encoding::UTF_8};
273 int parenthesisNesting_{0};
274 int prescannerNesting_{0};
275 int continuationLines_{0};
276 bool isPossibleMacroCall_{false};
277 bool afterPreprocessingDirective_{false};
278 bool disableSourceContinuation_{false};
279
280 Provenance startProvenance_;
281 const char *start_{nullptr}; // beginning of current source file content
282 const char *limit_{nullptr}; // first address after end of current source
283 const char *nextLine_{nullptr}; // next line to process; <= limit_
284 const char *directiveSentinel_{nullptr}; // current compiler directive
285
286 // These data members are state for processing the source line containing
287 // "at_", which goes to up to the newline character before "nextLine_".
288 const char *at_{nullptr}; // next character to process; < nextLine_
289 int column_{1}; // card image column position of next character
290 bool tabInCurrentLine_{false};
291 bool slashInCurrentStatement_{false};
292 bool preventHollerith_{false}; // CHARACTER*4HIMOM not Hollerith
293 bool inCharLiteral_{false};
294 bool continuationInCharLiteral_{false};
295 bool inPreprocessorDirective_{false};
296
297 // True after processing a continuation that can't be allowed
298 // to appear in the middle of an identifier token, but is fixed form,
299 // or is free form and doesn't have a space character handy to use as
300 // a separator when:
301 // a) (standard) doesn't begin with a leading '&' on the continuation
302 // line, but has a non-blank in column 1, or
303 // b) (extension) does have a leading '&', but didn't have one
304 // on the continued line.
305 bool brokenToken_{false};
306
307 // When a free form continuation marker (&) appears at the end of a line
308 // before a INCLUDE or #include, we delete it and omit the newline, so
309 // that the first line of the included file is truly a continuation of
310 // the line before. Also used when the & appears at the end of the last
311 // line in an include file.
312 bool omitNewline_{false};
313 bool skipLeadingAmpersand_{false};
314
315 const std::size_t firstCookedCharacterOffset_{cooked_.BufferedBytes()};
316
317 const Provenance spaceProvenance_{
318 allSources().CompilerInsertionProvenance(' ')};
319 const Provenance backslashProvenance_{
320 allSources().CompilerInsertionProvenance('\\')};
321
322 // To avoid probing the set of active compiler directive sentinel strings
323 // on every comment line, they're checked first with a cheap Bloom filter.
324 static const int prime1{1019}, prime2{1021};
325 std::bitset<prime2> compilerDirectiveBloomFilter_; // 128 bytes
326 std::unordered_set<std::string> compilerDirectiveSentinels_;
327};
328} // namespace Fortran::parser
329#endif // FORTRAN_PARSER_PRESCAN_H_
Definition Fortran-features.h:95
Definition provenance.h:139
Definition char-block.h:28
Definition provenance.h:233
Definition message.h:200
Definition message.h:332
Definition preprocessor.h:74
Definition provenance.h:52
Definition token-sequence.h:35
Definition check-expression.h:19
Definition parse-tree.h:3337