Lines 44-58
Link Here
|
44 |
import java.util.logging.Level; |
44 |
import java.util.logging.Level; |
45 |
import java.util.logging.Logger; |
45 |
import java.util.logging.Logger; |
46 |
import org.netbeans.api.lexer.TokenId; |
46 |
import org.netbeans.api.lexer.TokenId; |
47 |
import org.netbeans.lib.lexer.LanguageOperation; |
47 |
import org.netbeans.lib.editor.util.CharSequenceUtilities; |
|
|
48 |
import org.netbeans.lib.lexer.EmbeddedJoinInfo; |
49 |
import org.netbeans.lib.lexer.EmbeddedTokenList; |
50 |
import org.netbeans.lib.lexer.JoinLexerInputOperation; |
51 |
import org.netbeans.lib.lexer.JoinTokenList; |
48 |
import org.netbeans.lib.lexer.LexerInputOperation; |
52 |
import org.netbeans.lib.lexer.LexerInputOperation; |
49 |
import org.netbeans.lib.lexer.LexerUtilsConstants; |
53 |
import org.netbeans.lib.lexer.LexerUtilsConstants; |
50 |
import org.netbeans.lib.lexer.token.AbstractToken; |
54 |
import org.netbeans.lib.lexer.token.AbstractToken; |
51 |
import org.netbeans.spi.lexer.TokenValidator; |
55 |
import org.netbeans.lib.lexer.token.PartToken; |
52 |
|
56 |
|
53 |
|
57 |
|
54 |
/** |
58 |
/** |
55 |
* Token updater fixes a list of tokens constructed for a document |
59 |
* Token list updater fixes a list of tokens constructed for a document |
56 |
* after text of the document gets modified. |
60 |
* after text of the document gets modified. |
57 |
* <br> |
61 |
* <br> |
58 |
* Subclasses need to define all the abstract methods |
62 |
* Subclasses need to define all the abstract methods |
Lines 62-86
Link Here
|
62 |
* Updater looks similar to list iterator |
66 |
* Updater looks similar to list iterator |
63 |
* but there are differences in the semantics |
67 |
* but there are differences in the semantics |
64 |
* of iterator's modification operations. |
68 |
* of iterator's modification operations. |
65 |
* <p> |
69 |
* <br/> |
66 |
* The algorithm used in the {@link #update(int, int)} |
70 |
* The algorithm used in the {@link #update(int, int)} |
67 |
* is based on "General Incremental Lexical Analysis" written |
71 |
* is based on "General Incremental Lexical Analysis" written |
68 |
* by Tim A. Wagner and Susan L. Graham, University |
72 |
* by Tim A. Wagner and Susan L. Graham, University |
69 |
* of California, Berkeley. It's available online |
73 |
* of California, Berkeley. It's available online |
70 |
* at <a href="http://www.cs.berkeley.edu/Research/Projects/harmonia/papers/twagner-lexing.pdf"> |
74 |
* at <a href="http://www.cs.berkeley.edu/Research/Projects/harmonia/papers/twagner-lexing.pdf"> |
71 |
* twagner-lexing.pdf</a>. |
75 |
* twagner-lexing.pdf</a>. |
72 |
* <br> |
76 |
* <br/> |
73 |
* Ending <code>EOF</code> token is not used but the lookahead |
77 |
* Ending <code>EOF</code> token is not used but the lookahead |
74 |
* of the ending token(s) is increased by one (past the end of the input) |
78 |
* of the ending token(s) is increased by one (past the end of the input) |
75 |
* if they have reached the EOF. |
79 |
* if they have reached the EOF. |
76 |
* <br> |
80 |
* <br/> |
77 |
* Non-startable tokens are not supported. |
81 |
* Non-startable tokens are not supported. |
78 |
* <br> |
82 |
* <br/> |
79 |
* When updating a token with lookback one as a result |
83 |
* When updating a token with lookback one as a result |
80 |
* of modification the lookahead of the preceding token is inspected |
84 |
* of modification the lookahead of the preceding token is inspected |
81 |
* to find out whether the modification has really affected it. |
85 |
* to find out whether the modification has really affected it. |
82 |
* This can often save the previous token from being relexed. |
86 |
* This can often save the previous token from being relexed. |
83 |
* <br> |
87 |
* <br/> |
84 |
* Currently the algorithm computes the lookback values on the fly |
88 |
* Currently the algorithm computes the lookback values on the fly |
85 |
* and it does not store the lookback in the tokens. For typical languages |
89 |
* and it does not store the lookback in the tokens. For typical languages |
86 |
* the lookback is reasonably small (0, 1 or 2) so it's usually not worth |
90 |
* the lookback is reasonably small (0, 1 or 2) so it's usually not worth |
Lines 88-106
Link Here
|
88 |
* There would also be an additional overhead of updating the lookback |
92 |
* There would also be an additional overhead of updating the lookback |
89 |
* values in the tokens after the modification and the algorithm code would |
93 |
* values in the tokens after the modification and the algorithm code would |
90 |
* be somewhat less readable. |
94 |
* be somewhat less readable. |
|
|
95 |
* </p> |
91 |
* |
96 |
* |
92 |
* <p> |
97 |
* <p> |
93 |
* The algorithm removes the affected tokens in the natural order as they |
98 |
* The algorithm removes the affected tokens in the natural order as they |
94 |
* follow in the token stream. That can be used when the removed tokens |
99 |
* follow in the token stream. That can be used when the removed tokens |
95 |
* need to be collected (e.g. in an array). |
100 |
* need to be collected (e.g. in an array). |
96 |
* <br> |
101 |
* <br/> |
97 |
* If the offset and state after token recognition matches |
102 |
* If the offset and state after token recognition matches |
98 |
* the end offset and state after recognition of the originally present |
103 |
* the end offset and state after recognition of the originally present |
99 |
* token then the relexing is stopped because a match was found and the newly |
104 |
* token then the relexing is stopped because a match was found and the newly |
100 |
* produced tokens would match the present ones. |
105 |
* produced tokens would match the present ones. |
101 |
* <br> |
106 |
* <br/> |
102 |
* Otherwise the token(s) in the list are removed and replaced |
107 |
* Otherwise the token(s) in the list are removed and replaced |
103 |
* by the relexed token and the relexing continues until a match is reached. |
108 |
* by the relexed token and the relexing continues until a match is reached. |
|
|
109 |
* </p> |
110 |
* |
111 |
* <p> |
112 |
* When using token list updater with JoinTokenList.Mutable there is a special treatment |
113 |
* of offsets independent of the underlying JoinTokenListChange and LexerInputOperation. |
114 |
* The updater treats the modOffset to be relative (in the number of characters) |
115 |
* to the relexOffset point (which is a real first relexed token's offset; it's necessary |
116 |
* for restarting of the lexer input operation) so when going over a JoinToken |
117 |
* the modOffset must be recomputed to not contain the gaps between individual join token parts. |
118 |
* </p> |
104 |
* |
119 |
* |
105 |
* @author Miloslav Metelka |
120 |
* @author Miloslav Metelka |
106 |
* @version 1.00 |
121 |
* @version 1.00 |
Lines 112-354
Link Here
|
112 |
private static final Logger LOG = Logger.getLogger(TokenListUpdater.class.getName()); |
127 |
private static final Logger LOG = Logger.getLogger(TokenListUpdater.class.getName()); |
113 |
|
128 |
|
114 |
/** |
129 |
/** |
115 |
* Use incremental algorithm to update the list of tokens |
130 |
* Use incremental algorithm to update a regular list of tokens (IncTokenList or EmbeddedTokenList) |
116 |
* after a modification done in the underlying storage. |
131 |
* after a modification done in the underlying storage. |
117 |
* |
132 |
* |
118 |
* @param tokenList non-null token list that is being updated. It may be top-level list |
133 |
* @param change non-null change that will incorporate the performed changes. |
119 |
* or embedded token list. |
|
|
120 |
* @param modOffset offset where the modification occurred. |
134 |
* @param modOffset offset where the modification occurred. |
121 |
* @param insertedLength number of characters inserted at modOffset. |
135 |
* @param insertedLength number of characters inserted at modOffset. |
122 |
* @param removedLength number of characters removed at modOffset. |
136 |
* @param removedLength number of characters removed at modOffset. |
123 |
* @param change non-null change that will incorporate the performed changes. |
|
|
124 |
* @param zeroIndexRelexState state used for relexing at index 0. |
125 |
*/ |
137 |
*/ |
126 |
public static <T extends TokenId> void update(MutableTokenList<T> tokenList, |
138 |
public static <T extends TokenId> void updateRegular(TokenListChange<T> change, TokenHierarchyEventInfo eventInfo) { |
127 |
int modOffset, int insertedLength, int removedLength, |
139 |
MutableTokenList<T> tokenList = change.tokenList(); |
128 |
TokenListChange<T> change, Object zeroIndexRelexState) { |
140 |
int tokenCount = tokenList.tokenCountCurrent(); |
129 |
// Fetch offset where the modification occurred |
|
|
130 |
LanguageOperation<T> languageOperation = LexerUtilsConstants.innerLanguageOperation( |
131 |
tokenList.languagePath()); |
132 |
|
133 |
int tokenCount = tokenList.tokenCountCurrent(); // presently created token count |
134 |
// Now determine which token is the first to be relexed. |
135 |
// If it would be either modified token or previous-of-modified token |
136 |
// (for modification right at the beginning of modified token) |
137 |
// then the token will be attempted to be validated (without running |
138 |
// a lexer). |
139 |
AbstractToken<T> modToken; |
140 |
// modTokenOffset holds beginning of the token in which the modification occurred. |
141 |
int modTokenOffset; |
142 |
// index points to the modified token |
143 |
int index; |
144 |
|
145 |
boolean loggable = LOG.isLoggable(Level.FINE); |
141 |
boolean loggable = LOG.isLoggable(Level.FINE); |
146 |
if (loggable) { |
142 |
if (loggable) { |
147 |
LOG.log(Level.FINE, "TokenListUpdater.update() STARTED\nmodOffset=" + modOffset |
143 |
logModification(tokenList.inputSourceText(), eventInfo, tokenCount, false); |
148 |
+ ", insertedLength=" + insertedLength |
144 |
} |
149 |
+ ", removedLength=" + removedLength |
145 |
|
150 |
+ ", tokenCount=" + tokenCount + "\n"); |
146 |
// Find modified token by binary search in existing tokens |
|
|
147 |
// Use LexerUtilsConstants.tokenIndexBinSearch() to NOT lazily create new tokens here |
148 |
int[] indexAndTokenOffset = LexerUtilsConstants.tokenIndexBinSearch(tokenList, eventInfo.modOffset(), tokenCount); |
149 |
// Index and offset from which the relexing will start |
150 |
int relexIndex = indexAndTokenOffset[0]; |
151 |
// relexOffset points to beginning of a token in which the modification occurred |
152 |
// or which is affected by a modification (its lookahead points beyond modification point). |
153 |
int relexOffset = indexAndTokenOffset[1]; |
154 |
if (relexIndex == -1) { // No tokens at all |
155 |
relexIndex = 0; |
156 |
relexOffset = tokenList.startOffset(); |
151 |
} |
157 |
} |
152 |
|
158 |
|
153 |
if (tokenCount == 0) { // no tokens yet or all removed |
159 |
// Index of token before which the relexing will end (or == tokenCount) |
154 |
if (!tokenList.isFullyLexed()) { |
160 |
int matchIndex = relexIndex; |
155 |
// No tokens created yet (they get created lazily). |
161 |
// Offset of token at matchIndex |
|
|
162 |
int matchOffset = relexOffset; |
163 |
|
164 |
if (relexIndex == tokenCount) { // Change right at end of last token or beyond it (if not fully lexed) |
165 |
// relexOffset set to end offset of the last token |
166 |
if (!tokenList.isFullyLexed() && eventInfo.modOffset() >= relexOffset + |
167 |
((relexIndex > 0) ? tokenList.lookahead(relexIndex - 1) : 0) |
168 |
) { // Do nothing if beyond last token's lookahed |
169 |
// Check whether the last token could be affected at all |
170 |
// by checking whether the modification was performed |
171 |
// in the last token's lookahead. |
172 |
// For fully lexed inputs the characters added to the end |
173 |
// must be properly lexed and notified (even if the last present |
174 |
// token has zero lookahead). |
156 |
if (loggable) { |
175 |
if (loggable) { |
157 |
LOG.log(Level.FINE, "TokenListUpdater.update() FINISHED: Not fully lexed yet.\n"); |
176 |
LOG.log(Level.FINE, "TLU.updateRegular() FINISHED: Not fully lexed yet. rOff=" + |
|
|
177 |
relexOffset + ", mOff=" + eventInfo.modOffset() + "\n"); |
158 |
} |
178 |
} |
159 |
return; // Do nothing in this case |
179 |
change.setIndex(relexIndex); |
160 |
} |
180 |
change.setOffset(relexOffset); |
161 |
// If fully lexed and no tokens then the tokens should start |
181 |
change.setMatchIndex(matchIndex); |
162 |
// right at the modification offset |
182 |
change.setMatchOffset(matchOffset); |
163 |
modToken = null; |
183 |
tokenList.replaceTokens(change, eventInfo.diffLength()); |
164 |
modTokenOffset = modOffset; |
184 |
return; // not affected at all |
165 |
index = 0; |
185 |
} // change.setIndex() will be performed later in relex() |
166 |
|
186 |
|
167 |
} else { // at least one token exists |
187 |
// Leave matchOffset as is (will possibly end relexing at tokenCount and unfinished relexing |
168 |
// Check whether the modification at modOffset might affect existing tokens |
188 |
// will be continued by replaceTokens()). |
169 |
// Get index of the token in which the modification occurred |
189 |
// For fully lexed lists it is necessary to lex till the end of input. |
170 |
// Get the offset of the last token into modTokenOffset variable |
190 |
if (tokenList.isFullyLexed()) |
171 |
index = tokenCount - 1; |
191 |
matchOffset = Integer.MAX_VALUE; |
172 |
modTokenOffset = tokenList.tokenOffset(index); |
|
|
173 |
if (modOffset >= modTokenOffset) { // inside or above the last token? |
174 |
modToken = token(tokenList, index); |
175 |
int modTokenEndOffset = modTokenOffset + modToken.length(); |
176 |
if (modOffset >= modTokenEndOffset) { // above last token |
177 |
// Modification was right at the end boundary of the last token |
178 |
// or above it (token list can be created lazily so that is valid case). |
179 |
// Check whether the last token could be affected at all |
180 |
// by checking the last token's lookahead. |
181 |
// For fully lexed inputs the characters added to the end |
182 |
// must be properly lexed and notified (even if the last present |
183 |
// token has zero lookahead). |
184 |
if (!tokenList.isFullyLexed() |
185 |
&& modOffset >= modTokenEndOffset + tokenList.lookahead(index) |
186 |
) { |
187 |
if (loggable) { |
188 |
LOG.log(Level.FINE, "TokenListUpdater.update() FINISHED: Not fully lexed yet. modTokenOffset=" |
189 |
+ modTokenOffset + ", modToken.length()=" + modToken.length() + "\n"); |
190 |
} |
191 |
return; // not affected at all |
192 |
} |
193 |
|
192 |
|
194 |
index++; |
193 |
} else { // relexIndex < tokenCount |
195 |
modToken = null; |
194 |
// Possibly increase matchIndex and matchOffset by skipping the tokens in the removed area |
196 |
modTokenOffset = modTokenEndOffset; |
195 |
if (eventInfo.removedLength() > 0) { // At least remove token at relexOffset |
197 |
} // else -> modification inside the last token |
196 |
matchOffset += tokenList.tokenOrEmbeddingUnsync(matchIndex++).token().length(); |
198 |
|
197 |
int removedEndOffset = eventInfo.modOffset() + eventInfo.removedLength(); |
199 |
} else { // modification in non-last token |
198 |
while (matchOffset < removedEndOffset && matchIndex < tokenCount) { |
200 |
// Find modified token by binary search |
199 |
matchOffset += tokenList.tokenOrEmbeddingUnsync(matchIndex++).token().length(); |
201 |
int low = 0; // use index as 'high' |
|
|
202 |
while (low <= index) { |
203 |
int mid = (low + index) / 2; |
204 |
int midStartOffset = tokenList.tokenOffset(mid); |
205 |
|
206 |
if (midStartOffset < modOffset) { |
207 |
low = mid + 1; |
208 |
} else if (midStartOffset > modOffset) { |
209 |
index = mid - 1; |
210 |
} else { |
211 |
// Token starting exactly at modOffset found |
212 |
index = mid; |
213 |
modTokenOffset = midStartOffset; |
214 |
break; |
215 |
} |
216 |
} |
200 |
} |
217 |
if (index < low) { // no token starting right at 'modOffset' |
201 |
} else { // For inside-token inserts match on the next token |
218 |
modTokenOffset = tokenList.tokenOffset(index); |
202 |
if (matchOffset < eventInfo.modOffset()) { |
219 |
} |
203 |
matchOffset += tokenList.tokenOrEmbeddingUnsync(matchIndex++).token().length(); |
220 |
modToken = token(tokenList, index); |
|
|
221 |
if (loggable) { |
222 |
LOG.log(Level.FINE, "BIN-SEARCH: index=" + index |
223 |
+ ", modTokenOffset=" + modTokenOffset |
224 |
+ ", modToken.id()=" + modToken.id() + "\n"); |
225 |
} |
204 |
} |
226 |
} |
205 |
} |
|
|
206 |
// Update the matchOffset so that it corresponds to the state |
207 |
// after the modification |
208 |
matchOffset += eventInfo.diffLength(); |
227 |
} |
209 |
} |
228 |
|
210 |
|
229 |
// Store the index that points to the modified token |
211 |
// Check whether modification affected previous token |
230 |
// i.e. modification at its beginning or inside. |
212 |
while (relexIndex > 0 && relexOffset + tokenList.lookahead(relexIndex - 1) > eventInfo.modOffset()) { |
231 |
// Index variable can later be modified but present value is important |
213 |
relexIndex--; |
232 |
// for moving of the offset gap later. |
214 |
if (loggable) { |
233 |
change.setOffsetGapIndex(index); |
215 |
LOG.log(Level.FINE, " Token at rInd=" + relexIndex + " affected (la=" + // NOI18N |
234 |
|
216 |
tokenList.lookahead(relexIndex) + ") => relex it\n"); // NOI18N |
235 |
// Index and offset from which the relexing will start. |
|
|
236 |
int relexIndex; |
237 |
int relexOffset; |
238 |
// Whether the token validation should be attempted or not. |
239 |
boolean attemptValidation = false; |
240 |
|
241 |
if (index == 0) { // modToken is first in the list |
242 |
relexIndex = index; |
243 |
relexOffset = modTokenOffset; |
244 |
// Can validate modToken if removal does not span whole token |
245 |
if (modToken != null && removedLength < modToken.length()) { |
246 |
attemptValidation = true; |
247 |
} |
217 |
} |
248 |
|
218 |
AbstractToken<T> token = tokenList.tokenOrEmbeddingUnsync(relexIndex).token(); |
249 |
} else { // Previous token exists |
219 |
relexOffset -= token.length(); |
250 |
// Check for insert-only right at the end of the previous token |
|
|
251 |
if (modOffset == modTokenOffset && removedLength == 0) { |
252 |
index--; // move to previous token |
253 |
modToken = token(tokenList, index); |
254 |
modTokenOffset -= modToken.length(); |
255 |
} |
256 |
|
257 |
// Check whether modification affected previous token |
258 |
if (index == 0 || modTokenOffset + tokenList.lookahead(index - 1) <= modOffset) { |
259 |
// Modification did not affect previous token |
260 |
relexIndex = index; |
261 |
relexOffset = modTokenOffset; |
262 |
// Check whether modification was localized to modToken only |
263 |
if (modOffset + removedLength < modTokenOffset + modToken.length()) { |
264 |
attemptValidation = true; |
265 |
} |
266 |
|
267 |
} else { // at least previous token affected |
268 |
relexOffset = modTokenOffset - token(tokenList, index - 1).length(); |
269 |
relexIndex = index - 2; // Start with token below previous token |
270 |
|
271 |
// Go back and mark all affected tokens for removals |
272 |
while (relexIndex >= 0) { |
273 |
AbstractToken<T> token = token(tokenList, relexIndex); |
274 |
// Check if token was not affected by modification |
275 |
if (relexOffset + tokenList.lookahead(relexIndex) <= modOffset) { |
276 |
break; |
277 |
} |
278 |
relexIndex--; |
279 |
relexOffset -= token.length(); |
280 |
} |
281 |
relexIndex++; // Next token will be relexed |
282 |
} |
283 |
} |
220 |
} |
284 |
|
221 |
|
285 |
// The lowest offset at which the relexing can end |
222 |
// Check whether actual relexing is necessary |
286 |
// (the relexing may end at higher offset if the relexed |
|
|
287 |
// tokens will end at different boundaries than the original |
288 |
// tokens or if the states after the tokens' recognition |
289 |
// will differ from the original states in the original tokens. |
290 |
int matchOffset; |
291 |
|
292 |
// Perform token validation of modToken if possible. |
293 |
// The index variable will hold the token index right before the matching point. |
294 |
if (attemptValidation) { |
295 |
matchOffset = modTokenOffset + modToken.length(); |
296 |
TokenValidator tokenValidator = languageOperation.tokenValidator(modToken.id()); |
297 |
if (tokenValidator != null && (tokenList.getClass() != IncTokenList.class)) { |
298 |
|
299 |
// if (tokenValidator.validateToken(modToken, modOffset - modTokenOffset, modRelOffset, |
300 |
// removedLength, insertedLength) |
301 |
// ) { |
302 |
// // Update positions |
303 |
// change.initRemovedAddedOffsets() |
304 |
|
305 |
// return; // validated successfully |
306 |
// } |
307 |
} |
308 |
|
309 |
} else { // Validation cannot be attempted |
310 |
// Need to compute matchOffset and matchIndex |
311 |
// by iterating forward |
312 |
if (index < tokenCount) { |
313 |
matchOffset = modTokenOffset + modToken.length(); |
314 |
int removeEndOffset = modOffset + removedLength; |
315 |
while (matchOffset < removeEndOffset && index + 1 < tokenCount) { |
316 |
index++; |
317 |
matchOffset += token(tokenList, index).length(); |
318 |
} |
319 |
|
320 |
} else // After last token |
321 |
matchOffset = modTokenOffset; |
322 |
} |
323 |
|
324 |
// State from which the lexer can be started |
223 |
// State from which the lexer can be started |
325 |
Object relexState = (relexIndex > 0) ? tokenList.state(relexIndex - 1) : zeroIndexRelexState; |
224 |
Object relexState = (relexIndex > 0) ? tokenList.state(relexIndex - 1) : null; |
326 |
// Update the matchOffset so that it corresponds to the state |
225 |
change.setIndex(relexIndex); |
327 |
// after the modification |
|
|
328 |
matchOffset += insertedLength - removedLength; |
329 |
change.setOffset(relexOffset); |
226 |
change.setOffset(relexOffset); |
|
|
227 |
change.setMatchIndex(matchIndex); |
228 |
change.setMatchOffset(matchOffset); |
330 |
|
229 |
|
331 |
// Variables' values: |
|
|
332 |
// 'index' - points to modified token. Or index == tokenCount for modification |
333 |
// past the last token. |
334 |
// 'tokenCount' - token count in the original token list. |
335 |
// 'relexIndex' - points to the token that will be relexed as first. |
336 |
// 'relexOffset' - points to beginning of the token that will be relexed as first. |
337 |
// 'matchOffset' - points to end of token after which the fixed token list could |
338 |
// possibly match the original token list. Points to end of token at 'index' |
339 |
// variable if 'index < tokenCount' and to the end of the last token |
340 |
// if 'index == tokenCount'. |
341 |
|
342 |
// Check whether relexing is necessary. |
230 |
// Check whether relexing is necessary. |
343 |
// Necessary condition for no-relexing is that the matchToken |
231 |
// Necessary condition for no-relexing is a removal at token's boundary |
344 |
// has zero lookahead (if lookahead would be >0 |
232 |
// and the token right before modOffset must have zero lookahead (if lookahead would be >0 |
345 |
// then the matchToken would be affected and relexOffset != matchOffset). |
233 |
// then the token would be affected) and the states before relexIndex must equal |
346 |
// The states before relex token must match the state after the modified token |
234 |
// to the state before matchIndex. |
347 |
// In case of removal starting and ending at token boundaries |
|
|
348 |
// the relexing might not be necessary. |
349 |
boolean relex = (relexOffset != matchOffset) |
235 |
boolean relex = (relexOffset != matchOffset) |
350 |
|| index >= tokenCount |
236 |
|| (eventInfo.insertedLength() > 0) |
351 |
|| !LexerUtilsConstants.statesEqual(relexState, tokenList.state(index)); |
237 |
|| (matchIndex == 0) // ensure the tokenList.state(matchIndex - 1) will not fail with IOOBE |
|
|
238 |
|| !LexerUtilsConstants.statesEqual(relexState, tokenList.state(matchIndex - 1)); |
352 |
|
239 |
|
353 |
// There is an extra condition that the lookahead of the matchToken |
240 |
// There is an extra condition that the lookahead of the matchToken |
354 |
// must not span the next (retained) token. This condition helps to ensure |
241 |
// must not span the next (retained) token. This condition helps to ensure |
Lines 356-547
Link Here
|
356 |
// As the empty tokens are not allowed the situation may only occur |
243 |
// As the empty tokens are not allowed the situation may only occur |
357 |
// for lookahead > 1. |
244 |
// for lookahead > 1. |
358 |
int lookahead; |
245 |
int lookahead; |
359 |
if (!relex && (lookahead = tokenList.lookahead(index)) > 1 && index + 1 < tokenCount) { |
246 |
if (!relex && (lookahead = tokenList.lookahead(matchIndex - 1)) > 1 && matchIndex < tokenCount) { |
360 |
relex = (lookahead > token(tokenList, index + 1).length()); // check next token |
247 |
// Check whether lookahead of the token before match point exceeds the whole token right after match point |
|
|
248 |
relex = (lookahead > tokenList.tokenOrEmbeddingUnsync(matchIndex).token().length()); // check next token |
361 |
} |
249 |
} |
362 |
|
250 |
|
363 |
if (loggable) { |
251 |
if (loggable) { |
364 |
LOG.log(Level.FINE, "BEFORE-RELEX: index=" + index + ", modTokenOffset=" + modTokenOffset |
252 |
StringBuilder sb = new StringBuilder(200); |
365 |
+ ", relexIndex=" + relexIndex + ", relexOffset=" + relexOffset |
253 |
sb.append("BEFORE-RELEX: relex=").append(relex); |
366 |
+ ", relexState=" + relexState |
254 |
sb.append(", rInd=").append(relexIndex).append(", rOff=").append(relexOffset); |
367 |
+ ", matchOffset=" + matchOffset |
255 |
sb.append(", mInd=").append(matchIndex).append(", mOff=").append(matchOffset).append('\n'); |
368 |
+ ", perform relex: " + relex + "\n"); |
256 |
sb.append(", rSta=").append(relexState).append(", tokenList-part:\n"); |
|
|
257 |
LexerUtilsConstants.appendTokenList(sb, tokenList, matchIndex, matchIndex - 3, matchIndex + 3, false, 4, false); |
258 |
sb.append('\n'); |
259 |
LOG.log(Level.FINE, sb.toString()); |
260 |
} |
261 |
|
262 |
assert (relexIndex >= 0); |
263 |
if (relex) { |
264 |
// Create lexer input operation for the given token list |
265 |
LexerInputOperation<T> lexerInputOperation |
266 |
= tokenList.createLexerInputOperation(relexIndex, relexOffset, relexState); |
267 |
relex(change, lexerInputOperation, tokenCount); |
268 |
} |
269 |
|
270 |
tokenList.replaceTokens(change, eventInfo.diffLength()); |
271 |
if (loggable) { |
272 |
LOG.log(Level.FINE, "TLU.updateRegular() FINISHED: change:" + change + "\nMods:" + change.toStringMods(4)); |
273 |
} |
274 |
} |
275 |
|
276 |
|
277 |
/** |
278 |
* Use incremental algorithm to update a JoinTokenList after a modification done in the underlying storage. |
279 |
* <br> |
280 |
* The assumption is that there may only be two states: |
281 |
* <ul> |
282 |
* <li> There is a local input source modification bounded to a particular ETL. |
283 |
* In such case there should be NO token lists removed/added. |
284 |
* </li> |
285 |
* <li> The modification spans multiple ETLs and all the affected ETLs will be removed. |
286 |
* The modification is "bounded" by the removed ETLs i.e. |
287 |
* modOffset >= first-removed-ETL.startOffset() |
288 |
* and modOffset + removedLength <= last-removed-ETL.endOffset() |
289 |
* </li> |
290 |
* </ul> |
291 |
* |
292 |
* @param change non-null change that will incorporate the performed changes. |
293 |
* @param modOffset offset where the modification occurred. |
294 |
* For join token lists if modification is done inside a JoinToken |
295 |
* the modOffset must be a logical distance from token's beginning |
296 |
* that corresponds to the modification's point (i.e. like if the token |
297 |
* would be continuous). |
298 |
* @param insertedLength number of characters inserted at modOffset. |
299 |
* @param removedLength number of characters removed at modOffset. |
300 |
*/ |
301 |
public static <T extends TokenId> void updateJoined(JoinTokenListChange<T> change, TokenHierarchyEventInfo eventInfo) { |
302 |
MutableJoinTokenList<T> jtl = (MutableJoinTokenList<T>) change.tokenList(); |
303 |
TokenListListUpdate<T> tokenListListUpdate = change.tokenListListUpdate(); |
304 |
int tokenCount = jtl.tokenCount(); |
305 |
boolean loggable = LOG.isLoggable(Level.FINE); |
306 |
if (loggable) { |
307 |
logModification(jtl.inputSourceText(), eventInfo, tokenCount, true); |
369 |
} |
308 |
} |
370 |
|
309 |
|
371 |
if (relex) { // Start relexing |
310 |
// First determine what area is affected by removed/added ETLs |
372 |
LexerInputOperation<T> lexerInputOperation |
311 |
int relexJoinIndex; |
373 |
= tokenList.createLexerInputOperation(relexIndex, relexOffset, relexState); |
312 |
int modOffset = eventInfo.modOffset(); |
|
|
313 |
int relexTokenListIndex = tokenListListUpdate.modTokenListIndex; // Index of ETL where a change occurred. |
314 |
// Relative distance of mod against relex point (or point of ETLs added/removed) |
315 |
int relModOffset; |
316 |
if (tokenListListUpdate.isTokenListsMod()) { |
317 |
// Find relexJoinIndex by examining ETL at relexTokenListIndex-1. |
318 |
// This way the code is more uniform than examining ETL at relexTokenListIndex. |
319 |
if (relexTokenListIndex > 0) { // non-first ETL |
320 |
relexTokenListIndex--; |
321 |
jtl.setActiveTokenListIndex(relexTokenListIndex); |
322 |
EmbeddedTokenList<T> relexEtl = jtl.activeTokenList(); |
323 |
EmbeddedJoinInfo joinInfo = relexEtl.joinInfo; |
324 |
relexJoinIndex = jtl.activeEndJoinIndex(); |
325 |
if (joinInfo.joinTokenLastPartShift() > 0) { // Mod points inside join token |
326 |
// Find first non-empty ETL below to determine partTextOffset() |
327 |
while (relexEtl.tokenCountCurrent() == 0) { // No tokens in ETL |
328 |
jtl.setActiveTokenListIndex(--relexTokenListIndex); |
329 |
relexEtl = jtl.activeTokenList(); |
330 |
} |
331 |
// relexEtl is non-empty - last token is PartToken |
332 |
PartToken<T> partToken = (PartToken<T>) relexEtl.tokenOrEmbeddingUnsync( |
333 |
relexEtl.tokenCountCurrent() - 1).token(); |
334 |
relModOffset = partToken.partTextOffset(); |
335 |
} else { // Not a join token => use first token at relexTokenListIndex |
336 |
relexTokenListIndex++; |
337 |
relModOffset = 0; |
338 |
} |
339 |
} else { // (relexTokenListIndex == 0) |
340 |
relexJoinIndex = 0; |
341 |
jtl.setActiveTokenListIndex(0); |
342 |
relModOffset = 0; |
343 |
} |
374 |
|
344 |
|
375 |
do { // Fetch new tokens from lexer as necessary |
345 |
} else { // No token list mod |
376 |
AbstractToken<T> token = lexerInputOperation.nextToken(); |
346 |
assert ((eventInfo.insertedLength() > 0) || (eventInfo.removedLength() > 0)) : "No modification"; |
377 |
if (token == null) { |
347 |
jtl.setActiveTokenListIndex(relexTokenListIndex); |
378 |
attemptValidation = false; |
348 |
EmbeddedTokenList<T> relexEtl = jtl.activeTokenList(); |
|
|
349 |
change.charModTokenList = relexEtl; |
350 |
// Search within relexEtl only - can use binary search safely (unlike on JTL with removed ETLs) |
351 |
int[] indexAndTokenOffset = relexEtl.tokenIndex(modOffset); // Index could be -1 TBD |
352 |
relexJoinIndex = relexEtl.joinInfo.joinTokenIndex() + indexAndTokenOffset[0]; |
353 |
relModOffset = modOffset - indexAndTokenOffset[1]; |
354 |
} |
355 |
|
356 |
// Matching point index and offset. Matching point vars are assigned early |
357 |
// and relex-vars are possibly shifted down first and then the match-vars are updated. |
358 |
// That's because otherwise the "working area" of JTL (above/below token list mod) |
359 |
// would have to be switched below and above. |
360 |
int matchJoinIndex = relexJoinIndex; |
361 |
int matchOffset = modOffset - relModOffset; // Suitable for single-ETL update (will be corrected later) |
362 |
|
363 |
// Update relex-vars according to lookahead of tokens before relexJoinIndex |
364 |
while (relexJoinIndex > 0 && jtl.lookahead(relexJoinIndex - 1) > relModOffset) { |
365 |
AbstractToken<T> relexToken = jtl.tokenOrEmbeddingUnsync(--relexJoinIndex).token(); |
366 |
relModOffset += relexToken.length(); // User regular token.length() here |
367 |
if (loggable) { |
368 |
LOG.log(Level.FINE, " Token at rInd=" + relexJoinIndex + " affected (la=" + // NOI18N |
369 |
jtl.lookahead(relexJoinIndex) + ") => relex it\n"); // NOI18N |
370 |
} |
371 |
} |
372 |
|
373 |
// Create lexer input operation now since JTL should be positioned before removed ETLs |
374 |
// and JLIO needs to scan tokens backwards for fly sequence length. |
375 |
Object relexState = (relexJoinIndex > 0) ? jtl.state(relexJoinIndex - 1) : null; |
376 |
int relexLocalIndex = jtl.tokenStartLocalIndex(relexJoinIndex); |
377 |
relexTokenListIndex = jtl.activeTokenListIndex(); |
378 |
int relexOffset = jtl.activeTokenList().tokenOffsetByIndex(relexLocalIndex); |
379 |
JoinLexerInputOperation<T> lexerInputOperation = new MutableJoinLexerInputOperation<T>( |
380 |
jtl, relexJoinIndex, relexState, relexTokenListIndex, relexOffset, tokenListListUpdate); |
381 |
lexerInputOperation.init(); |
382 |
change.setIndex(relexJoinIndex); |
383 |
change.setOffset(relexOffset); |
384 |
change.setStartInfo(lexerInputOperation, relexLocalIndex); |
385 |
// setMatchIndex() and setMatchOffset() called later below |
386 |
|
387 |
// Index of token before which the relexing will end (or == tokenCount) |
388 |
if (tokenListListUpdate.isTokenListsMod()) { // Assign first token after last removed ETL |
389 |
int afterModTokenListIndex = tokenListListUpdate.modTokenListIndex + tokenListListUpdate.removedTokenListCount; |
390 |
if (afterModTokenListIndex == jtl.tokenListCount()) { // Removed till end |
391 |
matchJoinIndex = tokenCount; |
392 |
matchOffset = Integer.MAX_VALUE; |
393 |
} else { // Removed inside |
394 |
EmbeddedTokenList<T> afterModEtl = jtl.tokenList(afterModTokenListIndex); |
395 |
matchJoinIndex = afterModEtl.joinInfo.joinTokenIndex(); |
396 |
// Check if the first token of afterModEtl is not an end of join token |
397 |
// or that the afterModEtl does not participate in a join token (may be empty) |
398 |
if (afterModEtl.tokenCountCurrent() > 0) { |
399 |
AbstractToken<T> token = afterModEtl.tokenOrEmbeddingUnsync(0).token(); |
400 |
if (token.getClass() == PartToken.class) { |
401 |
matchJoinIndex++; |
402 |
matchOffset = afterModEtl.startOffset() + token.length(); |
403 |
} else { |
404 |
matchOffset = afterModEtl.startOffset(); |
405 |
} |
406 |
} else { // No tokens in this ETL |
407 |
int joinTokenLastPartShift = afterModEtl.joinInfo.joinTokenLastPartShift(); |
408 |
if (joinTokenLastPartShift > 0) { // Part of join token |
409 |
afterModTokenListIndex += joinTokenLastPartShift; |
410 |
matchJoinIndex++; |
411 |
matchOffset = afterModEtl.startOffset(); |
412 |
} else { // Empty ETL but not a part of JoinToken - ending empty ETL(s) |
413 |
matchOffset = afterModEtl.startOffset(); |
414 |
} |
415 |
} |
416 |
// Move jtl past removed/added token lists |
417 |
jtl.setActiveTokenListIndex(afterModTokenListIndex); |
418 |
|
419 |
} |
420 |
} else { // No token ETLs removed/added |
421 |
// matchOffset already initialized to (modOffset - orig-relModOffset) |
422 |
if (eventInfo.removedLength() > 0) { // At least remove token at relexOffset |
423 |
matchOffset += jtl.tokenOrEmbeddingUnsync(matchJoinIndex++).token().length(); |
424 |
int removedEndOffset = eventInfo.modOffset() + eventInfo.removedLength(); |
425 |
while (matchOffset < removedEndOffset) { |
426 |
matchOffset += jtl.tokenOrEmbeddingUnsync(matchJoinIndex++).token().length(); |
427 |
} |
428 |
} else { // For inside-token inserts match on the next token |
429 |
if (matchOffset < eventInfo.modOffset()) { |
430 |
matchOffset += jtl.tokenOrEmbeddingUnsync(matchJoinIndex++).token().length(); |
431 |
} |
432 |
} |
433 |
// Update the matchOffset so that it corresponds to the state |
434 |
// after the modification |
435 |
matchOffset += eventInfo.diffLength(); |
436 |
} |
437 |
|
438 |
// TBD relexing necessity optimizations like in updateRegular() |
439 |
change.setMatchIndex(matchJoinIndex); |
440 |
change.setMatchOffset(matchOffset); |
441 |
relex(change, lexerInputOperation, tokenCount); |
442 |
jtl.replaceTokens(change, eventInfo.diffLength()); |
443 |
if (loggable) { |
444 |
LOG.log(Level.FINE, "TLU.updateRegular() FINISHED: change:" + change + // NOI18N |
445 |
"\nMods:" + change.toStringMods(4)); // NOI18N |
446 |
} |
447 |
} |
448 |
|
449 |
|
450 |
/** |
451 |
* Relex part of input to create new tokens. This method may sometimes be skipped e.g. for removal of chars |
452 |
* corresponding to a single token preceded by a token with zero lookahead. |
453 |
* <br/> |
454 |
* This code is common for both updateRegular() and updateJoined(). |
455 |
* |
456 |
* @param change token list change into which the created tokens are being added; the token list |
457 |
* being updated (top-level or embedded) is obtained from it by change.tokenList(). |
458 |
* @param lexerInputOperation lexer input operation that supplies the relexed tokens; it gets released at the end. |
459 |
* @param tokenCount current token count in tokenList. |
460 |
*/ |
461 |
private static <T extends TokenId> void relex(TokenListChange<T> change, |
462 |
LexerInputOperation<T> lexerInputOperation, int tokenCount |
463 |
) { |
464 |
boolean loggable = LOG.isLoggable(Level.FINE); |
465 |
MutableTokenList<T> tokenList = change.tokenList(); |
466 |
// Remember the match index below which the comparison of extra relexed tokens |
467 |
// (matching the original ones) cannot go. |
468 |
int lowestMatchIndex = change.matchIndex; |
469 |
|
470 |
AbstractToken<T> token; |
471 |
int relexOffset = lexerInputOperation.lastTokenEndOffset(); |
472 |
while ((token = lexerInputOperation.nextToken()) != null) { |
473 |
// Get lookahead and state; Will certainly use them both since updater runs for inc token lists only |
474 |
int lookahead = lexerInputOperation.lookahead(); |
475 |
Object state = lexerInputOperation.lexerState(); |
476 |
if (loggable) { |
477 |
StringBuilder sb = new StringBuilder(100); |
478 |
sb.append("LEXED-TOKEN: "); |
479 |
int tokenEndOffset = lexerInputOperation.lastTokenEndOffset(); |
480 |
CharSequence inputSourceText = tokenList.inputSourceText(); |
481 |
if (tokenEndOffset > inputSourceText.length()) { |
482 |
sb.append(tokenEndOffset).append("!! => "); |
483 |
tokenEndOffset = inputSourceText.length(); |
484 |
sb.append(tokenEndOffset); |
485 |
} |
486 |
sb.append('"'); |
487 |
CharSequenceUtilities.debugText(sb, inputSourceText.subSequence(relexOffset, tokenEndOffset)); |
488 |
sb.append('"'); |
489 |
sb.append(" ").append(token.id()); |
490 |
sb.append(", <").append(relexOffset); |
491 |
sb.append(", ").append(relexOffset + token.length()); |
492 |
sb.append("> LA=").append(lookahead); |
493 |
sb.append(", state=").append(state); |
494 |
sb.append(", IHC=").append(System.identityHashCode(token)); |
495 |
sb.append("\n"); |
496 |
LOG.log(Level.FINE, sb.toString()); |
497 |
} |
498 |
|
499 |
change.addToken(token, lookahead, state); |
500 |
// Here add regular token length even for JoinToken instances |
501 |
// since this is used solely for comparing with matchOffset which |
502 |
// also uses the per-input-chars coordinates. Real token's offset is independent value |
503 |
// assigned by the underlying TokenListChange and LexerInputOperation. |
504 |
relexOffset = lexerInputOperation.lastTokenEndOffset(); |
505 |
// Marks all original tokens that would cover the area of just lexed token as removed. |
506 |
// 'matchIndex' will point right above the last token that was removed |
507 |
// 'matchOffset' will point to the end of the last removed token |
508 |
if (relexOffset > change.matchOffset) { |
509 |
do { // Mark all tokens below |
510 |
if (change.matchIndex == tokenCount) { // index == tokenCount |
511 |
if (tokenList.isFullyLexed()) { |
512 |
change.matchOffset = Integer.MAX_VALUE; // Force lexing till end of input |
513 |
} else { // Not fully lexed -> stop now |
514 |
// Fake the conditions to break the relexing loop |
515 |
change.matchOffset = relexOffset; |
516 |
state = tokenList.state(change.matchIndex - 1); |
517 |
} |
518 |
break; |
519 |
} |
520 |
// Skip the token at matchIndex and also increase matchOffset |
521 |
// The default (increasing matchOffset by token.length()) is overridden for join token list. |
522 |
change.increaseMatchIndex(); |
523 |
} while (relexOffset > change.matchOffset); |
524 |
} |
525 |
|
526 |
// Check whether the new token ends at matchOffset with the same state |
527 |
// like the original which typically means end of relexing |
528 |
if (relexOffset == change.matchOffset |
529 |
&& LexerUtilsConstants.statesEqual(state, |
530 |
(change.matchIndex > 0) ? tokenList.state(change.matchIndex - 1) : null) |
531 |
) { |
532 |
// Here it's a potential match and the relexing could end. |
533 |
// However there are additional SAME-LOOKAHEAD requirements |
534 |
// that are checked here and if not satisfied the relexing will continue. |
535 |
// SimpleLexerRandomTest.test() contains detailed description. |
536 |
|
537 |
// If there are no more original tokens to be removed then stop since |
538 |
// there are no tokens ahead that would possibly have to be relexed because of LA differences. |
539 |
if (change.matchIndex == tokenCount) |
540 |
break; |
541 |
|
542 |
int matchPointOrigLookahead = (change.matchIndex > 0) |
543 |
? tokenList.lookahead(change.matchIndex - 1) |
544 |
: 0; |
545 |
// If old and new LAs are the same it should be safe to stop relexing. |
546 |
// Also since all tokens are non-empty it's enough to just check |
547 |
// LA > 1 (because LA <= 1 cannot span more than one token). |
548 |
// The same applies for current LA. |
549 |
if (lookahead == matchPointOrigLookahead || |
550 |
matchPointOrigLookahead <= 1 && lookahead <= 1 |
551 |
) { |
552 |
break; |
553 |
} |
554 |
|
555 |
int afterMatchPointTokenLength = tokenList.tokenOrEmbeddingUnsync(change.matchIndex).token().length(); |
556 |
if (matchPointOrigLookahead <= afterMatchPointTokenLength && |
557 |
lookahead <= afterMatchPointTokenLength |
558 |
) { |
559 |
// Here both the original and relexed before-match-point token |
560 |
// have their LAs ending within bounds of the after-match-point token so it's OK |
379 |
break; |
561 |
break; |
380 |
} |
562 |
} |
381 |
|
563 |
|
382 |
lookahead = lexerInputOperation.lookahead(); |
564 |
// It's true that nothing can be generally predicted about LA if the token after match point |
383 |
Object state = lexerInputOperation.lexerState(); |
565 |
// would be relexed (compared to the original's token LA). However the following criteria |
384 |
if (loggable) { |
566 |
// should possibly suffice. |
385 |
LOG.log(Level.FINE, "LEXED-TOKEN: id=" + token.id() |
567 |
int afterMatchPointOrigTokenLookahead = tokenList.lookahead(change.matchIndex); |
386 |
+ ", length=" + token.length() |
568 |
if (lookahead - afterMatchPointTokenLength <= afterMatchPointOrigTokenLookahead && |
387 |
+ ", lookahead=" + lookahead |
569 |
(matchPointOrigLookahead <= afterMatchPointTokenLength || |
388 |
+ ", state=" + state + "\n"); |
570 |
lookahead >= matchPointOrigLookahead) |
389 |
} |
571 |
) { |
390 |
|
572 |
// The orig LA of after-match-point token cannot be lower than the currently lexed LA's projection into it. |
391 |
change.addToken(token, lookahead, state); |
573 |
// Also check that the orig lookahead ended in the after-match-point token |
392 |
|
574 |
// or otherwise require the relexed before-match-point token to have >= lookahead of the original |
393 |
relexOffset += token.length(); |
575 |
// before-match-point token. |
394 |
// Remove obsolete tokens that would cover the area of just lexed token |
576 |
break; |
395 |
// 'index' will point to the last token that was removed |
|
|
396 |
// 'matchOffset' will point to the end of the last removed token |
397 |
if (relexOffset > matchOffset && index < tokenCount) { |
398 |
attemptValidation = false; |
399 |
do { |
400 |
index++; |
401 |
if (index == tokenCount) { |
402 |
// Make sure the number of removed tokens will be computed properly later |
403 |
modToken = null; |
404 |
// Check whether it should lex till the end |
405 |
// or whether 'Match at anything' should be done |
406 |
if (tokenList.isFullyLexed()) { |
407 |
// Will lex till the end of input |
408 |
matchOffset = Integer.MAX_VALUE; |
409 |
} else { |
410 |
// Force stop lexing |
411 |
relex = false; |
412 |
} |
413 |
break; |
414 |
} |
415 |
matchOffset += token(tokenList, index).length(); |
416 |
} while (relexOffset > matchOffset); |
417 |
} |
577 |
} |
418 |
|
578 |
|
419 |
// Check whether the new token ends at matchOffset with the same state |
579 |
// The token at matchIndex must be relexed |
420 |
// like the original which typically means end of relexing |
580 |
if (loggable) { |
421 |
if (relexOffset == matchOffset |
581 |
LOG.log(Level.FINE, " EXTRA-RELEX: mInd=" + change.matchIndex + ", LA=" + lookahead + "\n"); |
422 |
&& (index < tokenCount) |
582 |
} |
423 |
&& LexerUtilsConstants.statesEqual(state, tokenList.state(index)) |
583 |
// Skip the token at matchIndex |
424 |
) { |
584 |
change.increaseMatchIndex(); |
425 |
// Here it's a potential match and the relexing could end. |
585 |
// Continue by fetching next token |
426 |
// However there are additional conditions that need to be checked. |
586 |
} |
427 |
// 1. Check whether lookahead of the last relexed token |
587 |
} |
428 |
// does not exceed length plus LA of the subsequent (original) token. |
588 |
lexerInputOperation.release(); |
429 |
// The initial part of SimpleRandomTest.test() verifies this. |
|
|
430 |
// 2. Algorithm attempts to have the same lookaheads in tokens |
431 |
// like the regular batch scanning would produce. |
432 |
// Although not strictly necessary requirement |
433 |
// it helps to simplify the debugging in case the lexer does not work |
434 |
// well in the incremental setup. |
435 |
// The following code checks that the lookahead of the original match token |
436 |
// (i.e. the token right before matchOffset) does "end" inside |
437 |
// the next token - if not then relexing the next token is done. |
438 |
// The second part of SimpleRandomTest.test() verifies this. |
439 |
|
589 |
|
440 |
// 'index' points to the last token that was removed |
590 |
// If at least two tokens were lexed it's possible that e.g. the last added token |
441 |
int matchTokenLookahead = tokenList.lookahead(index); |
591 |
// will be the same like the last removed token and in such case |
442 |
// Optimistically suppose that the relexing will end |
592 |
// the addition of the last token should be 'undone'. |
443 |
relex = false; |
593 |
// This all may happen due to the fact that for larger lookaheads |
444 |
// When assuming non-empty tokens the lookahead 1 |
594 |
// the algorithm must relex the token(s) within lookahead (see the code above). |
445 |
// just reaches the end of the next token |
595 |
int lastAddedTokenIndex = change.addedTokenOrEmbeddingsCount() - 1; |
446 |
// so lookahead < 1 is always fine from this point of view. |
596 |
// There should remain at least one added token since that one |
447 |
if (matchTokenLookahead > 1 || lookahead > 1) { |
597 |
// may not be the same like the original removed one because |
448 |
// Start with token right after the last removed token starting at matchOffset |
598 |
// token lengths would differ because of the input source modification. |
449 |
int i = index + 1; |
599 |
|
450 |
// Process additional removals by increasing 'index' |
600 |
if (change.matchOffset != Integer.MAX_VALUE) { // would not make sense when lexing past end of existing tokens |
451 |
// 'lookahead' holds |
601 |
while (lastAddedTokenIndex >= 1 && // At least one token added |
452 |
while (i < tokenCount) { |
602 |
change.matchIndex > lowestMatchIndex // At least one token removed |
453 |
int tokenLength = token(tokenList, i).length(); |
603 |
) { |
454 |
lookahead -= tokenLength; // decrease extra lookahead |
604 |
AbstractToken<T> lastAddedToken = change.addedTokenOrEmbeddings().get(lastAddedTokenIndex).token(); |
455 |
matchTokenLookahead -= tokenLength; |
605 |
AbstractToken<T> lastRemovedToken = tokenList.tokenOrEmbeddingUnsync(change.matchIndex - 1).token(); |
456 |
if (lookahead <= 0 && matchTokenLookahead <=0) { |
606 |
if (lastAddedToken.id() != lastRemovedToken.id() |
457 |
break; // No more work |
607 |
|| lastAddedToken.length() != lastRemovedToken.length() |
458 |
} |
608 |
|| change.laState().lookahead(lastAddedTokenIndex) != tokenList.lookahead(change.matchIndex - 1) |
459 |
if (lookahead != tokenList.lookahead(i) |
|
|
460 |
|| matchTokenLookahead > 0 |
461 |
) { |
462 |
// This token must be relexed |
463 |
if (loggable) { |
464 |
LOG.log(Level.FINE, "EXTRA-RELEX: index=" + index + ", lookahead=" + lookahead |
465 |
+ ", tokenLength=" + tokenLength + "\n"); |
466 |
} |
467 |
index = i; |
468 |
matchOffset += tokenLength; |
469 |
relex = true; |
470 |
// Continue - further tokens may be affected |
471 |
} |
472 |
i++; |
473 |
} |
474 |
} |
475 |
|
476 |
if (!relex) { |
477 |
if (attemptValidation) { |
478 |
// if (modToken.id() == token.id() |
479 |
// && tokenList.lookahead(index) == lookahead |
480 |
// && !modToken.isFlyweight() |
481 |
// && !token.isFlyweight() |
482 |
// && (tokenList.getClass() != IncTokenList.class |
483 |
// || change.tokenHierarchyOperation().canModifyToken(index, modToken)) |
484 |
// && LexerSpiTokenPackageAccessor.get().restoreToken( |
485 |
// languageOperation.tokenHandler(), |
486 |
// modToken, token) |
487 |
// ) { |
488 |
// // Restored successfully |
489 |
// // TODO implement - fix token's length and return |
490 |
// // now default in fact to failed validation |
491 |
// } |
492 |
attemptValidation = false; |
493 |
} |
494 |
} |
495 |
} |
496 |
} while (relex); // End of the relexing loop |
497 |
lexerInputOperation.release(); |
498 |
|
499 |
// If at least two tokens were lexed it's possible that e.g. the last added token |
500 |
// will be the same like the last removed token and in such case |
501 |
// the addition of the last token should be 'undone'. |
502 |
// This all may happen due to the fact that for larger lookaheads |
503 |
// the algorithm must relex the token(s) within lookahead (see the code above). |
504 |
int lastAddedTokenIndex = change.addedTokensOrBranchesCount() - 1; |
505 |
// There should remain at least one added token since that one |
506 |
// may not be the same like the original removed one because |
507 |
// token lengths would differ because of the input source modification. |
508 |
while (lastAddedTokenIndex >= 1 && index > relexIndex && index < tokenCount) { |
509 |
AbstractToken<T> addedToken = LexerUtilsConstants.token( |
510 |
change.addedTokensOrBranches().get(lastAddedTokenIndex)); |
511 |
AbstractToken<T> removedToken = token(tokenList, index); |
512 |
if (addedToken.id() != removedToken.id() |
513 |
|| addedToken.length() != removedToken.length() |
514 |
|| change.laState().lookahead(lastAddedTokenIndex) != tokenList.lookahead(index) |
515 |
|| !LexerUtilsConstants.statesEqual(change.laState().state(lastAddedTokenIndex), |
609 |
|| !LexerUtilsConstants.statesEqual(change.laState().state(lastAddedTokenIndex), |
516 |
tokenList.state(index)) |
610 |
tokenList.state(change.matchIndex - 1)) |
517 |
) { |
611 |
) { |
518 |
break; |
612 |
break; |
519 |
} |
613 |
} |
520 |
// Last removed and added tokens are the same so undo the addition |
614 |
// Last removed and added tokens are the same so undo the addition |
521 |
if (loggable) { |
615 |
if (loggable) { |
522 |
LOG.log(Level.FINE, "RETAIN-ORIGINAL: index=" + index + ", id=" + removedToken.id() + "\n"); |
616 |
LOG.log(Level.FINE, " RETAIN-ORIGINAL at (mInd-1)=" + (change.matchIndex-1) + |
|
|
617 |
", id=" + lastRemovedToken.id() + "\n"); |
523 |
} |
618 |
} |
524 |
lastAddedTokenIndex--; |
619 |
lastAddedTokenIndex--; |
525 |
index--; |
620 |
change.removeLastAddedToken(); // Includes decreasing of matchIndex and matchOffset |
526 |
relexOffset -= addedToken.length(); |
|
|
527 |
change.removeLastAddedToken(); |
528 |
} |
621 |
} |
|
|
622 |
} else { // matchOffset == Integer.MAX_VALUE |
623 |
// Fix matchOffset to point to end of last token since it's used |
624 |
// as last-added-token-end-offset in event notifications |
625 |
change.setMatchOffset(relexOffset); |
529 |
} |
626 |
} |
|
|
627 |
} |
530 |
|
628 |
|
531 |
// Now ensure that the original tokens will be replaced by the relexed ones. |
629 |
private static void logModification(CharSequence inputSourceText, TokenHierarchyEventInfo eventInfo, |
532 |
int removedTokenCount = (modToken != null) ? (index - relexIndex + 1) : (index - relexIndex); |
630 |
int tokenCount, boolean updateJoined |
533 |
if (loggable) { |
631 |
) { |
534 |
LOG.log(Level.FINE, "TokenListUpdater.update() FINISHED: Removed:" |
632 |
int modOffset = eventInfo.modOffset(); |
535 |
+ removedTokenCount + ", Added:" + change.addedTokensOrBranchesCount() + " tokens.\n"); |
633 |
int removedLength = eventInfo.removedLength(); |
|
|
634 |
int insertedLength = eventInfo.insertedLength(); |
635 |
String insertedText = ""; |
636 |
if (insertedLength > 0) { |
637 |
insertedText = ", insTxt:\"" + CharSequenceUtilities.debugText( |
638 |
inputSourceText.subSequence(modOffset, modOffset + insertedLength)) + '"'; |
536 |
} |
639 |
} |
537 |
change.setIndex(relexIndex); |
640 |
// Debug 10 chars around modOffset |
538 |
change.setAddedEndOffset(relexOffset); |
641 |
int afterInsertOffset = modOffset + insertedLength; |
539 |
tokenList.replaceTokens(change, removedTokenCount, insertedLength - removedLength); |
642 |
CharSequence beforeText = inputSourceText.subSequence(Math.max(afterInsertOffset - 5, 0), afterInsertOffset); |
540 |
} |
643 |
CharSequence afterText = inputSourceText.subSequence(afterInsertOffset, |
541 |
|
644 |
Math.min(afterInsertOffset + 5, inputSourceText.length())); |
542 |
private static <T extends TokenId> AbstractToken<T> token(MutableTokenList<T> tokenList, int index) { |
645 |
StringBuilder sb = new StringBuilder(200); |
543 |
Object tokenOrEmbeddingContainer = tokenList.tokenOrEmbeddingContainerUnsync(index); // Unsync impl suffices |
646 |
sb.append("TLU.update"); |
544 |
return LexerUtilsConstants.token(tokenOrEmbeddingContainer); |
647 |
sb.append(updateJoined ? "Joined" : "Regular"); |
|
|
648 |
sb.append("() modOff=").append(modOffset); |
649 |
sb.append(", text-around:\"").append(beforeText).append('|'); |
650 |
sb.append(afterText).append("\", insLen="); |
651 |
sb.append(insertedLength).append(insertedText); |
652 |
sb.append(", remLen=").append(removedLength); |
653 |
sb.append(", tCnt=").append(tokenCount).append('\n'); |
654 |
LOG.log(Level.FINE, sb.toString()); |
545 |
} |
655 |
} |
546 |
|
656 |
|
547 |
} |
657 |
} |